* [PATCH] patch to support super page (2M) with EPT
@ 2008-05-09 9:10 Xin, Xiaohui
2008-05-11 20:33 ` Huang2, Wei
0 siblings, 1 reply; 14+ messages in thread
From: Xin, Xiaohui @ 2008-05-09 9:10 UTC (permalink / raw)
To: xen-devel
[-- Attachment #1.1: Type: text/plain, Size: 835 bytes --]
Attached are the patches to support super pages with EPT. Only the 2M
size is supported. Shadow mode should still work fine with 4K pages.
The patches are split into 3 parts; apply them in the order attached.
tool.diff
Allocates guest memory in 2M physically contiguous chunks, except for the
first 2M and the last 2M.
The first 2M covers special memory, and Xen uses the last few pages of
guest memory for special purposes.
We leave both of these ranges as normal 4K pages.
super_page_common.patch
Modifies the p2m interfaces, such as guest_physmap_add_page() and
p2m_set_entry(), by adding an order parameter.
p2m-ept-file.patch
Updates the EPT table handling to support super pages.
Signed-off-by: Xin Xiaohui <xiaohui.xin@intel.com>
Signed-off-by: Li Xin, B <xin.b.li@intel.com>
[-- Attachment #1.2: Type: text/html, Size: 5406 bytes --]
[-- Attachment #2: tool.diff --]
[-- Type: application/octet-stream, Size: 2945 bytes --]
diff -r ccbbe6fe5827 tools/libxc/xc_hvm_build.c
--- a/tools/libxc/xc_hvm_build.c Mon May 05 10:16:58 2008 +0100
+++ b/tools/libxc/xc_hvm_build.c Fri May 09 01:07:36 2008 +0800
@@ -165,7 +165,7 @@ static int setup_guest(int xc_handle,
uint32_t *ident_pt;
struct elf_binary elf;
uint64_t v_start, v_end;
- int rc;
+ int rc, left;
xen_capabilities_info_t caps;
/* An HVM guest must be initialised with at least 2MB memory. */
@@ -213,19 +213,64 @@ static int setup_guest(int xc_handle,
* Allocate memory for HVM guest, skipping VGA hole 0xA0000-0xC0000.
* We allocate pages in batches of no more than 2048 to ensure that
* we can be preempted and hence dom0 remains responsive.
- */
+ * 1) Allocate 4K pages for the first 2M guest memory;
+ * 2) try to allocate 2M contiguous pages for the remaining guest memory
+ * or use 4K pages;
+ * 3) Since the last page of the guest memory will be dereserved at last,
+ * we try just allocate 4K pages for the last 2M guest memory.
+ */
+
rc = xc_domain_memory_populate_physmap(
xc_handle, dom, 0xa0, 0, 0, &page_array[0x00]);
cur_pages = 0xc0;
- while ( (rc == 0) && (nr_pages > cur_pages) )
+
+ if ( rc == 0 )
+ rc = xc_domain_memory_populate_physmap(
+ xc_handle, dom, 0x200-0xc0, 0, 0, &page_array[0xc0]);
+
+ cur_pages = 0x200;
+
+ left = nr_pages - ((nr_pages >> 9 ) << 9 );
+
+ while ( (rc == 0) && ( (left ? nr_pages : (nr_pages - 0x200)) > cur_pages) )
{
unsigned long count = nr_pages - cur_pages;
if ( count > 2048 )
+ {
count = 2048;
+ rc = xc_domain_memory_populate_physmap(
+ xc_handle, dom, 4, 9, 0, &page_array[cur_pages]);
+ if ( rc != 0 )
+ {
+ PERROR("Cannot allocate more 2M pages for HVM guest.\n");
+ rc = xc_domain_memory_populate_physmap(
+ xc_handle, dom, count, 0, 0, &page_array[cur_pages]);
+ if ( rc != 0 )
+ {
+ PERROR("Could not allocate memory for HVM guest.\n");
+ goto error_out;
+ }
+ }
+ }
+ else
+ {
+ rc = xc_domain_memory_populate_physmap(
+ xc_handle, dom, count, 0, 0, &page_array[cur_pages]);
+
+ if ( rc != 0 )
+ {
+ PERROR("Could not allocate memory for HVM guest.\n");
+ goto error_out;
+ }
+ }
+
+ cur_pages += count;
+ }
+
+ if ( !left )
rc = xc_domain_memory_populate_physmap(
- xc_handle, dom, count, 0, 0, &page_array[cur_pages]);
- cur_pages += count;
- }
+ xc_handle, dom, nr_pages - cur_pages, 0, 0, &page_array[cur_pages]);
+
if ( rc != 0 )
{
PERROR("Could not allocate memory for HVM guest.\n");
[-- Attachment #3: super_page_common.patch --]
[-- Type: application/octet-stream, Size: 15687 bytes --]
diff -r 26b88953b0c8 xen/arch/ia64/xen/mm.c
--- a/xen/arch/ia64/xen/mm.c Wed May 07 23:13:22 2008 +0800
+++ b/xen/arch/ia64/xen/mm.c Thu May 08 00:35:05 2008 +0800
@@ -2415,7 +2415,7 @@ steal_page(struct domain *d, struct page
int
guest_physmap_add_page(struct domain *d, unsigned long gpfn,
- unsigned long mfn)
+ unsigned long mfn, int order)
{
BUG_ON(!mfn_valid(mfn));
BUG_ON(mfn_to_page(mfn)->count_info != (PGC_allocated | 1));
@@ -2432,7 +2432,7 @@ guest_physmap_add_page(struct domain *d,
void
guest_physmap_remove_page(struct domain *d, unsigned long gpfn,
- unsigned long mfn)
+ unsigned long mfn, int order)
{
BUG_ON(mfn == 0);//XXX
zap_domain_page_one(d, gpfn << PAGE_SHIFT, 0, mfn);
@@ -2838,7 +2838,7 @@ arch_memory_op(int op, XEN_GUEST_HANDLE(
if (prev_mfn && mfn_valid(prev_mfn)) {
if (is_xen_heap_mfn(prev_mfn))
/* Xen heap frames are simply unhooked from this phys slot. */
- guest_physmap_remove_page(d, xatp.gpfn, prev_mfn);
+ guest_physmap_remove_page(d, xatp.gpfn, prev_mfn, 0);
else
/* Normal domain memory is freed, to avoid leaking memory. */
guest_remove_page(d, xatp.gpfn);
@@ -2847,10 +2847,10 @@ arch_memory_op(int op, XEN_GUEST_HANDLE(
/* Unmap from old location, if any. */
gpfn = get_gpfn_from_mfn(mfn);
if (gpfn != INVALID_M2P_ENTRY)
- guest_physmap_remove_page(d, gpfn, mfn);
+ guest_physmap_remove_page(d, gpfn, mfn, 0);
/* Map at new location. */
- guest_physmap_add_page(d, xatp.gpfn, mfn);
+ guest_physmap_add_page(d, xatp.gpfn, mfn, 0);
out:
domain_unlock(d);
diff -r 26b88953b0c8 xen/arch/powerpc/mm.c
--- a/xen/arch/powerpc/mm.c Wed May 07 23:13:22 2008 +0800
+++ b/xen/arch/powerpc/mm.c Thu May 08 00:35:05 2008 +0800
@@ -591,7 +591,7 @@ void guest_physmap_add_page(
}
void guest_physmap_remove_page(
- struct domain *d, unsigned long gpfn, unsigned long mfn)
+ struct domain *d, unsigned long gpfn, unsigned long mfn, int order)
{
if (page_get_owner(mfn_to_page(mfn)) != d) {
printk("Won't unmap foreign MFN 0x%lx for DOM%d\n", mfn, d->domain_id);
diff -r 26b88953b0c8 xen/arch/x86/mm.c
--- a/xen/arch/x86/mm.c Wed May 07 23:13:22 2008 +0800
+++ b/xen/arch/x86/mm.c Thu May 08 00:35:05 2008 +0800
@@ -3310,7 +3310,7 @@ long arch_memory_op(int op, XEN_GUEST_HA
{
if ( is_xen_heap_mfn(prev_mfn) )
/* Xen heap frames are simply unhooked from this phys slot. */
- guest_physmap_remove_page(d, xatp.gpfn, prev_mfn);
+ guest_physmap_remove_page(d, xatp.gpfn, prev_mfn, 0);
else
/* Normal domain memory is freed, to avoid leaking memory. */
guest_remove_page(d, xatp.gpfn);
@@ -3319,10 +3319,10 @@ long arch_memory_op(int op, XEN_GUEST_HA
/* Unmap from old location, if any. */
gpfn = get_gpfn_from_mfn(mfn);
if ( gpfn != INVALID_M2P_ENTRY )
- guest_physmap_remove_page(d, gpfn, mfn);
+ guest_physmap_remove_page(d, gpfn, mfn, 0);
/* Map at new location. */
- guest_physmap_add_page(d, xatp.gpfn, mfn);
+ guest_physmap_add_page(d, xatp.gpfn, mfn, 0);
domain_unlock(d);
diff -r 26b88953b0c8 xen/arch/x86/mm/p2m.c
--- a/xen/arch/x86/mm/p2m.c Wed May 07 23:13:22 2008 +0800
+++ b/xen/arch/x86/mm/p2m.c Thu May 08 01:17:02 2008 +0800
@@ -204,7 +204,7 @@ p2m_next_level(struct domain *d, mfn_t *
// Returns 0 on error (out of memory)
static int
-p2m_set_entry(struct domain *d, unsigned long gfn, mfn_t mfn, p2m_type_t p2mt)
+p2m_set_entry(struct domain *d, unsigned long gfn, mfn_t mfn, int order, p2m_type_t p2mt)
{
// XXX -- this might be able to be faster iff current->domain == d
mfn_t table_mfn = pagetable_get_mfn(d->arch.phys_table);
@@ -433,9 +433,9 @@ void p2m_change_entry_type_global(struct
}
static inline
-int set_p2m_entry(struct domain *d, unsigned long gfn, mfn_t mfn, p2m_type_t p2mt)
-{
- return d->arch.p2m->set_entry(d, gfn, mfn, p2mt);
+int set_p2m_entry(struct domain *d, unsigned long gfn, mfn_t mfn, int order, p2m_type_t p2mt)
+{
+ return d->arch.p2m->set_entry(d, gfn, mfn, order, p2mt);
}
// Allocate a new p2m table for a domain.
@@ -498,7 +498,7 @@ int p2m_alloc_table(struct domain *d,
P2M_PRINTK("populating p2m table\n");
/* Initialise physmap tables for slot zero. Other code assumes this. */
- if ( !set_p2m_entry(d, 0, _mfn(INVALID_MFN), p2m_invalid) )
+ if ( !set_p2m_entry(d, 0, _mfn(INVALID_MFN), 0, p2m_invalid) )
goto error;
/* Copy all existing mappings from the page list and m2p */
@@ -517,7 +517,7 @@ int p2m_alloc_table(struct domain *d,
(gfn != 0x55555555L)
#endif
&& gfn != INVALID_M2P_ENTRY
- && !set_p2m_entry(d, gfn, mfn, p2m_ram_rw) )
+ && !set_p2m_entry(d, gfn, mfn, 0, p2m_ram_rw) )
goto error;
}
@@ -750,30 +750,32 @@ static void audit_p2m(struct domain *d)
static void
-p2m_remove_page(struct domain *d, unsigned long gfn, unsigned long mfn)
+p2m_remove_page(struct domain *d, unsigned long gfn, unsigned long mfn,
+ int order )
{
if ( !paging_mode_translate(d) )
return;
P2M_DEBUG("removing gfn=%#lx mfn=%#lx\n", gfn, mfn);
- set_p2m_entry(d, gfn, _mfn(INVALID_MFN), p2m_invalid);
+ set_p2m_entry(d, gfn, _mfn(INVALID_MFN), order, p2m_invalid);
set_gpfn_from_mfn(mfn, INVALID_M2P_ENTRY);
}
void
guest_physmap_remove_page(struct domain *d, unsigned long gfn,
- unsigned long mfn)
+ unsigned long mfn, int order )
{
p2m_lock(d->arch.p2m);
audit_p2m(d);
- p2m_remove_page(d, gfn, mfn);
+ for ( int i = 0; i < ( 1<< order); i++ )
+ p2m_remove_page(d, gfn+i, mfn+i, order);
audit_p2m(d);
p2m_unlock(d->arch.p2m);
}
int
guest_physmap_add_entry(struct domain *d, unsigned long gfn,
- unsigned long mfn, p2m_type_t t)
+ unsigned long mfn, int order, p2m_type_t t)
{
unsigned long ogfn;
p2m_type_t ot;
@@ -831,13 +833,13 @@ guest_physmap_add_entry(struct domain *d
P2M_DEBUG("old gfn=%#lx -> mfn %#lx\n",
ogfn , mfn_x(omfn));
if ( mfn_x(omfn) == mfn )
- p2m_remove_page(d, ogfn, mfn);
+ p2m_remove_page(d, ogfn, mfn, order);
}
}
if ( mfn_valid(_mfn(mfn)) )
{
- if ( !set_p2m_entry(d, gfn, _mfn(mfn), t) )
+ if ( !set_p2m_entry(d, gfn, _mfn(mfn), order, t) )
rc = -EINVAL;
set_gpfn_from_mfn(mfn, gfn);
}
@@ -845,7 +847,7 @@ guest_physmap_add_entry(struct domain *d
{
gdprintk(XENLOG_WARNING, "Adding bad mfn to p2m map (%#lx -> %#lx)\n",
gfn, mfn);
- if ( !set_p2m_entry(d, gfn, _mfn(INVALID_MFN), p2m_invalid) )
+ if ( !set_p2m_entry(d, gfn, _mfn(INVALID_MFN), order, p2m_invalid) )
rc = -EINVAL;
}
@@ -967,7 +969,7 @@ p2m_type_t p2m_change_type(struct domain
mfn = gfn_to_mfn(d, gfn, &pt);
if ( pt == ot )
- set_p2m_entry(d, gfn, mfn, nt);
+ set_p2m_entry(d, gfn, mfn, 0, nt);
p2m_unlock(d->arch.p2m);
@@ -991,7 +993,7 @@ set_mmio_p2m_entry(struct domain *d, uns
set_gpfn_from_mfn(mfn_x(omfn), INVALID_M2P_ENTRY);
}
- rc = set_p2m_entry(d, gfn, mfn, p2m_mmio_direct);
+ rc = set_p2m_entry(d, gfn, mfn, 0, p2m_mmio_direct);
if ( 0 == rc )
gdprintk(XENLOG_ERR,
"set_mmio_p2m_entry: set_p2m_entry failed! mfn=%08lx\n",
@@ -1015,7 +1017,7 @@ clear_mmio_p2m_entry(struct domain *d, u
"clear_mmio_p2m_entry: gfn_to_mfn failed! gfn=%08lx\n", gfn);
return 0;
}
- rc = set_p2m_entry(d, gfn, _mfn(INVALID_MFN), 0);
+ rc = set_p2m_entry(d, gfn, _mfn(INVALID_MFN), 0, 0);
return rc;
}
diff -r 26b88953b0c8 xen/common/grant_table.c
--- a/xen/common/grant_table.c Wed May 07 23:13:22 2008 +0800
+++ b/xen/common/grant_table.c Thu May 08 00:35:05 2008 +0800
@@ -1159,7 +1159,7 @@ gnttab_transfer(
spin_lock(&e->grant_table->lock);
sha = &shared_entry(e->grant_table, gop.ref);
- guest_physmap_add_page(e, sha->frame, mfn);
+ guest_physmap_add_page(e, sha->frame, mfn, 0);
sha->frame = mfn;
wmb();
sha->flags |= GTF_transfer_completed;
diff -r 26b88953b0c8 xen/common/memory.c
--- a/xen/common/memory.c Wed May 07 23:13:22 2008 +0800
+++ b/xen/common/memory.c Thu May 08 00:35:05 2008 +0800
@@ -109,8 +109,11 @@ static void populate_physmap(struct memo
goto out;
}
- if ( unlikely(__copy_from_guest_offset(&gpfn, a->extent_list, i, 1)) )
- goto out;
+ if ( unlikely(__copy_from_guest_offset(&gpfn, a->extent_list, i << a->extent_order, 1)) )
+ {
+ printk("copy_from_guest failed.\n");
+ goto out;
+ }
page = alloc_domheap_pages(
d, a->extent_order, a->memflags | MEMF_node(node));
@@ -126,11 +129,7 @@ static void populate_physmap(struct memo
mfn = page_to_mfn(page);
if ( unlikely(paging_mode_translate(d)) )
- {
- for ( j = 0; j < (1 << a->extent_order); j++ )
- if ( guest_physmap_add_page(d, gpfn + j, mfn + j) )
- goto out;
- }
+ guest_physmap_add_page(d, gpfn, mfn, a->extent_order);
else
{
for ( j = 0; j < (1 << a->extent_order); j++ )
@@ -172,7 +171,7 @@ int guest_remove_page(struct domain *d,
if ( test_and_clear_bit(_PGC_allocated, &page->count_info) )
put_page(page);
- guest_physmap_remove_page(d, gmfn, mfn);
+ guest_physmap_remove_page(d, gmfn, mfn, 0);
put_page(page);
@@ -419,7 +418,7 @@ static long memory_exchange(XEN_GUEST_HA
if ( !test_and_clear_bit(_PGC_allocated, &page->count_info) )
BUG();
mfn = page_to_mfn(page);
- guest_physmap_remove_page(d, mfn_to_gmfn(d, mfn), mfn);
+ guest_physmap_remove_page(d, mfn_to_gmfn(d, mfn), mfn, 0);
put_page(page);
}
@@ -441,8 +440,7 @@ static long memory_exchange(XEN_GUEST_HA
if ( unlikely(paging_mode_translate(d)) )
{
/* Ignore failure here. There's nothing we can do. */
- for ( k = 0; k < (1UL << exch.out.extent_order); k++ )
- (void)guest_physmap_add_page(d, gpfn + k, mfn + k);
+ (void)guest_physmap_add_page(d, gpfn, mfn, exch.out.extent_order);
}
else
{
diff -r 26b88953b0c8 xen/include/asm-ia64/grant_table.h
--- a/xen/include/asm-ia64/grant_table.h Wed May 07 23:13:22 2008 +0800
+++ b/xen/include/asm-ia64/grant_table.h Thu May 08 00:35:05 2008 +0800
@@ -13,7 +13,7 @@ int replace_grant_host_mapping(unsigned
int replace_grant_host_mapping(unsigned long gpaddr, unsigned long mfn, unsigned long new_gpaddr, unsigned int flags);
// for grant transfer
-int guest_physmap_add_page(struct domain *d, unsigned long gpfn, unsigned long mfn);
+int guest_physmap_add_page(struct domain *d, unsigned long gpfn, unsigned long mfn, int order);
/* XXX
* somewhere appropriate
diff -r 26b88953b0c8 xen/include/asm-ia64/shadow.h
--- a/xen/include/asm-ia64/shadow.h Wed May 07 23:13:22 2008 +0800
+++ b/xen/include/asm-ia64/shadow.h Thu May 08 00:35:05 2008 +0800
@@ -40,8 +40,10 @@
* Utilities to change relationship of gpfn->mfn for designated domain,
* which is required by gnttab transfer, balloon, device model and etc.
*/
-int guest_physmap_add_page(struct domain *d, unsigned long gpfn, unsigned long mfn);
-void guest_physmap_remove_page(struct domain *d, unsigned long gpfn, unsigned long mfn);
+int guest_physmap_add_page(struct domain *d, unsigned long gpfn,
+ unsigned long mfn, int order);
+void guest_physmap_remove_page(struct domain *d, unsigned long gpfn,
+ unsigned long mfn, int order);
static inline int
shadow_mode_enabled(struct domain *d)
diff -r 26b88953b0c8 xen/include/asm-powerpc/mm.h
--- a/xen/include/asm-powerpc/mm.h Wed May 07 23:13:22 2008 +0800
+++ b/xen/include/asm-powerpc/mm.h Thu May 08 00:35:05 2008 +0800
@@ -278,9 +278,9 @@ extern int guest_physmap_max_mem_pages(s
extern int guest_physmap_max_mem_pages(struct domain *d, unsigned long new_max);
extern void guest_physmap_add_page(
- struct domain *d, unsigned long gpfn, unsigned long mfn);
+ struct domain *d, unsigned long gpfn, unsigned long mfn, int order);
extern void guest_physmap_remove_page(
- struct domain *d, unsigned long gpfn, unsigned long mfn);
+ struct domain *d, unsigned long gpfn, unsigned long mfn, int order);
#endif
diff -r 26b88953b0c8 xen/include/asm-x86/p2m.h
--- a/xen/include/asm-x86/p2m.h Wed May 07 23:13:22 2008 +0800
+++ b/xen/include/asm-x86/p2m.h Thu May 08 19:52:57 2008 +0800
@@ -102,7 +102,7 @@ struct p2m_domain {
void (*free_page )(struct domain *d,
struct page_info *pg);
int (*set_entry )(struct domain *d, unsigned long gfn,
- mfn_t mfn, p2m_type_t p2mt);
+ mfn_t mfn, int order, p2m_type_t p2mt);
mfn_t (*get_entry )(struct domain *d, unsigned long gfn,
p2m_type_t *p2mt);
mfn_t (*get_entry_current)(unsigned long gfn,
@@ -203,21 +203,32 @@ void p2m_final_teardown(struct domain *d
/* Add a page to a domain's p2m table */
int guest_physmap_add_entry(struct domain *d, unsigned long gfn,
- unsigned long mfn, p2m_type_t t);
+ unsigned long mfn, int order, p2m_type_t t);
/* Untyped version for RAM only, for compatibility
*
* Return 0 for success
*/
static inline int guest_physmap_add_page(struct domain *d, unsigned long gfn,
- unsigned long mfn)
-{
- return guest_physmap_add_entry(d, gfn, mfn, p2m_ram_rw);
+ unsigned long mfn, int order)
+{
+ int ret;
+
+ for ( int i = 0; i < (1 << order); i++ )
+ {
+ ret = guest_physmap_add_entry(d, gfn+i, mfn+i, order, p2m_ram_rw);
+ if ( ret != 0 )
+ break;
+ }
+
+ /* TODO: fix exit path when failure */
+
+ return ret;
}
/* Remove a page from a domain's p2m table */
void guest_physmap_remove_page(struct domain *d, unsigned long gfn,
- unsigned long mfn);
+ unsigned long mfn, int order);
/* Change types across all p2m entries in a domain */
void p2m_change_type_global(struct domain *d, p2m_type_t ot, p2m_type_t nt);
diff -r 26b88953b0c8 xen/include/xen/paging.h
--- a/xen/include/xen/paging.h Wed May 07 23:13:22 2008 +0800
+++ b/xen/include/xen/paging.h Thu May 08 00:35:05 2008 +0800
@@ -18,8 +18,8 @@
#else
#define paging_mode_translate(d) (0)
-#define guest_physmap_add_page(d, p, m) (0)
-#define guest_physmap_remove_page(d, p, m) ((void)0)
+#define guest_physmap_add_page(d, p, m, order) (0)
+#define guest_physmap_remove_page(d, p, m, order) ((void)0)
#endif
[-- Attachment #4: p2m-ept-file.patch --]
[-- Type: application/octet-stream, Size: 9705 bytes --]
diff -r bc9cf015d722 xen/arch/x86/mm/hap/p2m-ept.c
--- a/xen/arch/x86/mm/hap/p2m-ept.c Fri May 09 01:46:33 2008 +0800
+++ b/xen/arch/x86/mm/hap/p2m-ept.c Sat May 10 01:16:31 2008 +0800
@@ -20,6 +20,7 @@
#include <xen/domain_page.h>
#include <xen/sched.h>
#include <asm/current.h>
+#include <asm/paging.h>
#include <asm/types.h>
#include <asm/domain.h>
#include <asm/p2m.h>
@@ -46,6 +47,9 @@ static void ept_p2m_type_to_flags(ept_en
}
}
+#define GUEST_TABLE_NORMAL_PAGE 1
+#define GUEST_TABLE_SUPER_PAGE 2
+
static int ept_next_level(struct domain *d, bool_t read_only,
ept_entry_t **table, unsigned long *gfn_remainder,
u32 shift)
@@ -54,7 +58,6 @@ static int ept_next_level(struct domain
u32 index;
index = *gfn_remainder >> shift;
- *gfn_remainder &= (1UL << shift) - 1;
ept_entry = (*table) + index;
@@ -83,31 +86,53 @@ static int ept_next_level(struct domain
ept_entry->r = ept_entry->w = ept_entry->x = 1;
}
- next = map_domain_page(ept_entry->mfn);
- unmap_domain_page(*table);
- *table = next;
-
- return 1;
+ if ( !ept_entry->sp_avail )
+ {
+ *gfn_remainder &= (1UL << shift) - 1;
+ next = map_domain_page(ept_entry->mfn);
+ unmap_domain_page(*table);
+ *table = next;
+ return GUEST_TABLE_NORMAL_PAGE;
+ }
+ else
+ return GUEST_TABLE_SUPER_PAGE;
}
static int
-ept_set_entry(struct domain *d, unsigned long gfn, mfn_t mfn, p2m_type_t p2mt)
-{
- ept_entry_t *table =
- map_domain_page(mfn_x(pagetable_get_mfn(d->arch.phys_table)));
- unsigned long gfn_remainder = gfn;
+ept_set_entry(struct domain *d, unsigned long gfn, mfn_t mfn, int order,
+ p2m_type_t p2mt)
+{
+ ept_entry_t *table = NULL;
+ unsigned long gfn_remainder = gfn, offset = 0;
ept_entry_t *ept_entry = NULL;
u32 index;
- int i, rv = 0;
+ int i, rv = 0, ret = 0;
+ int walk_level = order / EPT_TABLE_ORDER;
/* Should check if gfn obeys GAW here */
- for ( i = EPT_DEFAULT_GAW; i > 0; i-- )
- if ( !ept_next_level(d, 0, &table, &gfn_remainder,
- i * EPT_TABLE_ORDER) )
+ if ( order != 0 )
+ if ( (gfn & ((1UL << order) - 1)) )
+ return 1;
+
+ table = map_domain_page(mfn_x(pagetable_get_mfn(d->arch.phys_table)));
+
+ ASSERT(table != NULL);
+
+ for ( i = EPT_DEFAULT_GAW; i > walk_level; i-- )
+ {
+ ret = ept_next_level(d, 0, &table, &gfn_remainder,
+ i * EPT_TABLE_ORDER);
+ if ( !ret )
goto out;
-
- index = gfn_remainder;
+ else if ( ret == GUEST_TABLE_SUPER_PAGE )
+ break;
+ }
+
+ index = gfn_remainder >> ( i ? (i * EPT_TABLE_ORDER): order);
+ walk_level = ( i ? ( i * EPT_TABLE_ORDER) : order) / EPT_TABLE_ORDER;
+ offset = (gfn_remainder & ( ((1 << (i*EPT_TABLE_ORDER)) - 1)));
+
ept_entry = table + index;
if ( mfn_valid(mfn_x(mfn)) || (p2mt == p2m_mmio_direct) )
@@ -117,9 +142,20 @@ ept_set_entry(struct domain *d, unsigned
d->arch.p2m->max_mapped_pfn = gfn;
ept_entry->emt = EPT_DEFAULT_MT;
- ept_entry->sp_avail = 0;
+ ept_entry->sp_avail = walk_level ? 1 : 0;
+
+ if ( ret == GUEST_TABLE_SUPER_PAGE )
+ {
+ ept_entry->mfn = mfn_x(mfn) - offset;
+ if ( ept_entry->avail1 == p2m_ram_logdirty &&
+ p2mt == p2m_ram_rw )
+ for ( i = 0; i < 512; i++ )
+ paging_mark_dirty(d, mfn_x(mfn)-offset+i);
+ }
+ else
+ ept_entry->mfn = mfn_x(mfn);
+
ept_entry->avail1 = p2mt;
- ept_entry->mfn = mfn_x(mfn);
ept_entry->rsvd = 0;
ept_entry->avail2 = 0;
/* last step */
@@ -132,14 +168,42 @@ ept_set_entry(struct domain *d, unsigned
/* Success */
rv = 1;
- out:
+out:
unmap_domain_page(table);
ept_sync_domain(d);
+ /* Now the p2m table is not shared with vt-d page table */
+
+ if ( iommu_enabled && is_hvm_domain(d) )
+ {
+ if ( p2mt == p2m_ram_rw )
+ {
+ if ( ret == GUEST_TABLE_SUPER_PAGE )
+ {
+ for ( i = 0; i < 512; i++ )
+ iommu_map_page(d, gfn-offset+i, mfn_x(mfn)-offset+i);
+ }
+ else if ( ret )
+ iommu_map_page(d, gfn, mfn_x(mfn));
+ }
+ else
+ {
+ if ( ret == GUEST_TABLE_SUPER_PAGE )
+ {
+ for ( i = 0; i < 512; i++ )
+ iommu_unmap_page(d, gfn-offset+i);
+ }
+ else if ( ret )
+ iommu_unmap_page(d, gfn);
+ }
+ }
+
+#ifdef P2M_SHARE_WITH_VTD_PAGE_TABLE
/* If p2m table is shared with vtd page-table. */
if ( iommu_enabled && is_hvm_domain(d) && (p2mt == p2m_mmio_direct) )
iommu_flush(d, gfn, (u64*)ept_entry);
+#endif
return rv;
}
@@ -152,7 +216,7 @@ static mfn_t ept_get_entry(struct domain
unsigned long gfn_remainder = gfn;
ept_entry_t *ept_entry;
u32 index;
- int i;
+ int i, ret=0;
mfn_t mfn = _mfn(INVALID_MFN);
*t = p2m_mmio_dm;
@@ -164,17 +228,31 @@ static mfn_t ept_get_entry(struct domain
/* Should check if gfn obeys GAW here. */
for ( i = EPT_DEFAULT_GAW; i > 0; i-- )
- if ( !ept_next_level(d, 1, &table, &gfn_remainder,
- i * EPT_TABLE_ORDER) )
+ {
+ ret = ept_next_level(d, 1, &table, &gfn_remainder,
+ i * EPT_TABLE_ORDER);
+ if ( !ret )
goto out;
-
- index = gfn_remainder;
+ else if ( ret == GUEST_TABLE_SUPER_PAGE )
+ break;
+ }
+
+ index = gfn_remainder >> ( i * EPT_TABLE_ORDER);
ept_entry = table + index;
if ( ept_entry->avail1 != p2m_invalid )
{
*t = ept_entry->avail1;
mfn = _mfn(ept_entry->mfn);
+ if ( i )
+ {
+ /* we may meet super pages, and to split into 4k pages
+ * to emulate p2m table
+ */
+ unsigned long split_mfn =
+ mfn_x(mfn) + (gfn_remainder & ( ((1 << (i*EPT_TABLE_ORDER)) - 1 )));
+ mfn = _mfn(split_mfn);
+ }
}
out:
@@ -205,33 +283,63 @@ static void ept_change_entry_type_global
l4e = map_domain_page(mfn_x(pagetable_get_mfn(d->arch.phys_table)));
for (i4 = 0; i4 < EPT_PAGETABLE_ENTRIES; i4++ )
{
- if ( !l4e[i4].epte || l4e[i4].sp_avail )
+ if ( !l4e[i4].epte )
continue;
- l3e = map_domain_page(l4e[i4].mfn);
- for ( i3 = 0; i3 < EPT_PAGETABLE_ENTRIES; i3++ )
- {
- if ( !l3e[i3].epte || l3e[i3].sp_avail )
+ if ( !l4e[i4].sp_avail )
+ {
+ l3e = map_domain_page(l4e[i4].mfn);
+ for ( i3 = 0; i3 < EPT_PAGETABLE_ENTRIES; i3++ )
+ {
+ if ( !l3e[i3].epte )
+ continue;
+ if ( !l3e[i3].sp_avail )
+ {
+ l2e = map_domain_page(l3e[i3].mfn);
+ for ( i2 = 0; i2 < EPT_PAGETABLE_ENTRIES; i2++ )
+ {
+ if ( !l2e[i2].epte )
+ continue;
+ if ( !l2e[i2].sp_avail )
+ {
+ l1e = map_domain_page(l2e[i2].mfn);
+ for ( i1 = 0; i1 < EPT_PAGETABLE_ENTRIES; i1++ )
+ {
+ if ( !l1e[i1].epte )
+ continue;
+ if ( l1e[i1].avail1 != ot )
+ continue;
+ l1e[i1].avail1 = nt;
+ ept_p2m_type_to_flags(l1e+i1, nt);
+ }
+ unmap_domain_page(l1e);
+ }
+ else
+ {
+ if ( l2e[i2].avail1 != ot )
+ continue;
+ l2e[i2].avail1 = nt;
+ ept_p2m_type_to_flags(l2e+i2, nt);
+ }
+ }
+ unmap_domain_page(l2e);
+ }
+ else
+ {
+ if ( l3e[i3].avail1 != ot )
+ continue;
+ l3e[i3].avail1 = nt;
+ ept_p2m_type_to_flags(l3e+i3, nt);
+ }
+ }
+ unmap_domain_page(l3e);
+ }
+ else
+ {
+ if ( l4e[i4].avail1 != ot )
continue;
- l2e = map_domain_page(l3e[i3].mfn);
- for ( i2 = 0; i2 < EPT_PAGETABLE_ENTRIES; i2++ )
- {
- if ( !l2e[i2].epte || l2e[i2].sp_avail )
- continue;
- l1e = map_domain_page(l2e[i2].mfn);
- for ( i1 = 0; i1 < EPT_PAGETABLE_ENTRIES; i1++ )
- {
- if ( !l1e[i1].epte )
- continue;
- if ( l1e[i1].avail1 != ot )
- continue;
- l1e[i1].avail1 = nt;
- ept_p2m_type_to_flags(l1e+i1, nt);
- }
- unmap_domain_page(l1e);
- }
- unmap_domain_page(l2e);
- }
- unmap_domain_page(l3e);
+ l4e[i4].avail1 = nt;
+ ept_p2m_type_to_flags(l4e+i4, nt);
+ }
}
unmap_domain_page(l4e);
[-- Attachment #5: Type: text/plain, Size: 138 bytes --]
_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xensource.com
http://lists.xensource.com/xen-devel
^ permalink raw reply [flat|nested] 14+ messages in thread
* RE: [PATCH] patch to support super page (2M) with EPT
2008-05-09 9:10 Xin, Xiaohui
@ 2008-05-11 20:33 ` Huang2, Wei
2008-05-12 4:36 ` Huang2, Wei
0 siblings, 1 reply; 14+ messages in thread
From: Huang2, Wei @ 2008-05-11 20:33 UTC (permalink / raw)
To: Xin, Xiaohui, xen-devel
[-- Attachment #1.1: Type: text/plain, Size: 1292 bytes --]
Could we work together for a common solution? As far as I can see, it
largely overlaps with my super page patch. The major difference is
between p2m.c and p2m-ept.c.
-Wei
From: xen-devel-bounces@lists.xensource.com
[mailto:xen-devel-bounces@lists.xensource.com] On Behalf Of Xin, Xiaohui
Sent: Friday, May 09, 2008 4:11 AM
To: xen-devel@lists.xensource.com
Subject: [Xen-devel][PATCH] patch to support super page (2M) with EPT
Attached are the patches to support super page with EPT. We only support
2M size. And shadow may still work fine with 4K pages.
The patches can be split into 3 parts. Apply order is as attached.
tool.diff
To allocate 2M physical contiguous memory in guest except the first 2M
and the last 2M.
The first 2M covers special memory, and Xen use the last few pages in
guest memory to do special things.
We let them to be 4K pages as normal.
super_page_common.patch
To modify the p2m interfaces by adding an order parameter, such as
guest_physmap_add_page(), p2m_set_entry(), etc.
p2m-ept-file.patch
To handle the EPT tables to support super page.
Signed-off-by: Xin Xiaohui <xiaohui.xin@intel.com>
Signed-off-by: Li Xin, B <xin.b.li@intel.com>
[-- Attachment #1.2: Type: text/html, Size: 6329 bytes --]
[-- Attachment #2: Type: text/plain, Size: 138 bytes --]
_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xensource.com
http://lists.xensource.com/xen-devel
^ permalink raw reply [flat|nested] 14+ messages in thread
* RE: [PATCH] patch to support super page (2M) with EPT
2008-05-11 20:33 ` Huang2, Wei
@ 2008-05-12 4:36 ` Huang2, Wei
2008-05-12 5:04 ` Xin, Xiaohui
0 siblings, 1 reply; 14+ messages in thread
From: Huang2, Wei @ 2008-05-12 4:36 UTC (permalink / raw)
To: Xin, Xiaohui, xen-devel
[-- Attachment #1.1: Type: text/plain, Size: 2084 bytes --]
This is the latest version I created. Please review it and I will re-submit.
1. It includes the patch for p2m-ept.c, taken directly from your previous
patch.
2. xc_hvm_build.c is based on my original approach. It includes
support for both 2MB and 4MB pages. It also handles the case of an odd
memory size (such as 255MB). However, I did not allocate the last 2MB area
using 4KB pages. Let me know if that is a big issue.
3. The rest is pretty similar.
Thanks,
-Wei
From: xen-devel-bounces@lists.xensource.com
[mailto:xen-devel-bounces@lists.xensource.com] On Behalf Of Huang2, Wei
Sent: Sunday, May 11, 2008 3:34 PM
To: Xin, Xiaohui; xen-devel@lists.xensource.com
Subject: RE: [Xen-devel][PATCH] patch to support super page (2M) with
EPT
Could we work together for a common solution? As far as I can see, it
largely overlaps with my super page patch. The major difference is
between p2m.c and p2m-ept.c.
-Wei
From: xen-devel-bounces@lists.xensource.com
[mailto:xen-devel-bounces@lists.xensource.com] On Behalf Of Xin, Xiaohui
Sent: Friday, May 09, 2008 4:11 AM
To: xen-devel@lists.xensource.com
Subject: [Xen-devel][PATCH] patch to support super page (2M) with EPT
Attached are the patches to support super page with EPT. We only support
2M size. And shadow may still work fine with 4K pages.
The patches can be split into 3 parts. Apply order is as attached.
tool.diff
To allocate 2M physical contiguous memory in guest except the first 2M
and the last 2M.
The first 2M covers special memory, and Xen use the last few pages in
guest memory to do special things.
We let them to be 4K pages as normal.
super_page_common.patch
To modify the p2m interfaces by adding an order parameter, such as
guest_physmap_add_page(), p2m_set_entry(), etc.
p2m-ept-file.patch
To handle the EPT tables to support super page.
Signed-off-by: Xin Xiaohui <xiaohui.xin@intel.com>
Signed-off-by: Li Xin, B <xin.b.li@intel.com>
[-- Attachment #1.2: Type: text/html, Size: 10340 bytes --]
[-- Attachment #2: super_page_patch.txt --]
[-- Type: text/plain, Size: 44486 bytes --]
diff -r 810d8c3ac992 tools/libxc/xc_hvm_build.c
--- a/tools/libxc/xc_hvm_build.c Thu May 08 16:58:33 2008 +0100
+++ b/tools/libxc/xc_hvm_build.c Sun May 11 17:21:52 2008 -0500
@@ -157,8 +157,10 @@ static int setup_guest(int xc_handle,
char *image, unsigned long image_size)
{
xen_pfn_t *page_array = NULL;
+ xen_pfn_t *super_page_array = NULL;
unsigned long i, nr_pages = (unsigned long)memsize << (20 - PAGE_SHIFT);
- unsigned long special_page_nr, entry_eip, cur_pages;
+ unsigned long nr_super_pages;
+ unsigned long special_page_nr, entry_eip, cur_pages, limit;
struct xen_add_to_physmap xatp;
struct shared_info *shared_info;
void *e820_page;
@@ -167,6 +169,8 @@ static int setup_guest(int xc_handle,
uint64_t v_start, v_end;
int rc;
xen_capabilities_info_t caps;
+ int super_page_shift;
+ int super_page_order;
/* An HVM guest must be initialised with at least 2MB memory. */
if ( memsize < 2 )
@@ -189,6 +193,15 @@ static int setup_guest(int xc_handle,
PERROR("Guest OS must load to a page boundary.\n");
goto error_out;
}
+
+ /* check for PAE support and setup page size shift appropriately */
+ if ( strstr(caps, "x86_32p") )
+ super_page_shift = 1;
+ else
+ super_page_shift = 2;
+
+ nr_super_pages = (unsigned long)memsize >> super_page_shift;
+ super_page_order = 9 + (super_page_shift - 1);
IPRINTF("VIRTUAL MEMORY ARRANGEMENT:\n"
" Loader: %016"PRIx64"->%016"PRIx64"\n"
@@ -198,7 +211,9 @@ static int setup_guest(int xc_handle,
v_start, v_end,
elf_uval(&elf, elf.ehdr, e_entry));
- if ( (page_array = malloc(nr_pages * sizeof(xen_pfn_t))) == NULL )
+ if ( (page_array = malloc(nr_pages * sizeof(xen_pfn_t))) == NULL ||
+ (super_page_array =
+ malloc(nr_super_pages * sizeof(xen_pfn_t))) == NULL )
{
PERROR("Could not allocate memory.\n");
goto error_out;
@@ -206,26 +221,45 @@ static int setup_guest(int xc_handle,
for ( i = 0; i < nr_pages; i++ )
page_array[i] = i;
+ for ( i = 0; i < nr_super_pages; i++ )
+ super_page_array[i] = i << super_page_order;
for ( i = HVM_BELOW_4G_RAM_END >> PAGE_SHIFT; i < nr_pages; i++ )
page_array[i] += HVM_BELOW_4G_MMIO_LENGTH >> PAGE_SHIFT;
-
- /*
- * Allocate memory for HVM guest, skipping VGA hole 0xA0000-0xC0000.
- * We allocate pages in batches of no more than 2048 to ensure that
- * we can be preempted and hence dom0 remains responsive.
- */
+ for ( i = HVM_BELOW_4G_RAM_END >> (PAGE_SHIFT + super_page_order);
+ i < nr_super_pages; i++ )
+ super_page_array[i] += HVM_BELOW_4G_MMIO_LENGTH >> PAGE_SHIFT;
+
+
+ /* Allocate memory for HVM guest, skipping VGA hole 0xA0000-0xC0000. */
rc = xc_domain_memory_populate_physmap(
xc_handle, dom, 0xa0, 0, 0, &page_array[0x00]);
- cur_pages = 0xc0;
- while ( (rc == 0) && (nr_pages > cur_pages) )
- {
- unsigned long count = nr_pages - cur_pages;
- if ( count > 2048 )
- count = 2048;
+ if ( rc == 0 )
rc = xc_domain_memory_populate_physmap(
- xc_handle, dom, count, 0, 0, &page_array[cur_pages]);
+ xc_handle, dom, (0x200<<(super_page_shift-1)) - 0xc0, 0, 0,
+ &page_array[0xc0]);
+
+ /* We allocate pages in batches of no more than 8MB to ensure that
+ * we can be preempted and hence dom0 remains responsive.
+ */
+ limit = 4 / super_page_shift;
+ cur_pages = 1;
+ while ( (rc == 0) && (nr_super_pages > cur_pages) )
+ {
+ unsigned long count = nr_super_pages - cur_pages;
+ if ( count > limit )
+ count = limit;
+ rc = xc_domain_memory_populate_physmap(xc_handle, dom, count,
+ super_page_order, 0,
+ &super_page_array[cur_pages]);
cur_pages += count;
}
+
+ /* handle the case of odd number physical memory size (such as 255MB) */
+ if ( rc == 0 )
+ rc = xc_domain_memory_populate_physmap(
+ xc_handle, dom, nr_pages - (nr_super_pages << super_page_order),
+ 0, 0, &page_array[nr_super_pages << super_page_order]);
+
if ( rc != 0 )
{
PERROR("Could not allocate memory for HVM guest.\n");
@@ -314,10 +348,12 @@ static int setup_guest(int xc_handle,
}
free(page_array);
+ free(super_page_array);
return 0;
error_out:
free(page_array);
+ free(super_page_array);
return -1;
}
diff -r 810d8c3ac992 xen/arch/ia64/xen/mm.c
--- a/xen/arch/ia64/xen/mm.c Thu May 08 16:58:33 2008 +0100
+++ b/xen/arch/ia64/xen/mm.c Sun May 11 17:29:52 2008 -0500
@@ -2415,7 +2415,7 @@ steal_page(struct domain *d, struct page
int
guest_physmap_add_page(struct domain *d, unsigned long gpfn,
- unsigned long mfn)
+ unsigned long mfn, unsigned int page_order)
{
BUG_ON(!mfn_valid(mfn));
BUG_ON(mfn_to_page(mfn)->count_info != (PGC_allocated | 1));
@@ -2432,7 +2432,7 @@ guest_physmap_add_page(struct domain *d,
void
guest_physmap_remove_page(struct domain *d, unsigned long gpfn,
- unsigned long mfn)
+ unsigned long mfn, unsigned int page_order)
{
BUG_ON(mfn == 0);//XXX
zap_domain_page_one(d, gpfn << PAGE_SHIFT, 0, mfn);
@@ -2838,7 +2838,7 @@ arch_memory_op(int op, XEN_GUEST_HANDLE(
if (prev_mfn && mfn_valid(prev_mfn)) {
if (is_xen_heap_mfn(prev_mfn))
/* Xen heap frames are simply unhooked from this phys slot. */
- guest_physmap_remove_page(d, xatp.gpfn, prev_mfn);
+ guest_physmap_remove_page(d, xatp.gpfn, prev_mfn, 0);
else
/* Normal domain memory is freed, to avoid leaking memory. */
guest_remove_page(d, xatp.gpfn);
@@ -2847,10 +2847,10 @@ arch_memory_op(int op, XEN_GUEST_HANDLE(
/* Unmap from old location, if any. */
gpfn = get_gpfn_from_mfn(mfn);
if (gpfn != INVALID_M2P_ENTRY)
- guest_physmap_remove_page(d, gpfn, mfn);
+ guest_physmap_remove_page(d, gpfn, mfn, 0);
/* Map at new location. */
- guest_physmap_add_page(d, xatp.gpfn, mfn);
+ guest_physmap_add_page(d, xatp.gpfn, mfn, 0);
out:
domain_unlock(d);
diff -r 810d8c3ac992 xen/arch/x86/mm.c
--- a/xen/arch/x86/mm.c Thu May 08 16:58:33 2008 +0100
+++ b/xen/arch/x86/mm.c Sun May 11 17:21:52 2008 -0500
@@ -3287,7 +3287,8 @@ long arch_memory_op(int op, XEN_GUEST_HA
{
if ( is_xen_heap_mfn(prev_mfn) )
/* Xen heap frames are simply unhooked from this phys slot. */
- guest_physmap_remove_page(d, xatp.gpfn, prev_mfn);
+ guest_physmap_remove_page(d, xatp.gpfn, prev_mfn,
+ NORMAL_PAGE_ORDER);
else
/* Normal domain memory is freed, to avoid leaking memory. */
guest_remove_page(d, xatp.gpfn);
@@ -3296,10 +3297,10 @@ long arch_memory_op(int op, XEN_GUEST_HA
/* Unmap from old location, if any. */
gpfn = get_gpfn_from_mfn(mfn);
if ( gpfn != INVALID_M2P_ENTRY )
- guest_physmap_remove_page(d, gpfn, mfn);
+ guest_physmap_remove_page(d, gpfn, mfn, NORMAL_PAGE_ORDER);
/* Map at new location. */
- guest_physmap_add_page(d, xatp.gpfn, mfn);
+ guest_physmap_add_page(d, xatp.gpfn, mfn, NORMAL_PAGE_ORDER);
domain_unlock(d);
diff -r 810d8c3ac992 xen/arch/x86/mm/hap/p2m-ept.c
--- a/xen/arch/x86/mm/hap/p2m-ept.c Thu May 08 16:58:33 2008 +0100
+++ b/xen/arch/x86/mm/hap/p2m-ept.c Sun May 11 17:21:52 2008 -0500
@@ -20,6 +20,7 @@
#include <xen/domain_page.h>
#include <xen/sched.h>
#include <asm/current.h>
+#include <asm/paging.h>
#include <asm/types.h>
#include <asm/domain.h>
#include <asm/p2m.h>
@@ -46,6 +47,9 @@ static void ept_p2m_type_to_flags(ept_en
}
}
+#define GUEST_TABLE_NORMAL_PAGE 1
+#define GUEST_TABLE_SUPER_PAGE 2
+
static int ept_next_level(struct domain *d, bool_t read_only,
ept_entry_t **table, unsigned long *gfn_remainder,
u32 shift)
@@ -54,7 +58,6 @@ static int ept_next_level(struct domain
u32 index;
index = *gfn_remainder >> shift;
- *gfn_remainder &= (1UL << shift) - 1;
ept_entry = (*table) + index;
@@ -83,31 +86,53 @@ static int ept_next_level(struct domain
ept_entry->r = ept_entry->w = ept_entry->x = 1;
}
- next = map_domain_page(ept_entry->mfn);
- unmap_domain_page(*table);
- *table = next;
-
- return 1;
+ if ( !ept_entry->sp_avail )
+ {
+ *gfn_remainder &= (1UL << shift) - 1;
+ next = map_domain_page(ept_entry->mfn);
+ unmap_domain_page(*table);
+ *table = next;
+ return GUEST_TABLE_NORMAL_PAGE;
+ }
+ else
+ return GUEST_TABLE_SUPER_PAGE;
}
static int
-ept_set_entry(struct domain *d, unsigned long gfn, mfn_t mfn, p2m_type_t p2mt)
-{
- ept_entry_t *table =
- map_domain_page(mfn_x(pagetable_get_mfn(d->arch.phys_table)));
- unsigned long gfn_remainder = gfn;
+ept_set_entry(struct domain *d, unsigned long gfn, mfn_t mfn,
+ unsigned int order, p2m_type_t p2mt)
+{
+ ept_entry_t *table = NULL;
+ unsigned long gfn_remainder = gfn, offset = 0;
ept_entry_t *ept_entry = NULL;
u32 index;
- int i, rv = 0;
+ int i, rv = 0, ret = 0;
+ int walk_level = order / EPT_TABLE_ORDER;
/* Should check if gfn obeys GAW here */
- for ( i = EPT_DEFAULT_GAW; i > 0; i-- )
- if ( !ept_next_level(d, 0, &table, &gfn_remainder,
- i * EPT_TABLE_ORDER) )
+ if ( order != 0 )
+ if ( (gfn & ((1UL << order) - 1)) )
+ return 1;
+
+ table = map_domain_page(mfn_x(pagetable_get_mfn(d->arch.phys_table)));
+
+ ASSERT(table != NULL);
+
+ for ( i = EPT_DEFAULT_GAW; i > walk_level; i-- )
+ {
+ ret = ept_next_level(d, 0, &table, &gfn_remainder,
+ i * EPT_TABLE_ORDER);
+ if ( !ret )
goto out;
-
- index = gfn_remainder;
+ else if ( ret == GUEST_TABLE_SUPER_PAGE )
+ break;
+ }
+
+ index = gfn_remainder >> ( i ? (i * EPT_TABLE_ORDER): order);
+ walk_level = ( i ? ( i * EPT_TABLE_ORDER) : order) / EPT_TABLE_ORDER;
+ offset = (gfn_remainder & ( ((1 << (i*EPT_TABLE_ORDER)) - 1)));
+
ept_entry = table + index;
if ( mfn_valid(mfn_x(mfn)) || (p2mt == p2m_mmio_direct) )
@@ -117,9 +142,20 @@ ept_set_entry(struct domain *d, unsigned
d->arch.p2m->max_mapped_pfn = gfn;
ept_entry->emt = EPT_DEFAULT_MT;
- ept_entry->sp_avail = 0;
+ ept_entry->sp_avail = walk_level ? 1 : 0;
+
+ if ( ret == GUEST_TABLE_SUPER_PAGE )
+ {
+ ept_entry->mfn = mfn_x(mfn) - offset;
+ if ( ept_entry->avail1 == p2m_ram_logdirty &&
+ p2mt == p2m_ram_rw )
+ for ( i = 0; i < 512; i++ )
+ paging_mark_dirty(d, mfn_x(mfn)-offset+i);
+ }
+ else
+ ept_entry->mfn = mfn_x(mfn);
+
ept_entry->avail1 = p2mt;
- ept_entry->mfn = mfn_x(mfn);
ept_entry->rsvd = 0;
ept_entry->avail2 = 0;
/* last step */
@@ -132,14 +168,42 @@ ept_set_entry(struct domain *d, unsigned
/* Success */
rv = 1;
- out:
+out:
unmap_domain_page(table);
ept_sync_domain(d);
+ /* Now the p2m table is not shared with vt-d page table */
+
+ if ( iommu_enabled && is_hvm_domain(d) )
+ {
+ if ( p2mt == p2m_ram_rw )
+ {
+ if ( ret == GUEST_TABLE_SUPER_PAGE )
+ {
+ for ( i = 0; i < 512; i++ )
+ iommu_map_page(d, gfn-offset+i, mfn_x(mfn)-offset+i);
+ }
+ else if ( ret )
+ iommu_map_page(d, gfn, mfn_x(mfn));
+ }
+ else
+ {
+ if ( ret == GUEST_TABLE_SUPER_PAGE )
+ {
+ for ( i = 0; i < 512; i++ )
+ iommu_unmap_page(d, gfn-offset+i);
+ }
+ else if ( ret )
+ iommu_unmap_page(d, gfn);
+ }
+ }
+
+#ifdef P2M_SHARE_WITH_VTD_PAGE_TABLE
/* If p2m table is shared with vtd page-table. */
if ( iommu_enabled && is_hvm_domain(d) && (p2mt == p2m_mmio_direct) )
iommu_flush(d, gfn, (u64*)ept_entry);
+#endif
return rv;
}
@@ -152,7 +216,7 @@ static mfn_t ept_get_entry(struct domain
unsigned long gfn_remainder = gfn;
ept_entry_t *ept_entry;
u32 index;
- int i;
+ int i, ret=0;
mfn_t mfn = _mfn(INVALID_MFN);
*t = p2m_mmio_dm;
@@ -164,17 +228,31 @@ static mfn_t ept_get_entry(struct domain
/* Should check if gfn obeys GAW here. */
for ( i = EPT_DEFAULT_GAW; i > 0; i-- )
- if ( !ept_next_level(d, 1, &table, &gfn_remainder,
- i * EPT_TABLE_ORDER) )
+ {
+ ret = ept_next_level(d, 1, &table, &gfn_remainder,
+ i * EPT_TABLE_ORDER);
+ if ( !ret )
goto out;
-
- index = gfn_remainder;
+ else if ( ret == GUEST_TABLE_SUPER_PAGE )
+ break;
+ }
+
+ index = gfn_remainder >> ( i * EPT_TABLE_ORDER);
ept_entry = table + index;
if ( ept_entry->avail1 != p2m_invalid )
{
*t = ept_entry->avail1;
mfn = _mfn(ept_entry->mfn);
+ if ( i )
+ {
+ /* we may meet super pages, and to split into 4k pages
+ * to emulate p2m table
+ */
+ unsigned long split_mfn =
+ mfn_x(mfn) + (gfn_remainder & ( ((1 << (i*EPT_TABLE_ORDER)) - 1 )));
+ mfn = _mfn(split_mfn);
+ }
}
out:
@@ -205,33 +283,63 @@ static void ept_change_entry_type_global
l4e = map_domain_page(mfn_x(pagetable_get_mfn(d->arch.phys_table)));
for (i4 = 0; i4 < EPT_PAGETABLE_ENTRIES; i4++ )
{
- if ( !l4e[i4].epte || l4e[i4].sp_avail )
+ if ( !l4e[i4].epte )
continue;
- l3e = map_domain_page(l4e[i4].mfn);
- for ( i3 = 0; i3 < EPT_PAGETABLE_ENTRIES; i3++ )
- {
- if ( !l3e[i3].epte || l3e[i3].sp_avail )
+ if ( !l4e[i4].sp_avail )
+ {
+ l3e = map_domain_page(l4e[i4].mfn);
+ for ( i3 = 0; i3 < EPT_PAGETABLE_ENTRIES; i3++ )
+ {
+ if ( !l3e[i3].epte )
+ continue;
+ if ( !l3e[i3].sp_avail )
+ {
+ l2e = map_domain_page(l3e[i3].mfn);
+ for ( i2 = 0; i2 < EPT_PAGETABLE_ENTRIES; i2++ )
+ {
+ if ( !l2e[i2].epte )
+ continue;
+ if ( !l2e[i2].sp_avail )
+ {
+ l1e = map_domain_page(l2e[i2].mfn);
+ for ( i1 = 0; i1 < EPT_PAGETABLE_ENTRIES; i1++ )
+ {
+ if ( !l1e[i1].epte )
+ continue;
+ if ( l1e[i1].avail1 != ot )
+ continue;
+ l1e[i1].avail1 = nt;
+ ept_p2m_type_to_flags(l1e+i1, nt);
+ }
+ unmap_domain_page(l1e);
+ }
+ else
+ {
+ if ( l2e[i2].avail1 != ot )
+ continue;
+ l2e[i2].avail1 = nt;
+ ept_p2m_type_to_flags(l2e+i2, nt);
+ }
+ }
+ unmap_domain_page(l2e);
+ }
+ else
+ {
+ if ( l3e[i3].avail1 != ot )
+ continue;
+ l3e[i3].avail1 = nt;
+ ept_p2m_type_to_flags(l3e+i3, nt);
+ }
+ }
+ unmap_domain_page(l3e);
+ }
+ else
+ {
+ if ( l4e[i4].avail1 != ot )
continue;
- l2e = map_domain_page(l3e[i3].mfn);
- for ( i2 = 0; i2 < EPT_PAGETABLE_ENTRIES; i2++ )
- {
- if ( !l2e[i2].epte || l2e[i2].sp_avail )
- continue;
- l1e = map_domain_page(l2e[i2].mfn);
- for ( i1 = 0; i1 < EPT_PAGETABLE_ENTRIES; i1++ )
- {
- if ( !l1e[i1].epte )
- continue;
- if ( l1e[i1].avail1 != ot )
- continue;
- l1e[i1].avail1 = nt;
- ept_p2m_type_to_flags(l1e+i1, nt);
- }
- unmap_domain_page(l1e);
- }
- unmap_domain_page(l2e);
- }
- unmap_domain_page(l3e);
+ l4e[i4].avail1 = nt;
+ ept_p2m_type_to_flags(l4e+i4, nt);
+ }
}
unmap_domain_page(l4e);
diff -r 810d8c3ac992 xen/arch/x86/mm/p2m.c
--- a/xen/arch/x86/mm/p2m.c Thu May 08 16:58:33 2008 +0100
+++ b/xen/arch/x86/mm/p2m.c Sun May 11 17:21:52 2008 -0500
@@ -151,9 +151,11 @@ p2m_next_level(struct domain *d, mfn_t *
unsigned long *gfn_remainder, unsigned long gfn, u32 shift,
u32 max, unsigned long type)
{
+ l1_pgentry_t *l1_entry;
l1_pgentry_t *p2m_entry;
l1_pgentry_t new_entry;
void *next;
+ int i;
ASSERT(d->arch.p2m->alloc_page);
if ( !(p2m_entry = p2m_find_entry(*table, gfn_remainder, gfn,
@@ -194,6 +196,44 @@ p2m_next_level(struct domain *d, mfn_t *
break;
}
}
+
+ ASSERT(l1e_get_flags(*p2m_entry) & _PAGE_PRESENT);
+
+ /* split single large page into 4KB page in P2M table */
+ if ( type == PGT_l1_page_table && (l1e_get_flags(*p2m_entry) & _PAGE_PSE) )
+ {
+ unsigned long flags, pfn;
+ struct page_info *pg = d->arch.p2m->alloc_page(d);
+ if ( pg == NULL )
+ return 0;
+ list_add_tail(&pg->list, &d->arch.p2m->pages);
+ pg->u.inuse.type_info = PGT_l1_page_table | 1 | PGT_validated;
+ pg->count_info = 1;
+
+ /* New splintered mappings inherit the flags of the old superpage,
+ * with a little reorganisation for the _PAGE_PSE_PAT bit. */
+ flags = l1e_get_flags(*p2m_entry);
+ pfn = l1e_get_pfn(*p2m_entry);
+ if ( pfn & 1 ) /* ==> _PAGE_PSE_PAT was set */
+ pfn -= 1; /* Clear it; _PAGE_PSE becomes _PAGE_PAT */
+ else
+ flags &= ~_PAGE_PSE; /* Clear _PAGE_PSE (== _PAGE_PAT) */
+
+ l1_entry = map_domain_page(mfn_x(page_to_mfn(pg)));
+ for ( i = 0; i < L1_PAGETABLE_ENTRIES; i++ )
+ {
+ new_entry = l1e_from_pfn(pfn + i, flags);
+ paging_write_p2m_entry(d, gfn,
+ l1_entry+i, *table_mfn, new_entry, 1);
+ }
+ unmap_domain_page(l1_entry);
+
+ new_entry = l1e_from_pfn(mfn_x(page_to_mfn(pg)),
+ __PAGE_HYPERVISOR|_PAGE_USER);
+ paging_write_p2m_entry(d, gfn,
+ p2m_entry, *table_mfn, new_entry, 2);
+ }
+
*table_mfn = _mfn(l1e_get_pfn(*p2m_entry));
next = map_domain_page(mfn_x(*table_mfn));
unmap_domain_page(*table);
@@ -204,7 +244,8 @@ p2m_next_level(struct domain *d, mfn_t *
// Returns 0 on error (out of memory)
static int
-p2m_set_entry(struct domain *d, unsigned long gfn, mfn_t mfn, p2m_type_t p2mt)
+p2m_set_entry(struct domain *d, unsigned long gfn, mfn_t mfn,
+ unsigned int page_order, p2m_type_t p2mt)
{
// XXX -- this might be able to be faster iff current->domain == d
mfn_t table_mfn = pagetable_get_mfn(d->arch.phys_table);
@@ -212,6 +253,7 @@ p2m_set_entry(struct domain *d, unsigned
unsigned long gfn_remainder = gfn;
l1_pgentry_t *p2m_entry;
l1_pgentry_t entry_content;
+ l2_pgentry_t l2e_content;
int rv=0;
#if CONFIG_PAGING_LEVELS >= 4
@@ -235,26 +277,53 @@ p2m_set_entry(struct domain *d, unsigned
PGT_l2_page_table) )
goto out;
- if ( !p2m_next_level(d, &table_mfn, &table, &gfn_remainder, gfn,
- L2_PAGETABLE_SHIFT - PAGE_SHIFT,
- L2_PAGETABLE_ENTRIES, PGT_l1_page_table) )
- goto out;
-
- p2m_entry = p2m_find_entry(table, &gfn_remainder, gfn,
- 0, L1_PAGETABLE_ENTRIES);
- ASSERT(p2m_entry);
+ if ( page_order == NORMAL_PAGE_ORDER )
+ {
+ if ( !p2m_next_level(d, &table_mfn, &table, &gfn_remainder, gfn,
+ L2_PAGETABLE_SHIFT - PAGE_SHIFT,
+ L2_PAGETABLE_ENTRIES, PGT_l1_page_table) )
+ goto out;
+
+ p2m_entry = p2m_find_entry(table, &gfn_remainder, gfn,
+ 0, L1_PAGETABLE_ENTRIES);
+ ASSERT(p2m_entry);
+
+ if ( mfn_valid(mfn) || (p2mt == p2m_mmio_direct) )
+ entry_content = l1e_from_pfn(mfn_x(mfn), p2m_type_to_flags(p2mt));
+ else
+ entry_content = l1e_empty();
+
+ /* level 1 entry */
+ paging_write_p2m_entry(d, gfn, p2m_entry, table_mfn, entry_content, 1);
+ }
+ else
+ {
+ p2m_entry = p2m_find_entry(table, &gfn_remainder, gfn,
+ L2_PAGETABLE_SHIFT - PAGE_SHIFT,
+ L2_PAGETABLE_ENTRIES);
+ ASSERT(p2m_entry);
+
+ if ( (l1e_get_flags(*p2m_entry) & _PAGE_PRESENT) &&
+ !(l1e_get_flags(*p2m_entry) & _PAGE_PSE) )
+ {
+ P2M_ERROR("configure P2M table 4KB L2 entry with large page\n");
+ domain_crash(d);
+ goto out;
+ }
+
+ if ( mfn_valid(mfn) )
+ l2e_content = l2e_from_pfn(mfn_x(mfn),
+ p2m_type_to_flags(p2mt) | _PAGE_PSE);
+ else
+ l2e_content = l2e_empty();
+
+ entry_content.l1 = l2e_content.l2;
+ paging_write_p2m_entry(d, gfn, p2m_entry, table_mfn, entry_content, 2);
+ }
/* Track the highest gfn for which we have ever had a valid mapping */
if ( mfn_valid(mfn) && (gfn > d->arch.p2m->max_mapped_pfn) )
d->arch.p2m->max_mapped_pfn = gfn;
-
- if ( mfn_valid(mfn) || (p2mt == p2m_mmio_direct) )
- entry_content = l1e_from_pfn(mfn_x(mfn), p2m_type_to_flags(p2mt));
- else
- entry_content = l1e_empty();
-
- /* level 1 entry */
- paging_write_p2m_entry(d, gfn, p2m_entry, table_mfn, entry_content, 1);
if ( iommu_enabled && is_hvm_domain(d) )
{
@@ -335,6 +404,16 @@ p2m_gfn_to_mfn(struct domain *d, unsigne
unmap_domain_page(l2e);
return _mfn(INVALID_MFN);
}
+ else if ( (l2e_get_flags(*l2e) & _PAGE_PSE) )
+ {
+ mfn = _mfn(l2e_get_pfn(*l2e) + l1_table_offset(addr));
+ *t = p2m_flags_to_type(l2e_get_flags(*l2e));
+ unmap_domain_page(l2e);
+
+ ASSERT(mfn_valid(mfn) || !p2m_is_ram(*t));
+ return (p2m_is_valid(*t)) ? mfn : _mfn(INVALID_MFN);
+ }
+
mfn = _mfn(l2e_get_pfn(*l2e));
unmap_domain_page(l2e);
@@ -358,6 +437,7 @@ static mfn_t p2m_gfn_to_mfn_current(unsi
{
mfn_t mfn = _mfn(INVALID_MFN);
p2m_type_t p2mt = p2m_mmio_dm;
+ paddr_t addr = ((paddr_t)gfn) << PAGE_SHIFT;
/* XXX This is for compatibility with the old model, where anything not
* XXX marked as RAM was considered to be emulated MMIO space.
* XXX Once we start explicitly registering MMIO regions in the p2m
@@ -366,25 +446,44 @@ static mfn_t p2m_gfn_to_mfn_current(unsi
if ( gfn <= current->domain->arch.p2m->max_mapped_pfn )
{
l1_pgentry_t l1e = l1e_empty();
+ l2_pgentry_t l2e = l2e_empty();
int ret;
ASSERT(gfn < (RO_MPT_VIRT_END - RO_MPT_VIRT_START)
/ sizeof(l1_pgentry_t));
- /* Need to __copy_from_user because the p2m is sparse and this
- * part might not exist */
- ret = __copy_from_user(&l1e,
- &phys_to_machine_mapping[gfn],
- sizeof(l1e));
-
- if ( ret == 0 ) {
- p2mt = p2m_flags_to_type(l1e_get_flags(l1e));
- ASSERT(l1e_get_pfn(l1e) != INVALID_MFN || !p2m_is_ram(p2mt));
+ ret = __copy_from_user(&l2e,
+ &__linear_l1_table[l1_linear_offset(RO_MPT_VIRT_START) + l2_linear_offset(addr)],
+ sizeof(l2e));
+
+ if ( (ret == 0) && (l2e_get_flags(l2e) & _PAGE_PRESENT) &&
+ (l2e_get_flags(l2e) & _PAGE_PSE) )
+ {
+ p2mt = p2m_flags_to_type(l2e_get_flags(l2e));
+ ASSERT(l2e_get_pfn(l2e) != INVALID_MFN || !p2m_is_ram(p2mt));
if ( p2m_is_valid(p2mt) )
- mfn = _mfn(l1e_get_pfn(l1e));
- else
- /* XXX see above */
+ mfn = _mfn(l2e_get_pfn(l2e) + l1_table_offset(addr));
+ else
p2mt = p2m_mmio_dm;
+ }
+ else
+ {
+
+ /* Need to __copy_from_user because the p2m is sparse and this
+ * part might not exist */
+ ret = __copy_from_user(&l1e,
+ &phys_to_machine_mapping[gfn],
+ sizeof(l1e));
+
+ if ( ret == 0 ) {
+ p2mt = p2m_flags_to_type(l1e_get_flags(l1e));
+ ASSERT(l1e_get_pfn(l1e) != INVALID_MFN || !p2m_is_ram(p2mt));
+ if ( p2m_is_valid(p2mt) )
+ mfn = _mfn(l1e_get_pfn(l1e));
+ else
+ /* XXX see above */
+ p2mt = p2m_mmio_dm;
+ }
}
}
@@ -430,9 +529,10 @@ void p2m_change_entry_type_global(struct
}
static inline
-int set_p2m_entry(struct domain *d, unsigned long gfn, mfn_t mfn, p2m_type_t p2mt)
-{
- return d->arch.p2m->set_entry(d, gfn, mfn, p2mt);
+int set_p2m_entry(struct domain *d, unsigned long gfn, mfn_t mfn,
+ unsigned int page_order, p2m_type_t p2mt)
+{
+ return d->arch.p2m->set_entry(d, gfn, mfn, page_order, p2mt);
}
// Allocate a new p2m table for a domain.
@@ -493,7 +593,8 @@ int p2m_alloc_table(struct domain *d,
P2M_PRINTK("populating p2m table\n");
/* Initialise physmap tables for slot zero. Other code assumes this. */
- if ( !set_p2m_entry(d, 0, _mfn(INVALID_MFN), p2m_invalid) )
+ if ( !set_p2m_entry(d, 0, _mfn(INVALID_MFN), NORMAL_PAGE_ORDER,
+ p2m_invalid) )
goto error;
/* Copy all existing mappings from the page list and m2p */
@@ -512,7 +613,7 @@ int p2m_alloc_table(struct domain *d,
(gfn != 0x55555555L)
#endif
&& gfn != INVALID_M2P_ENTRY
- && !set_p2m_entry(d, gfn, mfn, p2m_ram_rw) )
+ && !set_p2m_entry(d, gfn, mfn, NORMAL_PAGE_ORDER, p2m_ram_rw) )
goto error;
}
@@ -688,6 +789,28 @@ static void audit_p2m(struct domain *d)
gfn += 1 << (L2_PAGETABLE_SHIFT - PAGE_SHIFT);
continue;
}
+
+ /* check for super page */
+ if ( l2e_get_flags(l2e[i2]) & _PAGE_PSE )
+ {
+ mfn = l2e_get_pfn(l2e[i2]);
+ ASSERT(mfn_valid(_mfn(mfn)));
+ for ( i1 = 0; i1 < L1_PAGETABLE_ENTRIES; i1++)
+ {
+ m2pfn = get_gpfn_from_mfn(mfn+i1);
+ if ( m2pfn != (gfn + i) )
+ {
+ pmbad++;
+ P2M_PRINTK("mismatch: gfn %#lx -> mfn %#lx"
+ " -> gfn %#lx\n", gfn+i, mfn+i,
+ m2pfn);
+ BUG();
+ }
+ }
+ gfn += 1 << (L2_PAGETABLE_SHIFT - PAGE_SHIFT);
+ continue;
+ }
+
l1e = map_domain_page(mfn_x(_mfn(l2e_get_pfn(l2e[i2]))));
for ( i1 = 0; i1 < L1_PAGETABLE_ENTRIES; i1++, gfn++ )
@@ -737,35 +860,40 @@ static void audit_p2m(struct domain *d)
static void
-p2m_remove_page(struct domain *d, unsigned long gfn, unsigned long mfn)
-{
+p2m_remove_page(struct domain *d, unsigned long gfn, unsigned long mfn,
+ unsigned int page_order)
+{
+ int i;
if ( !paging_mode_translate(d) )
return;
P2M_DEBUG("removing gfn=%#lx mfn=%#lx\n", gfn, mfn);
- set_p2m_entry(d, gfn, _mfn(INVALID_MFN), p2m_invalid);
- set_gpfn_from_mfn(mfn, INVALID_M2P_ENTRY);
+ set_p2m_entry(d, gfn, _mfn(INVALID_MFN), page_order, p2m_invalid);
+ for ( i = 0; i < (1UL << page_order); i++ )
+ set_gpfn_from_mfn(mfn+i, INVALID_M2P_ENTRY);
}
void
guest_physmap_remove_page(struct domain *d, unsigned long gfn,
- unsigned long mfn)
+ unsigned long mfn, unsigned int page_order)
{
p2m_lock(d->arch.p2m);
audit_p2m(d);
- p2m_remove_page(d, gfn, mfn);
+ p2m_remove_page(d, gfn, mfn, page_order);
audit_p2m(d);
p2m_unlock(d->arch.p2m);
}
int
guest_physmap_add_entry(struct domain *d, unsigned long gfn,
- unsigned long mfn, p2m_type_t t)
+ unsigned long mfn, unsigned int page_order,
+ p2m_type_t t)
{
unsigned long ogfn;
p2m_type_t ot;
mfn_t omfn;
int rc = 0;
+ int i;
if ( !paging_mode_translate(d) )
return -EINVAL;
@@ -795,7 +923,8 @@ guest_physmap_add_entry(struct domain *d
if ( p2m_is_ram(ot) )
{
ASSERT(mfn_valid(omfn));
- set_gpfn_from_mfn(mfn_x(omfn), INVALID_M2P_ENTRY);
+ for ( i = 0; i < (1UL << page_order); i++ )
+ set_gpfn_from_mfn(mfn_x(omfn)+i, INVALID_M2P_ENTRY);
}
ogfn = mfn_to_gfn(d, _mfn(mfn));
@@ -818,21 +947,23 @@ guest_physmap_add_entry(struct domain *d
P2M_DEBUG("old gfn=%#lx -> mfn %#lx\n",
ogfn , mfn_x(omfn));
if ( mfn_x(omfn) == mfn )
- p2m_remove_page(d, ogfn, mfn);
+ p2m_remove_page(d, ogfn, mfn, page_order);
}
}
if ( mfn_valid(_mfn(mfn)) )
{
- if ( !set_p2m_entry(d, gfn, _mfn(mfn), t) )
+ if ( !set_p2m_entry(d, gfn, _mfn(mfn), page_order, t) )
rc = -EINVAL;
- set_gpfn_from_mfn(mfn, gfn);
+ for ( i = 0; i < (1UL << page_order); i++ )
+ set_gpfn_from_mfn(mfn+i, gfn+i);
}
else
{
gdprintk(XENLOG_WARNING, "Adding bad mfn to p2m map (%#lx -> %#lx)\n",
gfn, mfn);
- if ( !set_p2m_entry(d, gfn, _mfn(INVALID_MFN), p2m_invalid) )
+ if ( !set_p2m_entry(d, gfn, _mfn(INVALID_MFN), page_order,
+ p2m_invalid) )
rc = -EINVAL;
}
@@ -851,7 +982,7 @@ void p2m_change_type_global(struct domai
l1_pgentry_t l1e_content;
l1_pgentry_t *l1e;
l2_pgentry_t *l2e;
- mfn_t l1mfn;
+ mfn_t l1mfn, l2mfn;
int i1, i2;
l3_pgentry_t *l3e;
int i3;
@@ -891,11 +1022,26 @@ void p2m_change_type_global(struct domai
{
continue;
}
+ l2mfn = _mfn(l3e_get_pfn(l3e[i3]));
l2e = map_domain_page(l3e_get_pfn(l3e[i3]));
for ( i2 = 0; i2 < L2_PAGETABLE_ENTRIES; i2++ )
{
if ( !(l2e_get_flags(l2e[i2]) & _PAGE_PRESENT) )
{
+ continue;
+ }
+
+ if ( (l2e_get_flags(l2e[i2]) & _PAGE_PSE) )
+ {
+ flags = l2e_get_flags(l2e[i2]);
+ if ( p2m_flags_to_type(flags) != ot )
+ continue;
+ mfn = l2e_get_pfn(l2e[i2]);
+ gfn = get_gpfn_from_mfn(mfn);
+ flags = p2m_flags_to_type(nt);
+ l1e_content = l1e_from_pfn(mfn, flags | _PAGE_PSE);
+ paging_write_p2m_entry(d, gfn, (l1_pgentry_t *)&l2e[i2],
+ l2mfn, l1e_content, 2);
continue;
}
@@ -944,7 +1090,7 @@ p2m_type_t p2m_change_type(struct domain
mfn = gfn_to_mfn(d, gfn, &pt);
if ( pt == ot )
- set_p2m_entry(d, gfn, mfn, nt);
+ set_p2m_entry(d, gfn, mfn, NORMAL_PAGE_ORDER, nt);
p2m_unlock(d->arch.p2m);
@@ -968,7 +1114,7 @@ set_mmio_p2m_entry(struct domain *d, uns
set_gpfn_from_mfn(mfn_x(omfn), INVALID_M2P_ENTRY);
}
- rc = set_p2m_entry(d, gfn, mfn, p2m_mmio_direct);
+ rc = set_p2m_entry(d, gfn, mfn, NORMAL_PAGE_ORDER, p2m_mmio_direct);
if ( 0 == rc )
gdprintk(XENLOG_ERR,
"set_mmio_p2m_entry: set_p2m_entry failed! mfn=%08lx\n",
@@ -992,7 +1138,7 @@ clear_mmio_p2m_entry(struct domain *d, u
"clear_mmio_p2m_entry: gfn_to_mfn failed! gfn=%08lx\n", gfn);
return 0;
}
- rc = set_p2m_entry(d, gfn, _mfn(INVALID_MFN), 0);
+ rc = set_p2m_entry(d, gfn, _mfn(INVALID_MFN), NORMAL_PAGE_ORDER, 0);
return rc;
}
diff -r 810d8c3ac992 xen/common/grant_table.c
--- a/xen/common/grant_table.c Thu May 08 16:58:33 2008 +0100
+++ b/xen/common/grant_table.c Sun May 11 17:21:52 2008 -0500
@@ -1159,7 +1159,7 @@ gnttab_transfer(
spin_lock(&e->grant_table->lock);
sha = &shared_entry(e->grant_table, gop.ref);
- guest_physmap_add_page(e, sha->frame, mfn);
+ guest_physmap_add_page(e, sha->frame, mfn, NORMAL_PAGE_ORDER);
sha->frame = mfn;
wmb();
sha->flags |= GTF_transfer_completed;
diff -r 810d8c3ac992 xen/common/memory.c
--- a/xen/common/memory.c Thu May 08 16:58:33 2008 +0100
+++ b/xen/common/memory.c Sun May 11 17:21:52 2008 -0500
@@ -114,34 +114,60 @@ static void populate_physmap(struct memo
page = alloc_domheap_pages(
d, a->extent_order, a->memflags | MEMF_node(node));
- if ( unlikely(page == NULL) )
- {
- gdprintk(XENLOG_INFO, "Could not allocate order=%d extent: "
- "id=%d memflags=%x (%ld of %d)\n",
- a->extent_order, d->domain_id, a->memflags,
- i, a->nr_extents);
- goto out;
- }
-
- mfn = page_to_mfn(page);
-
- if ( unlikely(paging_mode_translate(d)) )
- {
+
+ if ( unlikely(page == NULL) )
+ {
+ /* fail if it is not under translate mode */
+ if ( !paging_mode_translate(d) )
+ {
+ gdprintk(XENLOG_INFO, "Could not allocate order=%d extent: "
+ "id=%d memflags=%x (%ld of %d)\n",
+ a->extent_order, d->domain_id, a->memflags,
+ i, a->nr_extents);
+ goto out;
+ }
+
+ /* try to allocate using 4KB page instead */
for ( j = 0; j < (1 << a->extent_order); j++ )
- if ( guest_physmap_add_page(d, gpfn + j, mfn + j) )
+ {
+ page = alloc_domheap_pages(d, 0,
+ a->memflags | MEMF_node(node));
+ if ( page == NULL )
+ {
+ gdprintk(XENLOG_INFO, "Could not allocate order=%d extent:"
+ "id=%d memflags=%x (%ld of %d)\n",
+ 0, d->domain_id, a->memflags, i, a->nr_extents);
goto out;
- }
- else
- {
- for ( j = 0; j < (1 << a->extent_order); j++ )
- set_gpfn_from_mfn(mfn + j, gpfn + j);
-
- /* Inform the domain of the new page's machine address. */
- if ( unlikely(__copy_to_guest_offset(a->extent_list, i, &mfn, 1)) )
- goto out;
- }
- }
-
+ }
+
+ mfn = page_to_mfn(page);
+
+ if ( guest_physmap_add_page(d, gpfn+j, mfn,
+ NORMAL_PAGE_ORDER) )
+ goto out;
+ }
+ }
+ else /* successful in allocating page of extent_order */
+ {
+ mfn = page_to_mfn(page);
+
+ if ( unlikely(paging_mode_translate(d)) )
+ {
+ if ( guest_physmap_add_page(d, gpfn, mfn, a->extent_order) )
+ goto out;
+ }
+ else
+ {
+ for ( j = 0; j < (1 << a->extent_order); j++ )
+ set_gpfn_from_mfn(mfn + j, gpfn + j);
+
+ /* Inform the domain of the new page's machine address. */
+ if ( unlikely(__copy_to_guest_offset(a->extent_list, i, &mfn,
+ 1)) )
+ goto out;
+ }
+ }
+ }
out:
a->nr_done = i;
}
@@ -172,7 +198,7 @@ int guest_remove_page(struct domain *d,
if ( test_and_clear_bit(_PGC_allocated, &page->count_info) )
put_page(page);
- guest_physmap_remove_page(d, gmfn, mfn);
+ guest_physmap_remove_page(d, gmfn, mfn, NORMAL_PAGE_ORDER);
put_page(page);
@@ -419,7 +445,8 @@ static long memory_exchange(XEN_GUEST_HA
if ( !test_and_clear_bit(_PGC_allocated, &page->count_info) )
BUG();
mfn = page_to_mfn(page);
- guest_physmap_remove_page(d, mfn_to_gmfn(d, mfn), mfn);
+ guest_physmap_remove_page(d, mfn_to_gmfn(d, mfn), mfn,
+ NORMAL_PAGE_ORDER);
put_page(page);
}
@@ -441,8 +468,8 @@ static long memory_exchange(XEN_GUEST_HA
if ( unlikely(paging_mode_translate(d)) )
{
/* Ignore failure here. There's nothing we can do. */
- for ( k = 0; k < (1UL << exch.out.extent_order); k++ )
- (void)guest_physmap_add_page(d, gpfn + k, mfn + k);
+ (void)guest_physmap_add_page(d, gpfn, mfn,
+ exch.out.extent_order);
}
else
{
diff -r 810d8c3ac992 xen/include/asm-ia64/shadow.h
--- a/xen/include/asm-ia64/shadow.h Thu May 08 16:58:33 2008 +0100
+++ b/xen/include/asm-ia64/shadow.h Sun May 11 17:21:52 2008 -0500
@@ -40,8 +40,10 @@
* Utilities to change relationship of gpfn->mfn for designated domain,
* which is required by gnttab transfer, balloon, device model and etc.
*/
-int guest_physmap_add_page(struct domain *d, unsigned long gpfn, unsigned long mfn);
-void guest_physmap_remove_page(struct domain *d, unsigned long gpfn, unsigned long mfn);
+int guest_physmap_add_page(struct domain *d, unsigned long gpfn,
+ unsigned long mfn, unsigned int page_order);
+void guest_physmap_remove_page(struct domain *d, unsigned long gpfn,
+ unsigned long mfn, unsigned int page_order);
static inline int
shadow_mode_enabled(struct domain *d)
diff -r 810d8c3ac992 xen/include/asm-x86/mm.h
--- a/xen/include/asm-x86/mm.h Thu May 08 16:58:33 2008 +0100
+++ b/xen/include/asm-x86/mm.h Sun May 11 17:21:52 2008 -0500
@@ -124,6 +124,14 @@ static inline u32 pickle_domptr(struct d
/* The order of the largest allocation unit we use for shadow pages */
#define SHADOW_MAX_ORDER 2 /* Need up to 16k allocs for 32-bit on PAE/64 */
+/* The order of continuously allocated super page frames */
+#define NORMAL_PAGE_ORDER 0 /* 4KB page */
+#if CONFIG_PAGING_LEVELS == 2
+#define SUPER_PAGE_ORDER 10 /* 4MB page */
+#else
+#define SUPER_PAGE_ORDER 9 /* 2MB page */
+#endif
+
#define page_get_owner(_p) (unpickle_domptr((_p)->u.inuse._domain))
#define page_set_owner(_p,_d) ((_p)->u.inuse._domain = pickle_domptr(_d))
diff -r 810d8c3ac992 xen/include/asm-x86/p2m.h
--- a/xen/include/asm-x86/p2m.h Thu May 08 16:58:33 2008 +0100
+++ b/xen/include/asm-x86/p2m.h Sun May 11 17:21:52 2008 -0500
@@ -102,7 +102,8 @@ struct p2m_domain {
void (*free_page )(struct domain *d,
struct page_info *pg);
int (*set_entry )(struct domain *d, unsigned long gfn,
- mfn_t mfn, p2m_type_t p2mt);
+ mfn_t mfn, unsigned int page_order,
+ p2m_type_t p2mt);
mfn_t (*get_entry )(struct domain *d, unsigned long gfn,
p2m_type_t *p2mt);
mfn_t (*get_entry_current)(unsigned long gfn,
@@ -203,21 +204,23 @@ void p2m_final_teardown(struct domain *d
/* Add a page to a domain's p2m table */
int guest_physmap_add_entry(struct domain *d, unsigned long gfn,
- unsigned long mfn, p2m_type_t t);
+ unsigned long mfn, unsigned int page_order,
+ p2m_type_t t);
/* Untyped version for RAM only, for compatibility
*
* Return 0 for success
*/
static inline int guest_physmap_add_page(struct domain *d, unsigned long gfn,
- unsigned long mfn)
-{
- return guest_physmap_add_entry(d, gfn, mfn, p2m_ram_rw);
+ unsigned long mfn,
+ unsigned int page_order)
+{
+ return guest_physmap_add_entry(d, gfn, mfn, page_order, p2m_ram_rw);
}
/* Remove a page from a domain's p2m table */
void guest_physmap_remove_page(struct domain *d, unsigned long gfn,
- unsigned long mfn);
+ unsigned long mfn, unsigned int page_order);
/* Change types across all p2m entries in a domain */
void p2m_change_type_global(struct domain *d, p2m_type_t ot, p2m_type_t nt);
diff -r 810d8c3ac992 xen/include/xen/paging.h
--- a/xen/include/xen/paging.h Thu May 08 16:58:33 2008 +0100
+++ b/xen/include/xen/paging.h Sun May 11 17:21:52 2008 -0500
@@ -18,8 +18,8 @@
#else
#define paging_mode_translate(d) (0)
-#define guest_physmap_add_page(d, p, m) (0)
-#define guest_physmap_remove_page(d, p, m) ((void)0)
+#define guest_physmap_add_page(d, p, m, o) (0)
+#define guest_physmap_remove_page(d, p, m, o) ((void)0)
#endif
[-- Attachment #3: Type: text/plain, Size: 138 bytes --]
_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xensource.com
http://lists.xensource.com/xen-devel
^ permalink raw reply [flat|nested] 14+ messages in thread
* RE: [PATCH] patch to support super page (2M) with EPT
2008-05-12 4:36 ` Huang2, Wei
@ 2008-05-12 5:04 ` Xin, Xiaohui
2008-05-12 7:03 ` Keir Fraser
0 siblings, 1 reply; 14+ messages in thread
From: Xin, Xiaohui @ 2008-05-12 5:04 UTC (permalink / raw)
To: Huang2, Wei, xen-devel
[-- Attachment #1.1: Type: text/plain, Size: 3075 bytes --]
Some comments here:
1) Basically, 4M page allocation does not map naturally onto the EPT
hardware, so we only use 2M super pages now.
I remember Keir saying that 2M page allocation is sufficient, and that
he has already removed all the pure 32-bit support.
2) If we don't allocate the last 2M area with 4KB pages, EPT will run
into a problem. Xen sets one of the 4K pages there to be invalid;
logically, that means we would have to invalidate the whole 2M page if
it had been allocated as a single 2M page, and then the special pages
Xen uses at the high end of guest memory could no longer be used.
May we know how you cope with that?
Thanks
Xiaohui
________________________________
From: xen-devel-bounces@lists.xensource.com
[mailto:xen-devel-bounces@lists.xensource.com] On Behalf Of Huang2, Wei
Sent: Monday, May 12, 2008 12:36 PM
To: Xin, Xiaohui; xen-devel@lists.xensource.com
Subject: RE: [Xen-devel][PATCH] patch to support super page (2M) with
EPT
This is the latest one I created. Please review it and I will re-submit.
1. It includes the patch for p2m-ept.c, directly from your
previous patch.
2. Xc_hvm_create.c is based on my original approach. It includes
support for both 2MB and 4MB pages. Also it considers the case of odd
page size (such as 255MB). But I did not allocate the last 2MB area
using 4KB pages. Let me know if it is a big issue.
3. The rest are pretty similar.
Thanks,
-Wei
From: xen-devel-bounces@lists.xensource.com
[mailto:xen-devel-bounces@lists.xensource.com] On Behalf Of Huang2, Wei
Sent: Sunday, May 11, 2008 3:34 PM
To: Xin, Xiaohui; xen-devel@lists.xensource.com
Subject: RE: [Xen-devel][PATCH] patch to support super page (2M) with
EPT
Could we work together for a common solution? As far as I can see, it
largely overlaps with my super page patch. The major difference is
between p2m.c and p2m-ept.c.
-Wei
From: xen-devel-bounces@lists.xensource.com
[mailto:xen-devel-bounces@lists.xensource.com] On Behalf Of Xin, Xiaohui
Sent: Friday, May 09, 2008 4:11 AM
To: xen-devel@lists.xensource.com
Subject: [Xen-devel][PATCH] patch to support super page (2M) with EPT
Attached are the patches to support super page with EPT. We only support
2M size. And shadow may still work fine with 4K pages.
The patches can be split into 3 parts. Apply order is as attached.
tool.diff
To allocate 2M physical contiguous memory in guest except the first 2M
and the last 2M.
The first 2M covers special memory, and Xen use the last few pages in
guest memory to do special things.
We let them to be 4K pages as normal.
super_page_common.patch
To modify the p2m interfaces by adding an order parameter, such as
guest_physmap_add_page(), p2m_set_entry(), etc.
p2m-ept-file.patch
To handle the EPT tables to support super page.
Signed-off-by: Xin Xiaohui <xiaohui.xin@intel.com>
Signed-off-by: Li Xin, B <xin.b.li@intel.com>
[-- Attachment #1.2: Type: text/html, Size: 16792 bytes --]
[-- Attachment #2: Type: text/plain, Size: 138 bytes --]
_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xensource.com
http://lists.xensource.com/xen-devel
^ permalink raw reply [flat|nested] 14+ messages in thread
* Re: [PATCH] patch to support super page (2M) with EPT
2008-05-12 5:04 ` Xin, Xiaohui
@ 2008-05-12 7:03 ` Keir Fraser
2008-05-12 17:28 ` Huang2, Wei
0 siblings, 1 reply; 14+ messages in thread
From: Keir Fraser @ 2008-05-12 7:03 UTC (permalink / raw)
To: Xin, Xiaohui, Huang2, Wei, xen-devel
[-- Attachment #1.1: Type: text/plain, Size: 3450 bytes --]
Yes, absolutely no need for 4M page support. We do not support the 32-bit
non-PAE build target any more.
-- Keir
On 12/5/08 06:04, "Xin, Xiaohui" <xiaohui.xin@intel.com> wrote:
> Some comments here:
> 1) Basically 4M pages allocations is not hardware naturally for EPT, we only
> use 2M super pages now.
> I remembered that Keir said that 2M pages allocation is sufficient, and he
> removed all the pure 32bit support already.
> 2) If we don't allocate the last 2M area with 4kb pages, the EPT will meet
> some problem. Xen will set one of the 4k page
> there to be invalid, logically that means we should invalid the all the
> 2M page if we allocate it with 2M, and then the
> special pages Xen used in the high end of the guest memory can not be
> used then. May we know how you cope with that?
>
> Thanks
> Xiaohui
>
>
>
> From: xen-devel-bounces@lists.xensource.com
> [mailto:xen-devel-bounces@lists.xensource.com] On Behalf Of Huang2, Wei
> Sent: Monday, May 12, 2008 12:36 PM
> To: Xin, Xiaohui; xen-devel@lists.xensource.com
> Subject: RE: [Xen-devel][PATCH] patch to support super page (2M) with EPT
>
> This is the latest one I created. Please review it and I will re-submit.
>
> 1. It includes the patch for p2m-ept.c, directly from your previous
> patch.
>
> 2. Xc_hvm_create.c is based on my original approach. It includes support
> for both 2MB and 4MB pages. Also it considers the case of odd page size (such
> as 255MB). But I did not allocate the last 2MB area using 4KB pages. Let me
> know if it is a big issue.
>
> 3. The rest are pretty similar.
>
>
> Thanks,
>
> -Wei
>
>
> From: xen-devel-bounces@lists.xensource.com
> [mailto:xen-devel-bounces@lists.xensource.com] On Behalf Of Huang2, Wei
> Sent: Sunday, May 11, 2008 3:34 PM
> To: Xin, Xiaohui; xen-devel@lists.xensource.com
> Subject: RE: [Xen-devel][PATCH] patch to support super page (2M) with EPT
>
> Could we work together for a common solution? As far as I can see, it largely
> overlaps with my super page patch. The major difference is between p2m.c and
> p2m-ept.c.
>
> -Wei
>
>
> From: xen-devel-bounces@lists.xensource.com
> [mailto:xen-devel-bounces@lists.xensource.com] On Behalf Of Xin, Xiaohui
> Sent: Friday, May 09, 2008 4:11 AM
> To: xen-devel@lists.xensource.com
> Subject: [Xen-devel][PATCH] patch to support super page (2M) with EPT
>
> Attached are the patches to support super page with EPT. We only support 2M
> size. And shadow may still work fine with 4K pages.
> The patches can be split into 3 parts. Apply order is as attached.
>
> tool.diff
> To allocate 2M physical contiguous memory in guest except the first 2M and the
> last 2M.
> The first 2M covers special memory, and Xen use the last few pages in guest
> memory to do special things.
> We let them to be 4K pages as normal.
> super_page_common.patch
> To modify the p2m interfaces by adding an order parameter, such as
> guest_physmap_add_page(), p2m_set_entry(), etc.
> p2m-ept-file.patch
> To handle the EPT tables to support super page.
>
>
> Signed-off-by: Xin Xiaohui <xiaohui.xin@intel.com>
> Signed-off-by: Li Xin, B <xin.b.li@intel.com>
>
>
>
>
>
> _______________________________________________
> Xen-devel mailing list
> Xen-devel@lists.xensource.com
> http://lists.xensource.com/xen-devel
[-- Attachment #1.2: Type: text/html, Size: 7488 bytes --]
[-- Attachment #2: Type: text/plain, Size: 138 bytes --]
_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xensource.com
http://lists.xensource.com/xen-devel
^ permalink raw reply [flat|nested] 14+ messages in thread
* RE: [PATCH] patch to support super page (2M) with EPT
@ 2008-05-12 12:14 Huang2, Wei
0 siblings, 0 replies; 14+ messages in thread
From: Huang2, Wei @ 2008-05-12 12:14 UTC (permalink / raw)
To: Keir Fraser, Xin, Xiaohui, xen-devel
I will re-submit another one today.
-----Original Message-----
From: Keir Fraser <keir.fraser@eu.citrix.com>
Sent: Monday, May 12, 2008 2:03 AM
To: Xin, Xiaohui <xiaohui.xin@intel.com>; Huang2, Wei <Wei.Huang2@amd.com>; xen-devel@lists.xensource.com <xen-devel@lists.xensource.com>
Subject: Re: [Xen-devel][PATCH] patch to support super page (2M) with EPT
Yes, absolutely no need for 4M page support. We do not support the 32-bit non-PAE build target any more.
-- Keir
On 12/5/08 06:04, "Xin, Xiaohui" <xiaohui.xin@intel.com> wrote:
Some comments here:
1) Basically 4M pages allocations is not hardware naturally for EPT, we only use 2M super pages now.
I remembered that Keir said that 2M pages allocation is sufficient, and he removed all the pure 32bit support already.
2) If we don't allocate the last 2M area with 4kb pages, the EPT will meet some problem. Xen will set one of the 4k page
there to be invalid, logically that means we should invalid the all the 2M page if we allocate it with 2M, and then the
special pages Xen used in the high end of the guest memory can not be used then. May we know how you cope with that?
Thanks
Xiaohui
________________________________
From: xen-devel-bounces@lists.xensource.com [mailto:xen-devel-bounces@lists.xensource.com] On Behalf Of Huang2, Wei
Sent: Monday, May 12, 2008 12:36 PM
To: Xin, Xiaohui; xen-devel@lists.xensource.com
Subject: RE: [Xen-devel][PATCH] patch to support super page (2M) with EPT
This is the latest one I created. Please review it and I will re-submit.
1. It includes the patch for p2m-ept.c, directly from your previous patch.
2. Xc_hvm_create.c is based on my original approach. It includes support for both 2MB and 4MB pages. Also it considers the case of odd page size (such as 255MB). But I did not allocate the last 2MB area using 4KB pages. Let me know if it is a big issue.
3. The rest are pretty similar.
Thanks,
-Wei
From: xen-devel-bounces@lists.xensource.com [mailto:xen-devel-bounces@lists.xensource.com] On Behalf Of Huang2, Wei
Sent: Sunday, May 11, 2008 3:34 PM
To: Xin, Xiaohui; xen-devel@lists.xensource.com
Subject: RE: [Xen-devel][PATCH] patch to support super page (2M) with EPT
Could we work together for a common solution? As far as I can see, it largely overlaps with my super page patch. The major difference is between p2m.c and p2m-ept.c.
-Wei
From: xen-devel-bounces@lists.xensource.com [mailto:xen-devel-bounces@lists.xensource.com] On Behalf Of Xin, Xiaohui
Sent: Friday, May 09, 2008 4:11 AM
To: xen-devel@lists.xensource.com
Subject: [Xen-devel][PATCH] patch to support super page (2M) with EPT
Attached are the patches to support super page with EPT. We only support 2M size. And shadow may still work fine with 4K pages.
The patches can be split into 3 parts. Apply order is as attached.
tool.diff
To allocate 2M physical contiguous memory in guest except the first 2M and the last 2M.
The first 2M covers special memory, and Xen use the last few pages in guest memory to do special things.
We let them to be 4K pages as normal.
super_page_common.patch
To modify the p2m interfaces by adding an order parameter, such as guest_physmap_add_page(), p2m_set_entry(), etc.
p2m-ept-file.patch
To handle the EPT tables to support super page.
Signed-off-by: Xin Xiaohui <xiaohui.xin@intel.com>
Signed-off-by: Li Xin, B <xin.b.li@intel.com>
________________________________
_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xensource.com
http://lists.xensource.com/xen-devel
^ permalink raw reply [flat|nested] 14+ messages in thread
* RE: [PATCH] patch to support super page (2M) with EPT
2008-05-12 7:03 ` Keir Fraser
@ 2008-05-12 17:28 ` Huang2, Wei
2008-05-13 8:46 ` Keir Fraser
0 siblings, 1 reply; 14+ messages in thread
From: Huang2, Wei @ 2008-05-12 17:28 UTC (permalink / raw)
To: Keir Fraser, Xin, Xiaohui, xen-devel
[-- Attachment #1.1: Type: text/plain, Size: 3984 bytes --]
Here is a revised version. I got rid of 4MB support, as suggested. I did
not see the issue mentioned by Xiaohui related to splitting the last 2M
into 4KB pages, but I have attached two versions anyway for your
reference. Keir, please let me know if you have comments.
-Wei
________________________________
From: Keir Fraser [mailto:keir.fraser@eu.citrix.com]
Sent: Monday, May 12, 2008 2:03 AM
To: Xin, Xiaohui; Huang2, Wei; xen-devel@lists.xensource.com
Subject: Re: [Xen-devel][PATCH] patch to support super page (2M) with
EPT
Yes, absolutely no need for 4M page support. We do not support the
32-bit non-PAE build target any more.
-- Keir
On 12/5/08 06:04, "Xin, Xiaohui" <xiaohui.xin@intel.com> wrote:
Some comments here:
1) Basically 4M pages allocations is not hardware naturally for
EPT, we only use 2M super pages now.
I remembered that Keir said that 2M pages allocation is
sufficient, and he removed all the pure 32bit support already.
2) If we don't allocate the last 2M area with 4kb pages, the
EPT will meet some problem. Xen will set one of the 4k page
there to be invalid, logically that means we should invalid
the all the 2M page if we allocate it with 2M, and then the
special pages Xen used in the high end of the guest memory
can not be used then. May we know how you cope with that?
Thanks
Xiaohui
________________________________
From: xen-devel-bounces@lists.xensource.com [
mailto:xen-devel-bounces@lists.xensource.com] On Behalf Of Huang2, Wei
Sent: Monday, May 12, 2008 12:36 PM
To: Xin, Xiaohui; xen-devel@lists.xensource.com
Subject: RE: [Xen-devel][PATCH] patch to support super page (2M)
with EPT
This is the latest one I created. Please review it and I will
re-submit.
1. It includes the patch for p2m-ept.c, directly from your
previous patch.
2. Xc_hvm_create.c is based on my original approach. It
includes support for both 2MB and 4MB pages. Also it considers the case
of odd page size (such as 255MB). But I did not allocate the last 2MB
area using 4KB pages. Let me know if it is a big issue.
3. The rest are pretty similar.
Thanks,
-Wei
From: xen-devel-bounces@lists.xensource.com [
mailto:xen-devel-bounces@lists.xensource.com] On Behalf Of Huang2, Wei
Sent: Sunday, May 11, 2008 3:34 PM
To: Xin, Xiaohui; xen-devel@lists.xensource.com
Subject: RE: [Xen-devel][PATCH] patch to support super page (2M)
with EPT
Could we work together for a common solution? As far as I can
see, it largely overlaps with my super page patch. The major difference
is between p2m.c and p2m-ept.c.
-Wei
From: xen-devel-bounces@lists.xensource.com [
mailto:xen-devel-bounces@lists.xensource.com] On Behalf Of Xin, Xiaohui
Sent: Friday, May 09, 2008 4:11 AM
To: xen-devel@lists.xensource.com
Subject: [Xen-devel][PATCH] patch to support super page (2M)
with EPT
Attached are the patches to support super page with EPT. We only
support 2M size. And shadow may still work fine with 4K pages.
The patches can be split into 3 parts. Apply order is as
attached.
tool.diff
To allocate 2M physical contiguous memory in guest except the
first 2M and the last 2M.
The first 2M covers special memory, and Xen use the last few
pages in guest memory to do special things.
We let them to be 4K pages as normal.
super_page_common.patch
To modify the p2m interfaces by adding an order parameter, such
as guest_physmap_add_page(), p2m_set_entry(), etc.
p2m-ept-file.patch
To handle the EPT tables to support super page.
Signed-off-by: Xin Xiaohui <xiaohui.xin@intel.com>
Signed-off-by: Li Xin, B <xin.b.li@intel.com>
________________________________
_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xensource.com
http://lists.xensource.com/xen-devel
[-- Attachment #1.2: Type: text/html, Size: 8934 bytes --]
[-- Attachment #2: super_page_patch_fix_last_2MB.txt --]
[-- Type: text/plain, Size: 44600 bytes --]
diff -r 810d8c3ac992 tools/libxc/xc_hvm_build.c
--- a/tools/libxc/xc_hvm_build.c Thu May 08 16:58:33 2008 +0100
+++ b/tools/libxc/xc_hvm_build.c Mon May 12 06:05:39 2008 -0500
@@ -157,8 +157,10 @@ static int setup_guest(int xc_handle,
char *image, unsigned long image_size)
{
xen_pfn_t *page_array = NULL;
+ xen_pfn_t *super_page_array = NULL;
unsigned long i, nr_pages = (unsigned long)memsize << (20 - PAGE_SHIFT);
- unsigned long special_page_nr, entry_eip, cur_pages;
+ unsigned long nr_super_pages;
+ unsigned long special_page_nr, entry_eip, cur_pages, limit;
struct xen_add_to_physmap xatp;
struct shared_info *shared_info;
void *e820_page;
@@ -167,6 +169,7 @@ static int setup_guest(int xc_handle,
uint64_t v_start, v_end;
int rc;
xen_capabilities_info_t caps;
+ int super_page_order;
/* An HVM guest must be initialised with at least 2MB memory. */
if ( memsize < 2 )
@@ -189,6 +192,17 @@ static int setup_guest(int xc_handle,
PERROR("Guest OS must load to a page boundary.\n");
goto error_out;
}
+
+ /* We only support 2MB super pages since 32bit non-PAE is not officially
+ * supported by Xen any more. Plus, we try to allocate the last memory
+ * area using 4KB pages. For this reason, if memsize is an even number,
+ * we have to decrease nr_super_pages by 1. As for an odd memsize, this is
+ * enforced automatically (see below).
+ */
+ nr_super_pages = (unsigned long)memsize >> 1;
+ if ( (memsize % 2) == 0 )
+ nr_super_pages -= 1;
+ super_page_order = 9;
IPRINTF("VIRTUAL MEMORY ARRANGEMENT:\n"
" Loader: %016"PRIx64"->%016"PRIx64"\n"
@@ -198,7 +212,9 @@ static int setup_guest(int xc_handle,
v_start, v_end,
elf_uval(&elf, elf.ehdr, e_entry));
- if ( (page_array = malloc(nr_pages * sizeof(xen_pfn_t))) == NULL )
+ if ( (page_array = malloc(nr_pages * sizeof(xen_pfn_t))) == NULL ||
+ (super_page_array =
+ malloc(nr_super_pages * sizeof(xen_pfn_t))) == NULL )
{
PERROR("Could not allocate memory.\n");
goto error_out;
@@ -206,26 +222,44 @@ static int setup_guest(int xc_handle,
for ( i = 0; i < nr_pages; i++ )
page_array[i] = i;
+ for ( i = 0; i < nr_super_pages; i++ )
+ super_page_array[i] = i << super_page_order;
for ( i = HVM_BELOW_4G_RAM_END >> PAGE_SHIFT; i < nr_pages; i++ )
page_array[i] += HVM_BELOW_4G_MMIO_LENGTH >> PAGE_SHIFT;
-
- /*
- * Allocate memory for HVM guest, skipping VGA hole 0xA0000-0xC0000.
- * We allocate pages in batches of no more than 2048 to ensure that
- * we can be preempted and hence dom0 remains responsive.
- */
+ for ( i = HVM_BELOW_4G_RAM_END >> (PAGE_SHIFT + super_page_order);
+ i < nr_super_pages; i++ )
+ super_page_array[i] += HVM_BELOW_4G_MMIO_LENGTH >> PAGE_SHIFT;
+
+
+ /* Allocate memory for HVM guest, skipping VGA hole 0xA0000-0xC0000. */
rc = xc_domain_memory_populate_physmap(
xc_handle, dom, 0xa0, 0, 0, &page_array[0x00]);
- cur_pages = 0xc0;
- while ( (rc == 0) && (nr_pages > cur_pages) )
- {
- unsigned long count = nr_pages - cur_pages;
- if ( count > 2048 )
- count = 2048;
+ if ( rc == 0 )
rc = xc_domain_memory_populate_physmap(
- xc_handle, dom, count, 0, 0, &page_array[cur_pages]);
+ xc_handle, dom, 0x200-0xc0, 0, 0, &page_array[0xc0]);
+
+ /* We allocate pages in batches of no more than 8MB to ensure that
+ * we can be preempted and hence dom0 remains responsive.
+ */
+ limit = 4;
+ cur_pages = 1;
+ while ( (rc == 0) && (nr_super_pages > cur_pages) )
+ {
+ unsigned long count = nr_super_pages - cur_pages;
+ if ( count > limit )
+ count = limit;
+ rc = xc_domain_memory_populate_physmap(xc_handle, dom, count,
+ super_page_order, 0,
+ &super_page_array[cur_pages]);
cur_pages += count;
}
+
+ /* handle the case of odd number physical memory size, such as 255MB */
+ if ( rc == 0 )
+ rc = xc_domain_memory_populate_physmap(
+ xc_handle, dom, nr_pages - (nr_super_pages << super_page_order),
+ 0, 0, &page_array[nr_super_pages << super_page_order]);
+
if ( rc != 0 )
{
PERROR("Could not allocate memory for HVM guest.\n");
@@ -314,10 +348,12 @@ static int setup_guest(int xc_handle,
}
free(page_array);
+ free(super_page_array);
return 0;
error_out:
free(page_array);
+ free(super_page_array);
return -1;
}
diff -r 810d8c3ac992 xen/arch/ia64/xen/mm.c
--- a/xen/arch/ia64/xen/mm.c Thu May 08 16:58:33 2008 +0100
+++ b/xen/arch/ia64/xen/mm.c Sun May 11 16:58:19 2008 -0500
@@ -2415,7 +2415,7 @@ steal_page(struct domain *d, struct page
int
guest_physmap_add_page(struct domain *d, unsigned long gpfn,
- unsigned long mfn)
+ unsigned long mfn, unsigned int page_order)
{
BUG_ON(!mfn_valid(mfn));
BUG_ON(mfn_to_page(mfn)->count_info != (PGC_allocated | 1));
@@ -2432,7 +2432,7 @@ guest_physmap_add_page(struct domain *d,
void
guest_physmap_remove_page(struct domain *d, unsigned long gpfn,
- unsigned long mfn)
+ unsigned long mfn, unsigned int page_order)
{
BUG_ON(mfn == 0);//XXX
zap_domain_page_one(d, gpfn << PAGE_SHIFT, 0, mfn);
@@ -2838,7 +2838,7 @@ arch_memory_op(int op, XEN_GUEST_HANDLE(
if (prev_mfn && mfn_valid(prev_mfn)) {
if (is_xen_heap_mfn(prev_mfn))
/* Xen heap frames are simply unhooked from this phys slot. */
- guest_physmap_remove_page(d, xatp.gpfn, prev_mfn);
+ guest_physmap_remove_page(d, xatp.gpfn, prev_mfn, 0);
else
/* Normal domain memory is freed, to avoid leaking memory. */
guest_remove_page(d, xatp.gpfn);
@@ -2847,10 +2847,10 @@ arch_memory_op(int op, XEN_GUEST_HANDLE(
/* Unmap from old location, if any. */
gpfn = get_gpfn_from_mfn(mfn);
if (gpfn != INVALID_M2P_ENTRY)
- guest_physmap_remove_page(d, gpfn, mfn);
+ guest_physmap_remove_page(d, gpfn, mfn, 0);
/* Map at new location. */
- guest_physmap_add_page(d, xatp.gpfn, mfn);
+ guest_physmap_add_page(d, xatp.gpfn, mfn, 0);
out:
domain_unlock(d);
diff -r 810d8c3ac992 xen/arch/x86/mm.c
--- a/xen/arch/x86/mm.c Thu May 08 16:58:33 2008 +0100
+++ b/xen/arch/x86/mm.c Sun May 11 10:53:29 2008 -0500
@@ -3287,7 +3287,8 @@ long arch_memory_op(int op, XEN_GUEST_HA
{
if ( is_xen_heap_mfn(prev_mfn) )
/* Xen heap frames are simply unhooked from this phys slot. */
- guest_physmap_remove_page(d, xatp.gpfn, prev_mfn);
+ guest_physmap_remove_page(d, xatp.gpfn, prev_mfn,
+ NORMAL_PAGE_ORDER);
else
/* Normal domain memory is freed, to avoid leaking memory. */
guest_remove_page(d, xatp.gpfn);
@@ -3296,10 +3297,10 @@ long arch_memory_op(int op, XEN_GUEST_HA
/* Unmap from old location, if any. */
gpfn = get_gpfn_from_mfn(mfn);
if ( gpfn != INVALID_M2P_ENTRY )
- guest_physmap_remove_page(d, gpfn, mfn);
+ guest_physmap_remove_page(d, gpfn, mfn, NORMAL_PAGE_ORDER);
/* Map at new location. */
- guest_physmap_add_page(d, xatp.gpfn, mfn);
+ guest_physmap_add_page(d, xatp.gpfn, mfn, NORMAL_PAGE_ORDER);
domain_unlock(d);
diff -r 810d8c3ac992 xen/arch/x86/mm/hap/p2m-ept.c
--- a/xen/arch/x86/mm/hap/p2m-ept.c Thu May 08 16:58:33 2008 +0100
+++ b/xen/arch/x86/mm/hap/p2m-ept.c Sun May 11 16:45:36 2008 -0500
@@ -20,6 +20,7 @@
#include <xen/domain_page.h>
#include <xen/sched.h>
#include <asm/current.h>
+#include <asm/paging.h>
#include <asm/types.h>
#include <asm/domain.h>
#include <asm/p2m.h>
@@ -46,6 +47,9 @@ static void ept_p2m_type_to_flags(ept_en
}
}
+#define GUEST_TABLE_NORMAL_PAGE 1
+#define GUEST_TABLE_SUPER_PAGE 2
+
static int ept_next_level(struct domain *d, bool_t read_only,
ept_entry_t **table, unsigned long *gfn_remainder,
u32 shift)
@@ -54,7 +58,6 @@ static int ept_next_level(struct domain
u32 index;
index = *gfn_remainder >> shift;
- *gfn_remainder &= (1UL << shift) - 1;
ept_entry = (*table) + index;
@@ -83,31 +86,53 @@ static int ept_next_level(struct domain
ept_entry->r = ept_entry->w = ept_entry->x = 1;
}
- next = map_domain_page(ept_entry->mfn);
- unmap_domain_page(*table);
- *table = next;
-
- return 1;
+ if ( !ept_entry->sp_avail )
+ {
+ *gfn_remainder &= (1UL << shift) - 1;
+ next = map_domain_page(ept_entry->mfn);
+ unmap_domain_page(*table);
+ *table = next;
+ return GUEST_TABLE_NORMAL_PAGE;
+ }
+ else
+ return GUEST_TABLE_SUPER_PAGE;
}
static int
-ept_set_entry(struct domain *d, unsigned long gfn, mfn_t mfn, p2m_type_t p2mt)
-{
- ept_entry_t *table =
- map_domain_page(mfn_x(pagetable_get_mfn(d->arch.phys_table)));
- unsigned long gfn_remainder = gfn;
+ept_set_entry(struct domain *d, unsigned long gfn, mfn_t mfn,
+ unsigned int order, p2m_type_t p2mt)
+{
+ ept_entry_t *table = NULL;
+ unsigned long gfn_remainder = gfn, offset = 0;
ept_entry_t *ept_entry = NULL;
u32 index;
- int i, rv = 0;
+ int i, rv = 0, ret = 0;
+ int walk_level = order / EPT_TABLE_ORDER;
/* Should check if gfn obeys GAW here */
- for ( i = EPT_DEFAULT_GAW; i > 0; i-- )
- if ( !ept_next_level(d, 0, &table, &gfn_remainder,
- i * EPT_TABLE_ORDER) )
+ if ( order != 0 )
+ if ( (gfn & ((1UL << order) - 1)) )
+ return 1;
+
+ table = map_domain_page(mfn_x(pagetable_get_mfn(d->arch.phys_table)));
+
+ ASSERT(table != NULL);
+
+ for ( i = EPT_DEFAULT_GAW; i > walk_level; i-- )
+ {
+ ret = ept_next_level(d, 0, &table, &gfn_remainder,
+ i * EPT_TABLE_ORDER);
+ if ( !ret )
goto out;
-
- index = gfn_remainder;
+ else if ( ret == GUEST_TABLE_SUPER_PAGE )
+ break;
+ }
+
+ index = gfn_remainder >> ( i ? (i * EPT_TABLE_ORDER): order);
+ walk_level = ( i ? ( i * EPT_TABLE_ORDER) : order) / EPT_TABLE_ORDER;
+ offset = (gfn_remainder & ( ((1 << (i*EPT_TABLE_ORDER)) - 1)));
+
ept_entry = table + index;
if ( mfn_valid(mfn_x(mfn)) || (p2mt == p2m_mmio_direct) )
@@ -117,9 +142,20 @@ ept_set_entry(struct domain *d, unsigned
d->arch.p2m->max_mapped_pfn = gfn;
ept_entry->emt = EPT_DEFAULT_MT;
- ept_entry->sp_avail = 0;
+ ept_entry->sp_avail = walk_level ? 1 : 0;
+
+ if ( ret == GUEST_TABLE_SUPER_PAGE )
+ {
+ ept_entry->mfn = mfn_x(mfn) - offset;
+ if ( ept_entry->avail1 == p2m_ram_logdirty &&
+ p2mt == p2m_ram_rw )
+ for ( i = 0; i < 512; i++ )
+ paging_mark_dirty(d, mfn_x(mfn)-offset+i);
+ }
+ else
+ ept_entry->mfn = mfn_x(mfn);
+
ept_entry->avail1 = p2mt;
- ept_entry->mfn = mfn_x(mfn);
ept_entry->rsvd = 0;
ept_entry->avail2 = 0;
/* last step */
@@ -132,14 +168,42 @@ ept_set_entry(struct domain *d, unsigned
/* Success */
rv = 1;
- out:
+out:
unmap_domain_page(table);
ept_sync_domain(d);
+ /* Now the p2m table is not shared with vt-d page table */
+
+ if ( iommu_enabled && is_hvm_domain(d) )
+ {
+ if ( p2mt == p2m_ram_rw )
+ {
+ if ( ret == GUEST_TABLE_SUPER_PAGE )
+ {
+ for ( i = 0; i < 512; i++ )
+ iommu_map_page(d, gfn-offset+i, mfn_x(mfn)-offset+i);
+ }
+ else if ( ret )
+ iommu_map_page(d, gfn, mfn_x(mfn));
+ }
+ else
+ {
+ if ( ret == GUEST_TABLE_SUPER_PAGE )
+ {
+ for ( i = 0; i < 512; i++ )
+ iommu_unmap_page(d, gfn-offset+i);
+ }
+ else if ( ret )
+ iommu_unmap_page(d, gfn);
+ }
+ }
+
+#ifdef P2M_SHARE_WITH_VTD_PAGE_TABLE
/* If p2m table is shared with vtd page-table. */
if ( iommu_enabled && is_hvm_domain(d) && (p2mt == p2m_mmio_direct) )
iommu_flush(d, gfn, (u64*)ept_entry);
+#endif
return rv;
}
@@ -152,7 +216,7 @@ static mfn_t ept_get_entry(struct domain
unsigned long gfn_remainder = gfn;
ept_entry_t *ept_entry;
u32 index;
- int i;
+ int i, ret=0;
mfn_t mfn = _mfn(INVALID_MFN);
*t = p2m_mmio_dm;
@@ -164,17 +228,31 @@ static mfn_t ept_get_entry(struct domain
/* Should check if gfn obeys GAW here. */
for ( i = EPT_DEFAULT_GAW; i > 0; i-- )
- if ( !ept_next_level(d, 1, &table, &gfn_remainder,
- i * EPT_TABLE_ORDER) )
+ {
+ ret = ept_next_level(d, 1, &table, &gfn_remainder,
+ i * EPT_TABLE_ORDER);
+ if ( !ret )
goto out;
-
- index = gfn_remainder;
+ else if ( ret == GUEST_TABLE_SUPER_PAGE )
+ break;
+ }
+
+ index = gfn_remainder >> ( i * EPT_TABLE_ORDER);
ept_entry = table + index;
if ( ept_entry->avail1 != p2m_invalid )
{
*t = ept_entry->avail1;
mfn = _mfn(ept_entry->mfn);
+ if ( i )
+ {
+ /* The walk stopped at a super page entry: add the gfn's offset
+ * within it to get the 4K page's MFN, emulating a 4K p2m table
+ */
+ unsigned long split_mfn =
+ mfn_x(mfn) + (gfn_remainder & ( ((1 << (i*EPT_TABLE_ORDER)) - 1 )));
+ mfn = _mfn(split_mfn);
+ }
}
out:
@@ -205,33 +283,63 @@ static void ept_change_entry_type_global
l4e = map_domain_page(mfn_x(pagetable_get_mfn(d->arch.phys_table)));
for (i4 = 0; i4 < EPT_PAGETABLE_ENTRIES; i4++ )
{
- if ( !l4e[i4].epte || l4e[i4].sp_avail )
+ if ( !l4e[i4].epte )
continue;
- l3e = map_domain_page(l4e[i4].mfn);
- for ( i3 = 0; i3 < EPT_PAGETABLE_ENTRIES; i3++ )
- {
- if ( !l3e[i3].epte || l3e[i3].sp_avail )
+ if ( !l4e[i4].sp_avail )
+ {
+ l3e = map_domain_page(l4e[i4].mfn);
+ for ( i3 = 0; i3 < EPT_PAGETABLE_ENTRIES; i3++ )
+ {
+ if ( !l3e[i3].epte )
+ continue;
+ if ( !l3e[i3].sp_avail )
+ {
+ l2e = map_domain_page(l3e[i3].mfn);
+ for ( i2 = 0; i2 < EPT_PAGETABLE_ENTRIES; i2++ )
+ {
+ if ( !l2e[i2].epte )
+ continue;
+ if ( !l2e[i2].sp_avail )
+ {
+ l1e = map_domain_page(l2e[i2].mfn);
+ for ( i1 = 0; i1 < EPT_PAGETABLE_ENTRIES; i1++ )
+ {
+ if ( !l1e[i1].epte )
+ continue;
+ if ( l1e[i1].avail1 != ot )
+ continue;
+ l1e[i1].avail1 = nt;
+ ept_p2m_type_to_flags(l1e+i1, nt);
+ }
+ unmap_domain_page(l1e);
+ }
+ else
+ {
+ if ( l2e[i2].avail1 != ot )
+ continue;
+ l2e[i2].avail1 = nt;
+ ept_p2m_type_to_flags(l2e+i2, nt);
+ }
+ }
+ unmap_domain_page(l2e);
+ }
+ else
+ {
+ if ( l3e[i3].avail1 != ot )
+ continue;
+ l3e[i3].avail1 = nt;
+ ept_p2m_type_to_flags(l3e+i3, nt);
+ }
+ }
+ unmap_domain_page(l3e);
+ }
+ else
+ {
+ if ( l4e[i4].avail1 != ot )
continue;
- l2e = map_domain_page(l3e[i3].mfn);
- for ( i2 = 0; i2 < EPT_PAGETABLE_ENTRIES; i2++ )
- {
- if ( !l2e[i2].epte || l2e[i2].sp_avail )
- continue;
- l1e = map_domain_page(l2e[i2].mfn);
- for ( i1 = 0; i1 < EPT_PAGETABLE_ENTRIES; i1++ )
- {
- if ( !l1e[i1].epte )
- continue;
- if ( l1e[i1].avail1 != ot )
- continue;
- l1e[i1].avail1 = nt;
- ept_p2m_type_to_flags(l1e+i1, nt);
- }
- unmap_domain_page(l1e);
- }
- unmap_domain_page(l2e);
- }
- unmap_domain_page(l3e);
+ l4e[i4].avail1 = nt;
+ ept_p2m_type_to_flags(l4e+i4, nt);
+ }
}
unmap_domain_page(l4e);
diff -r 810d8c3ac992 xen/arch/x86/mm/p2m.c
--- a/xen/arch/x86/mm/p2m.c Thu May 08 16:58:33 2008 +0100
+++ b/xen/arch/x86/mm/p2m.c Mon May 12 06:07:03 2008 -0500
@@ -151,9 +151,11 @@ p2m_next_level(struct domain *d, mfn_t *
unsigned long *gfn_remainder, unsigned long gfn, u32 shift,
u32 max, unsigned long type)
{
+ l1_pgentry_t *l1_entry;
l1_pgentry_t *p2m_entry;
l1_pgentry_t new_entry;
void *next;
+ int i;
ASSERT(d->arch.p2m->alloc_page);
if ( !(p2m_entry = p2m_find_entry(*table, gfn_remainder, gfn,
@@ -194,6 +196,44 @@ p2m_next_level(struct domain *d, mfn_t *
break;
}
}
+
+ ASSERT(l1e_get_flags(*p2m_entry) & _PAGE_PRESENT);
+
+ /* split single large page into 4KB page in P2M table */
+ if ( type == PGT_l1_page_table && (l1e_get_flags(*p2m_entry) & _PAGE_PSE) )
+ {
+ unsigned long flags, pfn;
+ struct page_info *pg = d->arch.p2m->alloc_page(d);
+ if ( pg == NULL )
+ return 0;
+ list_add_tail(&pg->list, &d->arch.p2m->pages);
+ pg->u.inuse.type_info = PGT_l1_page_table | 1 | PGT_validated;
+ pg->count_info = 1;
+
+ /* New splintered mappings inherit the flags of the old superpage,
+ * with a little reorganisation for the _PAGE_PSE_PAT bit. */
+ flags = l1e_get_flags(*p2m_entry);
+ pfn = l1e_get_pfn(*p2m_entry);
+ if ( pfn & 1 ) /* ==> _PAGE_PSE_PAT was set */
+ pfn -= 1; /* Clear it; _PAGE_PSE becomes _PAGE_PAT */
+ else
+ flags &= ~_PAGE_PSE; /* Clear _PAGE_PSE (== _PAGE_PAT) */
+
+ l1_entry = map_domain_page(mfn_x(page_to_mfn(pg)));
+ for ( i = 0; i < L1_PAGETABLE_ENTRIES; i++ )
+ {
+ new_entry = l1e_from_pfn(pfn + i, flags);
+ paging_write_p2m_entry(d, gfn,
+ l1_entry+i, *table_mfn, new_entry, 1);
+ }
+ unmap_domain_page(l1_entry);
+
+ new_entry = l1e_from_pfn(mfn_x(page_to_mfn(pg)),
+ __PAGE_HYPERVISOR|_PAGE_USER);
+ paging_write_p2m_entry(d, gfn,
+ p2m_entry, *table_mfn, new_entry, 2);
+ }
+
*table_mfn = _mfn(l1e_get_pfn(*p2m_entry));
next = map_domain_page(mfn_x(*table_mfn));
unmap_domain_page(*table);
@@ -204,7 +244,8 @@ p2m_next_level(struct domain *d, mfn_t *
// Returns 0 on error (out of memory)
static int
-p2m_set_entry(struct domain *d, unsigned long gfn, mfn_t mfn, p2m_type_t p2mt)
+p2m_set_entry(struct domain *d, unsigned long gfn, mfn_t mfn,
+ unsigned int page_order, p2m_type_t p2mt)
{
// XXX -- this might be able to be faster iff current->domain == d
mfn_t table_mfn = pagetable_get_mfn(d->arch.phys_table);
@@ -212,6 +253,7 @@ p2m_set_entry(struct domain *d, unsigned
unsigned long gfn_remainder = gfn;
l1_pgentry_t *p2m_entry;
l1_pgentry_t entry_content;
+ l2_pgentry_t l2e_content;
int rv=0;
#if CONFIG_PAGING_LEVELS >= 4
@@ -235,26 +277,53 @@ p2m_set_entry(struct domain *d, unsigned
PGT_l2_page_table) )
goto out;
- if ( !p2m_next_level(d, &table_mfn, &table, &gfn_remainder, gfn,
- L2_PAGETABLE_SHIFT - PAGE_SHIFT,
- L2_PAGETABLE_ENTRIES, PGT_l1_page_table) )
- goto out;
-
- p2m_entry = p2m_find_entry(table, &gfn_remainder, gfn,
- 0, L1_PAGETABLE_ENTRIES);
- ASSERT(p2m_entry);
+ if ( page_order == NORMAL_PAGE_ORDER )
+ {
+ if ( !p2m_next_level(d, &table_mfn, &table, &gfn_remainder, gfn,
+ L2_PAGETABLE_SHIFT - PAGE_SHIFT,
+ L2_PAGETABLE_ENTRIES, PGT_l1_page_table) )
+ goto out;
+
+ p2m_entry = p2m_find_entry(table, &gfn_remainder, gfn,
+ 0, L1_PAGETABLE_ENTRIES);
+ ASSERT(p2m_entry);
+
+ if ( mfn_valid(mfn) || (p2mt == p2m_mmio_direct) )
+ entry_content = l1e_from_pfn(mfn_x(mfn), p2m_type_to_flags(p2mt));
+ else
+ entry_content = l1e_empty();
+
+ /* level 1 entry */
+ paging_write_p2m_entry(d, gfn, p2m_entry, table_mfn, entry_content, 1);
+ }
+ else
+ {
+ p2m_entry = p2m_find_entry(table, &gfn_remainder, gfn,
+ L2_PAGETABLE_SHIFT - PAGE_SHIFT,
+ L2_PAGETABLE_ENTRIES);
+ ASSERT(p2m_entry);
+
+ if ( (l1e_get_flags(*p2m_entry) & _PAGE_PRESENT) &&
+ !(l1e_get_flags(*p2m_entry) & _PAGE_PSE) )
+ {
+ P2M_ERROR("configure P2M table 4KB L2 entry with large page\n");
+ domain_crash(d);
+ goto out;
+ }
+
+ if ( mfn_valid(mfn) )
+ l2e_content = l2e_from_pfn(mfn_x(mfn),
+ p2m_type_to_flags(p2mt) | _PAGE_PSE);
+ else
+ l2e_content = l2e_empty();
+
+ entry_content.l1 = l2e_content.l2;
+ paging_write_p2m_entry(d, gfn, p2m_entry, table_mfn, entry_content, 2);
+ }
/* Track the highest gfn for which we have ever had a valid mapping */
if ( mfn_valid(mfn) && (gfn > d->arch.p2m->max_mapped_pfn) )
d->arch.p2m->max_mapped_pfn = gfn;
-
- if ( mfn_valid(mfn) || (p2mt == p2m_mmio_direct) )
- entry_content = l1e_from_pfn(mfn_x(mfn), p2m_type_to_flags(p2mt));
- else
- entry_content = l1e_empty();
-
- /* level 1 entry */
- paging_write_p2m_entry(d, gfn, p2m_entry, table_mfn, entry_content, 1);
if ( iommu_enabled && is_hvm_domain(d) )
{
@@ -335,6 +404,16 @@ p2m_gfn_to_mfn(struct domain *d, unsigne
unmap_domain_page(l2e);
return _mfn(INVALID_MFN);
}
+ else if ( (l2e_get_flags(*l2e) & _PAGE_PSE) )
+ {
+ mfn = _mfn(l2e_get_pfn(*l2e) + l1_table_offset(addr));
+ *t = p2m_flags_to_type(l2e_get_flags(*l2e));
+ unmap_domain_page(l2e);
+
+ ASSERT(mfn_valid(mfn) || !p2m_is_ram(*t));
+ return (p2m_is_valid(*t)) ? mfn : _mfn(INVALID_MFN);
+ }
+
mfn = _mfn(l2e_get_pfn(*l2e));
unmap_domain_page(l2e);
@@ -358,6 +437,7 @@ static mfn_t p2m_gfn_to_mfn_current(unsi
{
mfn_t mfn = _mfn(INVALID_MFN);
p2m_type_t p2mt = p2m_mmio_dm;
+ paddr_t addr = ((paddr_t)gfn) << PAGE_SHIFT;
/* XXX This is for compatibility with the old model, where anything not
* XXX marked as RAM was considered to be emulated MMIO space.
* XXX Once we start explicitly registering MMIO regions in the p2m
@@ -366,25 +446,44 @@ static mfn_t p2m_gfn_to_mfn_current(unsi
if ( gfn <= current->domain->arch.p2m->max_mapped_pfn )
{
l1_pgentry_t l1e = l1e_empty();
+ l2_pgentry_t l2e = l2e_empty();
int ret;
ASSERT(gfn < (RO_MPT_VIRT_END - RO_MPT_VIRT_START)
/ sizeof(l1_pgentry_t));
- /* Need to __copy_from_user because the p2m is sparse and this
- * part might not exist */
- ret = __copy_from_user(&l1e,
- &phys_to_machine_mapping[gfn],
- sizeof(l1e));
-
- if ( ret == 0 ) {
- p2mt = p2m_flags_to_type(l1e_get_flags(l1e));
- ASSERT(l1e_get_pfn(l1e) != INVALID_MFN || !p2m_is_ram(p2mt));
+ ret = __copy_from_user(&l2e,
+ &__linear_l1_table[l1_linear_offset(RO_MPT_VIRT_START) + l2_linear_offset(addr)],
+ sizeof(l2e));
+
+ if ( (ret == 0) && (l2e_get_flags(l2e) & _PAGE_PRESENT) &&
+ (l2e_get_flags(l2e) & _PAGE_PSE) )
+ {
+ p2mt = p2m_flags_to_type(l2e_get_flags(l2e));
+ ASSERT(l2e_get_pfn(l2e) != INVALID_MFN || !p2m_is_ram(p2mt));
if ( p2m_is_valid(p2mt) )
- mfn = _mfn(l1e_get_pfn(l1e));
- else
- /* XXX see above */
+ mfn = _mfn(l2e_get_pfn(l2e) + l1_table_offset(addr));
+ else
p2mt = p2m_mmio_dm;
+ }
+ else
+ {
+
+ /* Need to __copy_from_user because the p2m is sparse and this
+ * part might not exist */
+ ret = __copy_from_user(&l1e,
+ &phys_to_machine_mapping[gfn],
+ sizeof(l1e));
+
+ if ( ret == 0 ) {
+ p2mt = p2m_flags_to_type(l1e_get_flags(l1e));
+ ASSERT(l1e_get_pfn(l1e) != INVALID_MFN || !p2m_is_ram(p2mt));
+ if ( p2m_is_valid(p2mt) )
+ mfn = _mfn(l1e_get_pfn(l1e));
+ else
+ /* XXX see above */
+ p2mt = p2m_mmio_dm;
+ }
}
}
@@ -430,9 +529,10 @@ void p2m_change_entry_type_global(struct
}
static inline
-int set_p2m_entry(struct domain *d, unsigned long gfn, mfn_t mfn, p2m_type_t p2mt)
-{
- return d->arch.p2m->set_entry(d, gfn, mfn, p2mt);
+int set_p2m_entry(struct domain *d, unsigned long gfn, mfn_t mfn,
+ unsigned int page_order, p2m_type_t p2mt)
+{
+ return d->arch.p2m->set_entry(d, gfn, mfn, page_order, p2mt);
}
// Allocate a new p2m table for a domain.
@@ -493,7 +593,8 @@ int p2m_alloc_table(struct domain *d,
P2M_PRINTK("populating p2m table\n");
/* Initialise physmap tables for slot zero. Other code assumes this. */
- if ( !set_p2m_entry(d, 0, _mfn(INVALID_MFN), p2m_invalid) )
+ if ( !set_p2m_entry(d, 0, _mfn(INVALID_MFN), NORMAL_PAGE_ORDER,
+ p2m_invalid) )
goto error;
/* Copy all existing mappings from the page list and m2p */
@@ -512,7 +613,7 @@ int p2m_alloc_table(struct domain *d,
(gfn != 0x55555555L)
#endif
&& gfn != INVALID_M2P_ENTRY
- && !set_p2m_entry(d, gfn, mfn, p2m_ram_rw) )
+ && !set_p2m_entry(d, gfn, mfn, NORMAL_PAGE_ORDER, p2m_ram_rw) )
goto error;
}
@@ -688,6 +789,28 @@ static void audit_p2m(struct domain *d)
gfn += 1 << (L2_PAGETABLE_SHIFT - PAGE_SHIFT);
continue;
}
+
+ /* check for super page */
+ if ( l2e_get_flags(l2e[i2]) & _PAGE_PSE )
+ {
+ mfn = l2e_get_pfn(l2e[i2]);
+ ASSERT(mfn_valid(_mfn(mfn)));
+ for ( i1 = 0; i1 < L1_PAGETABLE_ENTRIES; i1++)
+ {
+ m2pfn = get_gpfn_from_mfn(mfn+i1);
+ if ( m2pfn != (gfn + i) )
+ {
+ pmbad++;
+ P2M_PRINTK("mismatch: gfn %#lx -> mfn %#lx"
+ " -> gfn %#lx\n", gfn+i, mfn+i,
+ m2pfn);
+ BUG();
+ }
+ }
+ gfn += 1 << (L2_PAGETABLE_SHIFT - PAGE_SHIFT);
+ continue;
+ }
+
l1e = map_domain_page(mfn_x(_mfn(l2e_get_pfn(l2e[i2]))));
for ( i1 = 0; i1 < L1_PAGETABLE_ENTRIES; i1++, gfn++ )
@@ -737,35 +860,40 @@ static void audit_p2m(struct domain *d)
static void
-p2m_remove_page(struct domain *d, unsigned long gfn, unsigned long mfn)
-{
+p2m_remove_page(struct domain *d, unsigned long gfn, unsigned long mfn,
+ unsigned int page_order)
+{
+ int i;
if ( !paging_mode_translate(d) )
return;
P2M_DEBUG("removing gfn=%#lx mfn=%#lx\n", gfn, mfn);
- set_p2m_entry(d, gfn, _mfn(INVALID_MFN), p2m_invalid);
- set_gpfn_from_mfn(mfn, INVALID_M2P_ENTRY);
+ set_p2m_entry(d, gfn, _mfn(INVALID_MFN), page_order, p2m_invalid);
+ for ( i = 0; i < (1UL << page_order); i++ )
+ set_gpfn_from_mfn(mfn+i, INVALID_M2P_ENTRY);
}
void
guest_physmap_remove_page(struct domain *d, unsigned long gfn,
- unsigned long mfn)
+ unsigned long mfn, unsigned int page_order)
{
p2m_lock(d->arch.p2m);
audit_p2m(d);
- p2m_remove_page(d, gfn, mfn);
+ p2m_remove_page(d, gfn, mfn, page_order);
audit_p2m(d);
p2m_unlock(d->arch.p2m);
}
int
guest_physmap_add_entry(struct domain *d, unsigned long gfn,
- unsigned long mfn, p2m_type_t t)
+ unsigned long mfn, unsigned int page_order,
+ p2m_type_t t)
{
unsigned long ogfn;
p2m_type_t ot;
mfn_t omfn;
int rc = 0;
+ int i;
if ( !paging_mode_translate(d) )
return -EINVAL;
@@ -795,7 +923,8 @@ guest_physmap_add_entry(struct domain *d
if ( p2m_is_ram(ot) )
{
ASSERT(mfn_valid(omfn));
- set_gpfn_from_mfn(mfn_x(omfn), INVALID_M2P_ENTRY);
+ for ( i = 0; i < (1UL << page_order); i++ )
+ set_gpfn_from_mfn(mfn_x(omfn)+i, INVALID_M2P_ENTRY);
}
ogfn = mfn_to_gfn(d, _mfn(mfn));
@@ -818,21 +947,23 @@ guest_physmap_add_entry(struct domain *d
P2M_DEBUG("old gfn=%#lx -> mfn %#lx\n",
ogfn , mfn_x(omfn));
if ( mfn_x(omfn) == mfn )
- p2m_remove_page(d, ogfn, mfn);
+ p2m_remove_page(d, ogfn, mfn, page_order);
}
}
if ( mfn_valid(_mfn(mfn)) )
{
- if ( !set_p2m_entry(d, gfn, _mfn(mfn), t) )
+ if ( !set_p2m_entry(d, gfn, _mfn(mfn), page_order, t) )
rc = -EINVAL;
- set_gpfn_from_mfn(mfn, gfn);
+ for ( i = 0; i < (1UL << page_order); i++ )
+ set_gpfn_from_mfn(mfn+i, gfn+i);
}
else
{
gdprintk(XENLOG_WARNING, "Adding bad mfn to p2m map (%#lx -> %#lx)\n",
gfn, mfn);
- if ( !set_p2m_entry(d, gfn, _mfn(INVALID_MFN), p2m_invalid) )
+ if ( !set_p2m_entry(d, gfn, _mfn(INVALID_MFN), page_order,
+ p2m_invalid) )
rc = -EINVAL;
}
@@ -851,7 +982,7 @@ void p2m_change_type_global(struct domai
l1_pgentry_t l1e_content;
l1_pgentry_t *l1e;
l2_pgentry_t *l2e;
- mfn_t l1mfn;
+ mfn_t l1mfn, l2mfn;
int i1, i2;
l3_pgentry_t *l3e;
int i3;
@@ -891,11 +1022,26 @@ void p2m_change_type_global(struct domai
{
continue;
}
+ l2mfn = _mfn(l3e_get_pfn(l3e[i3]));
l2e = map_domain_page(l3e_get_pfn(l3e[i3]));
for ( i2 = 0; i2 < L2_PAGETABLE_ENTRIES; i2++ )
{
if ( !(l2e_get_flags(l2e[i2]) & _PAGE_PRESENT) )
{
+ continue;
+ }
+
+ if ( (l2e_get_flags(l2e[i2]) & _PAGE_PSE) )
+ {
+ flags = l2e_get_flags(l2e[i2]);
+ if ( p2m_flags_to_type(flags) != ot )
+ continue;
+ mfn = l2e_get_pfn(l2e[i2]);
+ gfn = get_gpfn_from_mfn(mfn);
+ flags = p2m_flags_to_type(nt);
+ l1e_content = l1e_from_pfn(mfn, flags | _PAGE_PSE);
+ paging_write_p2m_entry(d, gfn, (l1_pgentry_t *)&l2e[i2],
+ l2mfn, l1e_content, 2);
continue;
}
@@ -944,7 +1090,7 @@ p2m_type_t p2m_change_type(struct domain
mfn = gfn_to_mfn(d, gfn, &pt);
if ( pt == ot )
- set_p2m_entry(d, gfn, mfn, nt);
+ set_p2m_entry(d, gfn, mfn, NORMAL_PAGE_ORDER, nt);
p2m_unlock(d->arch.p2m);
@@ -968,7 +1114,7 @@ set_mmio_p2m_entry(struct domain *d, uns
set_gpfn_from_mfn(mfn_x(omfn), INVALID_M2P_ENTRY);
}
- rc = set_p2m_entry(d, gfn, mfn, p2m_mmio_direct);
+ rc = set_p2m_entry(d, gfn, mfn, NORMAL_PAGE_ORDER, p2m_mmio_direct);
if ( 0 == rc )
gdprintk(XENLOG_ERR,
"set_mmio_p2m_entry: set_p2m_entry failed! mfn=%08lx\n",
@@ -992,7 +1138,7 @@ clear_mmio_p2m_entry(struct domain *d, u
"clear_mmio_p2m_entry: gfn_to_mfn failed! gfn=%08lx\n", gfn);
return 0;
}
- rc = set_p2m_entry(d, gfn, _mfn(INVALID_MFN), 0);
+ rc = set_p2m_entry(d, gfn, _mfn(INVALID_MFN), NORMAL_PAGE_ORDER, 0);
return rc;
}
diff -r 810d8c3ac992 xen/common/grant_table.c
--- a/xen/common/grant_table.c Thu May 08 16:58:33 2008 +0100
+++ b/xen/common/grant_table.c Sun May 11 11:00:12 2008 -0500
@@ -1159,7 +1159,7 @@ gnttab_transfer(
spin_lock(&e->grant_table->lock);
sha = &shared_entry(e->grant_table, gop.ref);
- guest_physmap_add_page(e, sha->frame, mfn);
+ guest_physmap_add_page(e, sha->frame, mfn, NORMAL_PAGE_ORDER);
sha->frame = mfn;
wmb();
sha->flags |= GTF_transfer_completed;
diff -r 810d8c3ac992 xen/common/memory.c
--- a/xen/common/memory.c Thu May 08 16:58:33 2008 +0100
+++ b/xen/common/memory.c Sun May 11 11:03:13 2008 -0500
@@ -114,34 +114,60 @@ static void populate_physmap(struct memo
page = alloc_domheap_pages(
d, a->extent_order, a->memflags | MEMF_node(node));
- if ( unlikely(page == NULL) )
- {
- gdprintk(XENLOG_INFO, "Could not allocate order=%d extent: "
- "id=%d memflags=%x (%ld of %d)\n",
- a->extent_order, d->domain_id, a->memflags,
- i, a->nr_extents);
- goto out;
- }
-
- mfn = page_to_mfn(page);
-
- if ( unlikely(paging_mode_translate(d)) )
- {
+
+ if ( unlikely(page == NULL) )
+ {
+ /* Fail unless the domain is in translate mode. */
+ if ( !paging_mode_translate(d) )
+ {
+ gdprintk(XENLOG_INFO, "Could not allocate order=%d extent: "
+ "id=%d memflags=%x (%ld of %d)\n",
+ a->extent_order, d->domain_id, a->memflags,
+ i, a->nr_extents);
+ goto out;
+ }
+
+ /* Fall back to allocating the extent as individual 4KB pages. */
for ( j = 0; j < (1 << a->extent_order); j++ )
- if ( guest_physmap_add_page(d, gpfn + j, mfn + j) )
+ {
+ page = alloc_domheap_pages(d, 0,
+ a->memflags | MEMF_node(node));
+ if ( page == NULL )
+ {
+ gdprintk(XENLOG_INFO, "Could not allocate order=%d extent:"
+ "id=%d memflags=%x (%ld of %d)\n",
+ 0, d->domain_id, a->memflags, i, a->nr_extents);
goto out;
- }
- else
- {
- for ( j = 0; j < (1 << a->extent_order); j++ )
- set_gpfn_from_mfn(mfn + j, gpfn + j);
-
- /* Inform the domain of the new page's machine address. */
- if ( unlikely(__copy_to_guest_offset(a->extent_list, i, &mfn, 1)) )
- goto out;
- }
- }
-
+ }
+
+ mfn = page_to_mfn(page);
+
+ if ( guest_physmap_add_page(d, gpfn+j, mfn,
+ NORMAL_PAGE_ORDER) )
+ goto out;
+ }
+ }
+ else /* successful in allocating page of extent_order */
+ {
+ mfn = page_to_mfn(page);
+
+ if ( unlikely(paging_mode_translate(d)) )
+ {
+ if ( guest_physmap_add_page(d, gpfn, mfn, a->extent_order) )
+ goto out;
+ }
+ else
+ {
+ for ( j = 0; j < (1 << a->extent_order); j++ )
+ set_gpfn_from_mfn(mfn + j, gpfn + j);
+
+ /* Inform the domain of the new page's machine address. */
+ if ( unlikely(__copy_to_guest_offset(a->extent_list, i, &mfn,
+ 1)) )
+ goto out;
+ }
+ }
+ }
out:
a->nr_done = i;
}
@@ -172,7 +198,7 @@ int guest_remove_page(struct domain *d,
if ( test_and_clear_bit(_PGC_allocated, &page->count_info) )
put_page(page);
- guest_physmap_remove_page(d, gmfn, mfn);
+ guest_physmap_remove_page(d, gmfn, mfn, NORMAL_PAGE_ORDER);
put_page(page);
@@ -419,7 +445,8 @@ static long memory_exchange(XEN_GUEST_HA
if ( !test_and_clear_bit(_PGC_allocated, &page->count_info) )
BUG();
mfn = page_to_mfn(page);
- guest_physmap_remove_page(d, mfn_to_gmfn(d, mfn), mfn);
+ guest_physmap_remove_page(d, mfn_to_gmfn(d, mfn), mfn,
+ NORMAL_PAGE_ORDER);
put_page(page);
}
@@ -441,8 +468,8 @@ static long memory_exchange(XEN_GUEST_HA
if ( unlikely(paging_mode_translate(d)) )
{
/* Ignore failure here. There's nothing we can do. */
- for ( k = 0; k < (1UL << exch.out.extent_order); k++ )
- (void)guest_physmap_add_page(d, gpfn + k, mfn + k);
+ (void)guest_physmap_add_page(d, gpfn, mfn,
+ exch.out.extent_order);
}
else
{
diff -r 810d8c3ac992 xen/include/asm-ia64/shadow.h
--- a/xen/include/asm-ia64/shadow.h Thu May 08 16:58:33 2008 +0100
+++ b/xen/include/asm-ia64/shadow.h Sun May 11 10:57:28 2008 -0500
@@ -40,8 +40,10 @@
* Utilities to change relationship of gpfn->mfn for designated domain,
* which is required by gnttab transfer, balloon, device model and etc.
*/
-int guest_physmap_add_page(struct domain *d, unsigned long gpfn, unsigned long mfn);
-void guest_physmap_remove_page(struct domain *d, unsigned long gpfn, unsigned long mfn);
+int guest_physmap_add_page(struct domain *d, unsigned long gpfn,
+ unsigned long mfn, unsigned int page_order);
+void guest_physmap_remove_page(struct domain *d, unsigned long gpfn,
+ unsigned long mfn, unsigned int page_order);
static inline int
shadow_mode_enabled(struct domain *d)
diff -r 810d8c3ac992 xen/include/asm-x86/mm.h
--- a/xen/include/asm-x86/mm.h Thu May 08 16:58:33 2008 +0100
+++ b/xen/include/asm-x86/mm.h Sun May 11 10:43:50 2008 -0500
@@ -124,6 +124,14 @@ static inline u32 pickle_domptr(struct d
/* The order of the largest allocation unit we use for shadow pages */
#define SHADOW_MAX_ORDER 2 /* Need up to 16k allocs for 32-bit on PAE/64 */
+/* The order of contiguously allocated super page frames */
+#define NORMAL_PAGE_ORDER 0 /* 4KB page */
+#if CONFIG_PAGING_LEVELS == 2
+#define SUPER_PAGE_ORDER 10 /* 4MB page */
+#else
+#define SUPER_PAGE_ORDER 9 /* 2MB page */
+#endif
+
#define page_get_owner(_p) (unpickle_domptr((_p)->u.inuse._domain))
#define page_set_owner(_p,_d) ((_p)->u.inuse._domain = pickle_domptr(_d))
diff -r 810d8c3ac992 xen/include/asm-x86/p2m.h
--- a/xen/include/asm-x86/p2m.h Thu May 08 16:58:33 2008 +0100
+++ b/xen/include/asm-x86/p2m.h Sun May 11 10:57:28 2008 -0500
@@ -102,7 +102,8 @@ struct p2m_domain {
void (*free_page )(struct domain *d,
struct page_info *pg);
int (*set_entry )(struct domain *d, unsigned long gfn,
- mfn_t mfn, p2m_type_t p2mt);
+ mfn_t mfn, unsigned int page_order,
+ p2m_type_t p2mt);
mfn_t (*get_entry )(struct domain *d, unsigned long gfn,
p2m_type_t *p2mt);
mfn_t (*get_entry_current)(unsigned long gfn,
@@ -203,21 +204,23 @@ void p2m_final_teardown(struct domain *d
/* Add a page to a domain's p2m table */
int guest_physmap_add_entry(struct domain *d, unsigned long gfn,
- unsigned long mfn, p2m_type_t t);
+ unsigned long mfn, unsigned int page_order,
+ p2m_type_t t);
/* Untyped version for RAM only, for compatibility
*
* Return 0 for success
*/
static inline int guest_physmap_add_page(struct domain *d, unsigned long gfn,
- unsigned long mfn)
-{
- return guest_physmap_add_entry(d, gfn, mfn, p2m_ram_rw);
+ unsigned long mfn,
+ unsigned int page_order)
+{
+ return guest_physmap_add_entry(d, gfn, mfn, page_order, p2m_ram_rw);
}
/* Remove a page from a domain's p2m table */
void guest_physmap_remove_page(struct domain *d, unsigned long gfn,
- unsigned long mfn);
+ unsigned long mfn, unsigned int page_order);
/* Change types across all p2m entries in a domain */
void p2m_change_type_global(struct domain *d, p2m_type_t ot, p2m_type_t nt);
diff -r 810d8c3ac992 xen/include/xen/paging.h
--- a/xen/include/xen/paging.h Thu May 08 16:58:33 2008 +0100
+++ b/xen/include/xen/paging.h Sun May 11 16:38:05 2008 -0500
@@ -18,8 +18,8 @@
#else
#define paging_mode_translate(d) (0)
-#define guest_physmap_add_page(d, p, m) (0)
-#define guest_physmap_remove_page(d, p, m) ((void)0)
+#define guest_physmap_add_page(d, p, m, o) (0)
+#define guest_physmap_remove_page(d, p, m, o) ((void)0)
#endif
[-- Attachment #3: super_page_patch.txt --]
[-- Type: text/plain, Size: 44468 bytes --]
diff -r 810d8c3ac992 -r 583dca746efb tools/libxc/xc_hvm_build.c
--- a/tools/libxc/xc_hvm_build.c Thu May 08 16:58:33 2008 +0100
+++ b/tools/libxc/xc_hvm_build.c Mon May 12 04:06:34 2008 -0500
@@ -157,8 +157,10 @@ static int setup_guest(int xc_handle,
char *image, unsigned long image_size)
{
xen_pfn_t *page_array = NULL;
+ xen_pfn_t *super_page_array = NULL;
unsigned long i, nr_pages = (unsigned long)memsize << (20 - PAGE_SHIFT);
- unsigned long special_page_nr, entry_eip, cur_pages;
+ unsigned long nr_super_pages;
+ unsigned long special_page_nr, entry_eip, cur_pages, limit;
struct xen_add_to_physmap xatp;
struct shared_info *shared_info;
void *e820_page;
@@ -167,6 +169,7 @@ static int setup_guest(int xc_handle,
uint64_t v_start, v_end;
int rc;
xen_capabilities_info_t caps;
+ int super_page_order;
/* An HVM guest must be initialised with at least 2MB memory. */
if ( memsize < 2 )
@@ -189,6 +192,12 @@ static int setup_guest(int xc_handle,
PERROR("Guest OS must load to a page boundary.\n");
goto error_out;
}
+
+ /* We only support 2MB super pages since 32bit non-PAE is not officially
+ * supported by Xen any more.
+ */
+ nr_super_pages = (unsigned long)memsize >> 1;
+ super_page_order = 9;
IPRINTF("VIRTUAL MEMORY ARRANGEMENT:\n"
" Loader: %016"PRIx64"->%016"PRIx64"\n"
@@ -198,7 +207,9 @@ static int setup_guest(int xc_handle,
v_start, v_end,
elf_uval(&elf, elf.ehdr, e_entry));
- if ( (page_array = malloc(nr_pages * sizeof(xen_pfn_t))) == NULL )
+ if ( (page_array = malloc(nr_pages * sizeof(xen_pfn_t))) == NULL ||
+ (super_page_array =
+ malloc(nr_super_pages * sizeof(xen_pfn_t))) == NULL )
{
PERROR("Could not allocate memory.\n");
goto error_out;
@@ -206,26 +217,44 @@ static int setup_guest(int xc_handle,
for ( i = 0; i < nr_pages; i++ )
page_array[i] = i;
+ for ( i = 0; i < nr_super_pages; i++ )
+ super_page_array[i] = i << super_page_order;
for ( i = HVM_BELOW_4G_RAM_END >> PAGE_SHIFT; i < nr_pages; i++ )
page_array[i] += HVM_BELOW_4G_MMIO_LENGTH >> PAGE_SHIFT;
-
- /*
- * Allocate memory for HVM guest, skipping VGA hole 0xA0000-0xC0000.
- * We allocate pages in batches of no more than 2048 to ensure that
- * we can be preempted and hence dom0 remains responsive.
- */
+ for ( i = HVM_BELOW_4G_RAM_END >> (PAGE_SHIFT + super_page_order);
+ i < nr_super_pages; i++ )
+ super_page_array[i] += HVM_BELOW_4G_MMIO_LENGTH >> PAGE_SHIFT;
+
+
+ /* Allocate memory for HVM guest, skipping VGA hole 0xA0000-0xC0000. */
rc = xc_domain_memory_populate_physmap(
xc_handle, dom, 0xa0, 0, 0, &page_array[0x00]);
- cur_pages = 0xc0;
- while ( (rc == 0) && (nr_pages > cur_pages) )
- {
- unsigned long count = nr_pages - cur_pages;
- if ( count > 2048 )
- count = 2048;
+ if ( rc == 0 )
rc = xc_domain_memory_populate_physmap(
- xc_handle, dom, count, 0, 0, &page_array[cur_pages]);
+ xc_handle, dom, 0x200-0xc0, 0, 0, &page_array[0xc0]);
+
+ /* We allocate pages in batches of no more than 8MB to ensure that
+ * we can be preempted and hence dom0 remains responsive.
+ */
+ limit = 4;
+ cur_pages = 1;
+ while ( (rc == 0) && (nr_super_pages > cur_pages) )
+ {
+ unsigned long count = nr_super_pages - cur_pages;
+ if ( count > limit )
+ count = limit;
+ rc = xc_domain_memory_populate_physmap(xc_handle, dom, count,
+ super_page_order, 0,
+ &super_page_array[cur_pages]);
cur_pages += count;
}
+
+ /* Handle an odd memory size in MB (e.g. 255MB): populate the tail with 4KB pages */
+ if ( rc == 0 )
+ rc = xc_domain_memory_populate_physmap(
+ xc_handle, dom, nr_pages - (nr_super_pages << super_page_order),
+ 0, 0, &page_array[nr_super_pages << super_page_order]);
+
if ( rc != 0 )
{
PERROR("Could not allocate memory for HVM guest.\n");
@@ -314,10 +343,12 @@ static int setup_guest(int xc_handle,
}
free(page_array);
+ free(super_page_array);
return 0;
error_out:
free(page_array);
+ free(super_page_array);
return -1;
}
diff -r 810d8c3ac992 -r 583dca746efb xen/arch/ia64/xen/mm.c
--- a/xen/arch/ia64/xen/mm.c Thu May 08 16:58:33 2008 +0100
+++ b/xen/arch/ia64/xen/mm.c Mon May 12 04:06:34 2008 -0500
@@ -2415,7 +2415,7 @@ steal_page(struct domain *d, struct page
int
guest_physmap_add_page(struct domain *d, unsigned long gpfn,
- unsigned long mfn)
+ unsigned long mfn, unsigned int page_order)
{
BUG_ON(!mfn_valid(mfn));
BUG_ON(mfn_to_page(mfn)->count_info != (PGC_allocated | 1));
@@ -2432,7 +2432,7 @@ guest_physmap_add_page(struct domain *d,
void
guest_physmap_remove_page(struct domain *d, unsigned long gpfn,
- unsigned long mfn)
+ unsigned long mfn, unsigned int page_order)
{
BUG_ON(mfn == 0);//XXX
zap_domain_page_one(d, gpfn << PAGE_SHIFT, 0, mfn);
@@ -2838,7 +2838,7 @@ arch_memory_op(int op, XEN_GUEST_HANDLE(
if (prev_mfn && mfn_valid(prev_mfn)) {
if (is_xen_heap_mfn(prev_mfn))
/* Xen heap frames are simply unhooked from this phys slot. */
- guest_physmap_remove_page(d, xatp.gpfn, prev_mfn);
+ guest_physmap_remove_page(d, xatp.gpfn, prev_mfn, 0);
else
/* Normal domain memory is freed, to avoid leaking memory. */
guest_remove_page(d, xatp.gpfn);
@@ -2847,10 +2847,10 @@ arch_memory_op(int op, XEN_GUEST_HANDLE(
/* Unmap from old location, if any. */
gpfn = get_gpfn_from_mfn(mfn);
if (gpfn != INVALID_M2P_ENTRY)
- guest_physmap_remove_page(d, gpfn, mfn);
+ guest_physmap_remove_page(d, gpfn, mfn, 0);
/* Map at new location. */
- guest_physmap_add_page(d, xatp.gpfn, mfn);
+ guest_physmap_add_page(d, xatp.gpfn, mfn, 0);
out:
domain_unlock(d);
diff -r 810d8c3ac992 -r 583dca746efb xen/arch/x86/mm.c
--- a/xen/arch/x86/mm.c Thu May 08 16:58:33 2008 +0100
+++ b/xen/arch/x86/mm.c Mon May 12 04:06:34 2008 -0500
@@ -3287,7 +3287,8 @@ long arch_memory_op(int op, XEN_GUEST_HA
{
if ( is_xen_heap_mfn(prev_mfn) )
/* Xen heap frames are simply unhooked from this phys slot. */
- guest_physmap_remove_page(d, xatp.gpfn, prev_mfn);
+ guest_physmap_remove_page(d, xatp.gpfn, prev_mfn,
+ NORMAL_PAGE_ORDER);
else
/* Normal domain memory is freed, to avoid leaking memory. */
guest_remove_page(d, xatp.gpfn);
@@ -3296,10 +3297,10 @@ long arch_memory_op(int op, XEN_GUEST_HA
/* Unmap from old location, if any. */
gpfn = get_gpfn_from_mfn(mfn);
if ( gpfn != INVALID_M2P_ENTRY )
- guest_physmap_remove_page(d, gpfn, mfn);
+ guest_physmap_remove_page(d, gpfn, mfn, NORMAL_PAGE_ORDER);
/* Map at new location. */
- guest_physmap_add_page(d, xatp.gpfn, mfn);
+ guest_physmap_add_page(d, xatp.gpfn, mfn, NORMAL_PAGE_ORDER);
domain_unlock(d);
diff -r 810d8c3ac992 -r 583dca746efb xen/arch/x86/mm/hap/p2m-ept.c
--- a/xen/arch/x86/mm/hap/p2m-ept.c Thu May 08 16:58:33 2008 +0100
+++ b/xen/arch/x86/mm/hap/p2m-ept.c Mon May 12 04:06:34 2008 -0500
@@ -20,6 +20,7 @@
#include <xen/domain_page.h>
#include <xen/sched.h>
#include <asm/current.h>
+#include <asm/paging.h>
#include <asm/types.h>
#include <asm/domain.h>
#include <asm/p2m.h>
@@ -46,6 +47,9 @@ static void ept_p2m_type_to_flags(ept_en
}
}
+#define GUEST_TABLE_NORMAL_PAGE 1
+#define GUEST_TABLE_SUPER_PAGE 2
+
static int ept_next_level(struct domain *d, bool_t read_only,
ept_entry_t **table, unsigned long *gfn_remainder,
u32 shift)
@@ -54,7 +58,6 @@ static int ept_next_level(struct domain
u32 index;
index = *gfn_remainder >> shift;
- *gfn_remainder &= (1UL << shift) - 1;
ept_entry = (*table) + index;
@@ -83,31 +86,53 @@ static int ept_next_level(struct domain
ept_entry->r = ept_entry->w = ept_entry->x = 1;
}
- next = map_domain_page(ept_entry->mfn);
- unmap_domain_page(*table);
- *table = next;
-
- return 1;
+ if ( !ept_entry->sp_avail )
+ {
+ *gfn_remainder &= (1UL << shift) - 1;
+ next = map_domain_page(ept_entry->mfn);
+ unmap_domain_page(*table);
+ *table = next;
+ return GUEST_TABLE_NORMAL_PAGE;
+ }
+ else
+ return GUEST_TABLE_SUPER_PAGE;
}
static int
-ept_set_entry(struct domain *d, unsigned long gfn, mfn_t mfn, p2m_type_t p2mt)
-{
- ept_entry_t *table =
- map_domain_page(mfn_x(pagetable_get_mfn(d->arch.phys_table)));
- unsigned long gfn_remainder = gfn;
+ept_set_entry(struct domain *d, unsigned long gfn, mfn_t mfn,
+ unsigned int order, p2m_type_t p2mt)
+{
+ ept_entry_t *table = NULL;
+ unsigned long gfn_remainder = gfn, offset = 0;
ept_entry_t *ept_entry = NULL;
u32 index;
- int i, rv = 0;
+ int i, rv = 0, ret = 0;
+ int walk_level = order / EPT_TABLE_ORDER;
/* Should check if gfn obeys GAW here */
- for ( i = EPT_DEFAULT_GAW; i > 0; i-- )
- if ( !ept_next_level(d, 0, &table, &gfn_remainder,
- i * EPT_TABLE_ORDER) )
+ if ( order != 0 )
+ if ( (gfn & ((1UL << order) - 1)) )
+ return 1;
+
+ table = map_domain_page(mfn_x(pagetable_get_mfn(d->arch.phys_table)));
+
+ ASSERT(table != NULL);
+
+ for ( i = EPT_DEFAULT_GAW; i > walk_level; i-- )
+ {
+ ret = ept_next_level(d, 0, &table, &gfn_remainder,
+ i * EPT_TABLE_ORDER);
+ if ( !ret )
goto out;
-
- index = gfn_remainder;
+ else if ( ret == GUEST_TABLE_SUPER_PAGE )
+ break;
+ }
+
+ index = gfn_remainder >> ( i ? (i * EPT_TABLE_ORDER): order);
+ walk_level = ( i ? ( i * EPT_TABLE_ORDER) : order) / EPT_TABLE_ORDER;
+ offset = (gfn_remainder & ( ((1 << (i*EPT_TABLE_ORDER)) - 1)));
+
ept_entry = table + index;
if ( mfn_valid(mfn_x(mfn)) || (p2mt == p2m_mmio_direct) )
@@ -117,9 +142,20 @@ ept_set_entry(struct domain *d, unsigned
d->arch.p2m->max_mapped_pfn = gfn;
ept_entry->emt = EPT_DEFAULT_MT;
- ept_entry->sp_avail = 0;
+ ept_entry->sp_avail = walk_level ? 1 : 0;
+
+ if ( ret == GUEST_TABLE_SUPER_PAGE )
+ {
+ ept_entry->mfn = mfn_x(mfn) - offset;
+ if ( ept_entry->avail1 == p2m_ram_logdirty &&
+ p2mt == p2m_ram_rw )
+ for ( i = 0; i < 512; i++ )
+ paging_mark_dirty(d, mfn_x(mfn)-offset+i);
+ }
+ else
+ ept_entry->mfn = mfn_x(mfn);
+
ept_entry->avail1 = p2mt;
- ept_entry->mfn = mfn_x(mfn);
ept_entry->rsvd = 0;
ept_entry->avail2 = 0;
/* last step */
@@ -132,14 +168,42 @@ ept_set_entry(struct domain *d, unsigned
/* Success */
rv = 1;
- out:
+out:
unmap_domain_page(table);
ept_sync_domain(d);
+ /* For now the p2m table is not shared with the VT-d page table. */
+
+ if ( iommu_enabled && is_hvm_domain(d) )
+ {
+ if ( p2mt == p2m_ram_rw )
+ {
+ if ( ret == GUEST_TABLE_SUPER_PAGE )
+ {
+ for ( i = 0; i < 512; i++ )
+ iommu_map_page(d, gfn-offset+i, mfn_x(mfn)-offset+i);
+ }
+ else if ( ret )
+ iommu_map_page(d, gfn, mfn_x(mfn));
+ }
+ else
+ {
+ if ( ret == GUEST_TABLE_SUPER_PAGE )
+ {
+ for ( i = 0; i < 512; i++ )
+ iommu_unmap_page(d, gfn-offset+i);
+ }
+ else if ( ret )
+ iommu_unmap_page(d, gfn);
+ }
+ }
+
+#ifdef P2M_SHARE_WITH_VTD_PAGE_TABLE
/* If p2m table is shared with vtd page-table. */
if ( iommu_enabled && is_hvm_domain(d) && (p2mt == p2m_mmio_direct) )
iommu_flush(d, gfn, (u64*)ept_entry);
+#endif
return rv;
}
@@ -152,7 +216,7 @@ static mfn_t ept_get_entry(struct domain
unsigned long gfn_remainder = gfn;
ept_entry_t *ept_entry;
u32 index;
- int i;
+ int i, ret=0;
mfn_t mfn = _mfn(INVALID_MFN);
*t = p2m_mmio_dm;
@@ -164,17 +228,31 @@ static mfn_t ept_get_entry(struct domain
/* Should check if gfn obeys GAW here. */
for ( i = EPT_DEFAULT_GAW; i > 0; i-- )
- if ( !ept_next_level(d, 1, &table, &gfn_remainder,
- i * EPT_TABLE_ORDER) )
+ {
+ ret = ept_next_level(d, 1, &table, &gfn_remainder,
+ i * EPT_TABLE_ORDER);
+ if ( !ret )
goto out;
-
- index = gfn_remainder;
+ else if ( ret == GUEST_TABLE_SUPER_PAGE )
+ break;
+ }
+
+ index = gfn_remainder >> ( i * EPT_TABLE_ORDER);
ept_entry = table + index;
if ( ept_entry->avail1 != p2m_invalid )
{
*t = ept_entry->avail1;
mfn = _mfn(ept_entry->mfn);
+ if ( i )
+ {
+ /* This entry may be a super page: add the gfn's 4K offset within
+ * it so callers get a per-4K-page mfn, as from a normal p2m table.
+ */
+ unsigned long split_mfn =
+ mfn_x(mfn) + (gfn_remainder & ( ((1 << (i*EPT_TABLE_ORDER)) - 1 )));
+ mfn = _mfn(split_mfn);
+ }
}
out:
@@ -205,33 +283,63 @@ static void ept_change_entry_type_global
l4e = map_domain_page(mfn_x(pagetable_get_mfn(d->arch.phys_table)));
for (i4 = 0; i4 < EPT_PAGETABLE_ENTRIES; i4++ )
{
- if ( !l4e[i4].epte || l4e[i4].sp_avail )
+ if ( !l4e[i4].epte )
continue;
- l3e = map_domain_page(l4e[i4].mfn);
- for ( i3 = 0; i3 < EPT_PAGETABLE_ENTRIES; i3++ )
- {
- if ( !l3e[i3].epte || l3e[i3].sp_avail )
+ if ( !l4e[i4].sp_avail )
+ {
+ l3e = map_domain_page(l4e[i4].mfn);
+ for ( i3 = 0; i3 < EPT_PAGETABLE_ENTRIES; i3++ )
+ {
+ if ( !l3e[i3].epte )
+ continue;
+ if ( !l3e[i3].sp_avail )
+ {
+ l2e = map_domain_page(l3e[i3].mfn);
+ for ( i2 = 0; i2 < EPT_PAGETABLE_ENTRIES; i2++ )
+ {
+ if ( !l2e[i2].epte )
+ continue;
+ if ( !l2e[i2].sp_avail )
+ {
+ l1e = map_domain_page(l2e[i2].mfn);
+ for ( i1 = 0; i1 < EPT_PAGETABLE_ENTRIES; i1++ )
+ {
+ if ( !l1e[i1].epte )
+ continue;
+ if ( l1e[i1].avail1 != ot )
+ continue;
+ l1e[i1].avail1 = nt;
+ ept_p2m_type_to_flags(l1e+i1, nt);
+ }
+ unmap_domain_page(l1e);
+ }
+ else
+ {
+ if ( l2e[i2].avail1 != ot )
+ continue;
+ l2e[i2].avail1 = nt;
+ ept_p2m_type_to_flags(l2e+i2, nt);
+ }
+ }
+ unmap_domain_page(l2e);
+ }
+ else
+ {
+ if ( l3e[i3].avail1 != ot )
+ continue;
+ l3e[i3].avail1 = nt;
+ ept_p2m_type_to_flags(l3e+i3, nt);
+ }
+ }
+ unmap_domain_page(l3e);
+ }
+ else
+ {
+ if ( l4e[i4].avail1 != ot )
continue;
- l2e = map_domain_page(l3e[i3].mfn);
- for ( i2 = 0; i2 < EPT_PAGETABLE_ENTRIES; i2++ )
- {
- if ( !l2e[i2].epte || l2e[i2].sp_avail )
- continue;
- l1e = map_domain_page(l2e[i2].mfn);
- for ( i1 = 0; i1 < EPT_PAGETABLE_ENTRIES; i1++ )
- {
- if ( !l1e[i1].epte )
- continue;
- if ( l1e[i1].avail1 != ot )
- continue;
- l1e[i1].avail1 = nt;
- ept_p2m_type_to_flags(l1e+i1, nt);
- }
- unmap_domain_page(l1e);
- }
- unmap_domain_page(l2e);
- }
- unmap_domain_page(l3e);
+ l4e[i4].avail1 = nt;
+ ept_p2m_type_to_flags(l4e+i4, nt);
+ }
}
unmap_domain_page(l4e);
diff -r 810d8c3ac992 -r 583dca746efb xen/arch/x86/mm/p2m.c
--- a/xen/arch/x86/mm/p2m.c Thu May 08 16:58:33 2008 +0100
+++ b/xen/arch/x86/mm/p2m.c Mon May 12 04:06:34 2008 -0500
@@ -151,9 +151,11 @@ p2m_next_level(struct domain *d, mfn_t *
unsigned long *gfn_remainder, unsigned long gfn, u32 shift,
u32 max, unsigned long type)
{
+ l1_pgentry_t *l1_entry;
l1_pgentry_t *p2m_entry;
l1_pgentry_t new_entry;
void *next;
+ int i;
ASSERT(d->arch.p2m->alloc_page);
if ( !(p2m_entry = p2m_find_entry(*table, gfn_remainder, gfn,
@@ -194,6 +196,44 @@ p2m_next_level(struct domain *d, mfn_t *
break;
}
}
+
+ ASSERT(l1e_get_flags(*p2m_entry) & _PAGE_PRESENT);
+
+ /* Split a single large page into 4KB pages in the P2M table. */
+ if ( type == PGT_l1_page_table && (l1e_get_flags(*p2m_entry) & _PAGE_PSE) )
+ {
+ unsigned long flags, pfn;
+ struct page_info *pg = d->arch.p2m->alloc_page(d);
+ if ( pg == NULL )
+ return 0;
+ list_add_tail(&pg->list, &d->arch.p2m->pages);
+ pg->u.inuse.type_info = PGT_l1_page_table | 1 | PGT_validated;
+ pg->count_info = 1;
+
+ /* New splintered mappings inherit the flags of the old superpage,
+ * with a little reorganisation for the _PAGE_PSE_PAT bit. */
+ flags = l1e_get_flags(*p2m_entry);
+ pfn = l1e_get_pfn(*p2m_entry);
+ if ( pfn & 1 ) /* ==> _PAGE_PSE_PAT was set */
+ pfn -= 1; /* Clear it; _PAGE_PSE becomes _PAGE_PAT */
+ else
+ flags &= ~_PAGE_PSE; /* Clear _PAGE_PSE (== _PAGE_PAT) */
+
+ l1_entry = map_domain_page(mfn_x(page_to_mfn(pg)));
+ for ( i = 0; i < L1_PAGETABLE_ENTRIES; i++ )
+ {
+ new_entry = l1e_from_pfn(pfn + i, flags);
+ paging_write_p2m_entry(d, gfn,
+ l1_entry+i, *table_mfn, new_entry, 1);
+ }
+ unmap_domain_page(l1_entry);
+
+ new_entry = l1e_from_pfn(mfn_x(page_to_mfn(pg)),
+ __PAGE_HYPERVISOR|_PAGE_USER);
+ paging_write_p2m_entry(d, gfn,
+ p2m_entry, *table_mfn, new_entry, 2);
+ }
+
*table_mfn = _mfn(l1e_get_pfn(*p2m_entry));
next = map_domain_page(mfn_x(*table_mfn));
unmap_domain_page(*table);
@@ -204,7 +244,8 @@ p2m_next_level(struct domain *d, mfn_t *
// Returns 0 on error (out of memory)
static int
-p2m_set_entry(struct domain *d, unsigned long gfn, mfn_t mfn, p2m_type_t p2mt)
+p2m_set_entry(struct domain *d, unsigned long gfn, mfn_t mfn,
+ unsigned int page_order, p2m_type_t p2mt)
{
// XXX -- this might be able to be faster iff current->domain == d
mfn_t table_mfn = pagetable_get_mfn(d->arch.phys_table);
@@ -212,6 +253,7 @@ p2m_set_entry(struct domain *d, unsigned
unsigned long gfn_remainder = gfn;
l1_pgentry_t *p2m_entry;
l1_pgentry_t entry_content;
+ l2_pgentry_t l2e_content;
int rv=0;
#if CONFIG_PAGING_LEVELS >= 4
@@ -235,26 +277,53 @@ p2m_set_entry(struct domain *d, unsigned
PGT_l2_page_table) )
goto out;
- if ( !p2m_next_level(d, &table_mfn, &table, &gfn_remainder, gfn,
- L2_PAGETABLE_SHIFT - PAGE_SHIFT,
- L2_PAGETABLE_ENTRIES, PGT_l1_page_table) )
- goto out;
-
- p2m_entry = p2m_find_entry(table, &gfn_remainder, gfn,
- 0, L1_PAGETABLE_ENTRIES);
- ASSERT(p2m_entry);
+ if ( page_order == NORMAL_PAGE_ORDER )
+ {
+ if ( !p2m_next_level(d, &table_mfn, &table, &gfn_remainder, gfn,
+ L2_PAGETABLE_SHIFT - PAGE_SHIFT,
+ L2_PAGETABLE_ENTRIES, PGT_l1_page_table) )
+ goto out;
+
+ p2m_entry = p2m_find_entry(table, &gfn_remainder, gfn,
+ 0, L1_PAGETABLE_ENTRIES);
+ ASSERT(p2m_entry);
+
+ if ( mfn_valid(mfn) || (p2mt == p2m_mmio_direct) )
+ entry_content = l1e_from_pfn(mfn_x(mfn), p2m_type_to_flags(p2mt));
+ else
+ entry_content = l1e_empty();
+
+ /* level 1 entry */
+ paging_write_p2m_entry(d, gfn, p2m_entry, table_mfn, entry_content, 1);
+ }
+ else
+ {
+ p2m_entry = p2m_find_entry(table, &gfn_remainder, gfn,
+ L2_PAGETABLE_SHIFT - PAGE_SHIFT,
+ L2_PAGETABLE_ENTRIES);
+ ASSERT(p2m_entry);
+
+ if ( (l1e_get_flags(*p2m_entry) & _PAGE_PRESENT) &&
+ !(l1e_get_flags(*p2m_entry) & _PAGE_PSE) )
+ {
+ P2M_ERROR("configure P2M table 4KB L2 entry with large page\n");
+ domain_crash(d);
+ goto out;
+ }
+
+ if ( mfn_valid(mfn) )
+ l2e_content = l2e_from_pfn(mfn_x(mfn),
+ p2m_type_to_flags(p2mt) | _PAGE_PSE);
+ else
+ l2e_content = l2e_empty();
+
+ entry_content.l1 = l2e_content.l2;
+ paging_write_p2m_entry(d, gfn, p2m_entry, table_mfn, entry_content, 2);
+ }
/* Track the highest gfn for which we have ever had a valid mapping */
if ( mfn_valid(mfn) && (gfn > d->arch.p2m->max_mapped_pfn) )
d->arch.p2m->max_mapped_pfn = gfn;
-
- if ( mfn_valid(mfn) || (p2mt == p2m_mmio_direct) )
- entry_content = l1e_from_pfn(mfn_x(mfn), p2m_type_to_flags(p2mt));
- else
- entry_content = l1e_empty();
-
- /* level 1 entry */
- paging_write_p2m_entry(d, gfn, p2m_entry, table_mfn, entry_content, 1);
if ( iommu_enabled && is_hvm_domain(d) )
{
@@ -335,6 +404,16 @@ p2m_gfn_to_mfn(struct domain *d, unsigne
unmap_domain_page(l2e);
return _mfn(INVALID_MFN);
}
+ else if ( (l2e_get_flags(*l2e) & _PAGE_PSE) )
+ {
+ mfn = _mfn(l2e_get_pfn(*l2e) + l1_table_offset(addr));
+ *t = p2m_flags_to_type(l2e_get_flags(*l2e));
+ unmap_domain_page(l2e);
+
+ ASSERT(mfn_valid(mfn) || !p2m_is_ram(*t));
+ return (p2m_is_valid(*t)) ? mfn : _mfn(INVALID_MFN);
+ }
+
mfn = _mfn(l2e_get_pfn(*l2e));
unmap_domain_page(l2e);
@@ -358,6 +437,7 @@ static mfn_t p2m_gfn_to_mfn_current(unsi
{
mfn_t mfn = _mfn(INVALID_MFN);
p2m_type_t p2mt = p2m_mmio_dm;
+ paddr_t addr = ((paddr_t)gfn) << PAGE_SHIFT;
/* XXX This is for compatibility with the old model, where anything not
* XXX marked as RAM was considered to be emulated MMIO space.
* XXX Once we start explicitly registering MMIO regions in the p2m
@@ -366,25 +446,44 @@ static mfn_t p2m_gfn_to_mfn_current(unsi
if ( gfn <= current->domain->arch.p2m->max_mapped_pfn )
{
l1_pgentry_t l1e = l1e_empty();
+ l2_pgentry_t l2e = l2e_empty();
int ret;
ASSERT(gfn < (RO_MPT_VIRT_END - RO_MPT_VIRT_START)
/ sizeof(l1_pgentry_t));
- /* Need to __copy_from_user because the p2m is sparse and this
- * part might not exist */
- ret = __copy_from_user(&l1e,
- &phys_to_machine_mapping[gfn],
- sizeof(l1e));
-
- if ( ret == 0 ) {
- p2mt = p2m_flags_to_type(l1e_get_flags(l1e));
- ASSERT(l1e_get_pfn(l1e) != INVALID_MFN || !p2m_is_ram(p2mt));
+ ret = __copy_from_user(&l2e,
+ &__linear_l1_table[l1_linear_offset(RO_MPT_VIRT_START) + l2_linear_offset(addr)],
+ sizeof(l2e));
+
+ if ( (ret == 0) && (l2e_get_flags(l2e) & _PAGE_PRESENT) &&
+ (l2e_get_flags(l2e) & _PAGE_PSE) )
+ {
+ p2mt = p2m_flags_to_type(l2e_get_flags(l2e));
+ ASSERT(l2e_get_pfn(l2e) != INVALID_MFN || !p2m_is_ram(p2mt));
if ( p2m_is_valid(p2mt) )
- mfn = _mfn(l1e_get_pfn(l1e));
- else
- /* XXX see above */
+ mfn = _mfn(l2e_get_pfn(l2e) + l1_table_offset(addr));
+ else
p2mt = p2m_mmio_dm;
+ }
+ else
+ {
+
+ /* Need to __copy_from_user because the p2m is sparse and this
+ * part might not exist */
+ ret = __copy_from_user(&l1e,
+ &phys_to_machine_mapping[gfn],
+ sizeof(l1e));
+
+ if ( ret == 0 ) {
+ p2mt = p2m_flags_to_type(l1e_get_flags(l1e));
+ ASSERT(l1e_get_pfn(l1e) != INVALID_MFN || !p2m_is_ram(p2mt));
+ if ( p2m_is_valid(p2mt) )
+ mfn = _mfn(l1e_get_pfn(l1e));
+ else
+ /* XXX see above */
+ p2mt = p2m_mmio_dm;
+ }
}
}
@@ -430,9 +529,10 @@ void p2m_change_entry_type_global(struct
}
static inline
-int set_p2m_entry(struct domain *d, unsigned long gfn, mfn_t mfn, p2m_type_t p2mt)
-{
- return d->arch.p2m->set_entry(d, gfn, mfn, p2mt);
+int set_p2m_entry(struct domain *d, unsigned long gfn, mfn_t mfn,
+ unsigned int page_order, p2m_type_t p2mt)
+{
+ return d->arch.p2m->set_entry(d, gfn, mfn, page_order, p2mt);
}
// Allocate a new p2m table for a domain.
@@ -493,7 +593,8 @@ int p2m_alloc_table(struct domain *d,
P2M_PRINTK("populating p2m table\n");
/* Initialise physmap tables for slot zero. Other code assumes this. */
- if ( !set_p2m_entry(d, 0, _mfn(INVALID_MFN), p2m_invalid) )
+ if ( !set_p2m_entry(d, 0, _mfn(INVALID_MFN), NORMAL_PAGE_ORDER,
+ p2m_invalid) )
goto error;
/* Copy all existing mappings from the page list and m2p */
@@ -512,7 +613,7 @@ int p2m_alloc_table(struct domain *d,
(gfn != 0x55555555L)
#endif
&& gfn != INVALID_M2P_ENTRY
- && !set_p2m_entry(d, gfn, mfn, p2m_ram_rw) )
+ && !set_p2m_entry(d, gfn, mfn, NORMAL_PAGE_ORDER, p2m_ram_rw) )
goto error;
}
@@ -688,6 +789,28 @@ static void audit_p2m(struct domain *d)
gfn += 1 << (L2_PAGETABLE_SHIFT - PAGE_SHIFT);
continue;
}
+
+ /* check for super page */
+ if ( l2e_get_flags(l2e[i2]) & _PAGE_PSE )
+ {
+ mfn = l2e_get_pfn(l2e[i2]);
+ ASSERT(mfn_valid(_mfn(mfn)));
+ /* Each frame covered by the superpage must map back, via the M2P, to the gfn at the same offset (i1) from the base gfn. */
+ for ( i1 = 0; i1 < L1_PAGETABLE_ENTRIES; i1++)
+ {
+ m2pfn = get_gpfn_from_mfn(mfn+i1);
+ if ( m2pfn != (gfn + i1) )
+ {
+ pmbad++;
+ P2M_PRINTK("mismatch: gfn %#lx -> mfn %#lx"
+ " -> gfn %#lx\n", gfn+i1, mfn+i1,
+ m2pfn);
+ BUG();
+ }
+ }
+ gfn += 1 << (L2_PAGETABLE_SHIFT - PAGE_SHIFT);
+ continue;
+ }
+
+
l1e = map_domain_page(mfn_x(_mfn(l2e_get_pfn(l2e[i2]))));
for ( i1 = 0; i1 < L1_PAGETABLE_ENTRIES; i1++, gfn++ )
@@ -737,35 +860,40 @@ static void audit_p2m(struct domain *d)
static void
-p2m_remove_page(struct domain *d, unsigned long gfn, unsigned long mfn)
-{
+p2m_remove_page(struct domain *d, unsigned long gfn, unsigned long mfn,
+ unsigned int page_order)
+{
+ int i;
if ( !paging_mode_translate(d) )
return;
P2M_DEBUG("removing gfn=%#lx mfn=%#lx\n", gfn, mfn);
- set_p2m_entry(d, gfn, _mfn(INVALID_MFN), p2m_invalid);
- set_gpfn_from_mfn(mfn, INVALID_M2P_ENTRY);
+ set_p2m_entry(d, gfn, _mfn(INVALID_MFN), page_order, p2m_invalid);
+ for ( i = 0; i < (1UL << page_order); i++ )
+ set_gpfn_from_mfn(mfn+i, INVALID_M2P_ENTRY);
}
void
guest_physmap_remove_page(struct domain *d, unsigned long gfn,
- unsigned long mfn)
+ unsigned long mfn, unsigned int page_order)
{
p2m_lock(d->arch.p2m);
audit_p2m(d);
- p2m_remove_page(d, gfn, mfn);
+ p2m_remove_page(d, gfn, mfn, page_order);
audit_p2m(d);
p2m_unlock(d->arch.p2m);
}
int
guest_physmap_add_entry(struct domain *d, unsigned long gfn,
- unsigned long mfn, p2m_type_t t)
+ unsigned long mfn, unsigned int page_order,
+ p2m_type_t t)
{
unsigned long ogfn;
p2m_type_t ot;
mfn_t omfn;
int rc = 0;
+ int i;
if ( !paging_mode_translate(d) )
return -EINVAL;
@@ -795,7 +923,8 @@ guest_physmap_add_entry(struct domain *d
if ( p2m_is_ram(ot) )
{
ASSERT(mfn_valid(omfn));
- set_gpfn_from_mfn(mfn_x(omfn), INVALID_M2P_ENTRY);
+ for ( i = 0; i < (1UL << page_order); i++ )
+ set_gpfn_from_mfn(mfn_x(omfn)+i, INVALID_M2P_ENTRY);
}
ogfn = mfn_to_gfn(d, _mfn(mfn));
@@ -818,21 +947,23 @@ guest_physmap_add_entry(struct domain *d
P2M_DEBUG("old gfn=%#lx -> mfn %#lx\n",
ogfn , mfn_x(omfn));
if ( mfn_x(omfn) == mfn )
- p2m_remove_page(d, ogfn, mfn);
+ p2m_remove_page(d, ogfn, mfn, page_order);
}
}
if ( mfn_valid(_mfn(mfn)) )
{
- if ( !set_p2m_entry(d, gfn, _mfn(mfn), t) )
+ if ( !set_p2m_entry(d, gfn, _mfn(mfn), page_order, t) )
rc = -EINVAL;
- set_gpfn_from_mfn(mfn, gfn);
+ for ( i = 0; i < (1UL << page_order); i++ )
+ set_gpfn_from_mfn(mfn+i, gfn+i);
}
else
{
gdprintk(XENLOG_WARNING, "Adding bad mfn to p2m map (%#lx -> %#lx)\n",
gfn, mfn);
- if ( !set_p2m_entry(d, gfn, _mfn(INVALID_MFN), p2m_invalid) )
+ if ( !set_p2m_entry(d, gfn, _mfn(INVALID_MFN), page_order,
+ p2m_invalid) )
rc = -EINVAL;
}
@@ -851,7 +982,7 @@ void p2m_change_type_global(struct domai
l1_pgentry_t l1e_content;
l1_pgentry_t *l1e;
l2_pgentry_t *l2e;
- mfn_t l1mfn;
+ mfn_t l1mfn, l2mfn;
int i1, i2;
l3_pgentry_t *l3e;
int i3;
@@ -891,11 +1022,26 @@ void p2m_change_type_global(struct domai
{
continue;
}
+ l2mfn = _mfn(l3e_get_pfn(l3e[i3]));
l2e = map_domain_page(l3e_get_pfn(l3e[i3]));
for ( i2 = 0; i2 < L2_PAGETABLE_ENTRIES; i2++ )
{
if ( !(l2e_get_flags(l2e[i2]) & _PAGE_PRESENT) )
{
+ continue;
+ }
+
+ if ( (l2e_get_flags(l2e[i2]) & _PAGE_PSE) )
+ {
+ flags = l2e_get_flags(l2e[i2]);
+ if ( p2m_flags_to_type(flags) != ot )
+ continue;
+ mfn = l2e_get_pfn(l2e[i2]);
+ gfn = get_gpfn_from_mfn(mfn);
+ flags = p2m_type_to_flags(nt); /* new type -> PTE flags; keep PSE so the entry stays a superpage */
+ l1e_content = l1e_from_pfn(mfn, flags | _PAGE_PSE);
+ paging_write_p2m_entry(d, gfn, (l1_pgentry_t *)&l2e[i2],
+ l2mfn, l1e_content, 2);
continue;
}
@@ -944,7 +1090,7 @@ p2m_type_t p2m_change_type(struct domain
mfn = gfn_to_mfn(d, gfn, &pt);
if ( pt == ot )
- set_p2m_entry(d, gfn, mfn, nt);
+ set_p2m_entry(d, gfn, mfn, NORMAL_PAGE_ORDER, nt);
p2m_unlock(d->arch.p2m);
@@ -968,7 +1114,7 @@ set_mmio_p2m_entry(struct domain *d, uns
set_gpfn_from_mfn(mfn_x(omfn), INVALID_M2P_ENTRY);
}
- rc = set_p2m_entry(d, gfn, mfn, p2m_mmio_direct);
+ rc = set_p2m_entry(d, gfn, mfn, NORMAL_PAGE_ORDER, p2m_mmio_direct);
if ( 0 == rc )
gdprintk(XENLOG_ERR,
"set_mmio_p2m_entry: set_p2m_entry failed! mfn=%08lx\n",
@@ -992,7 +1138,7 @@ clear_mmio_p2m_entry(struct domain *d, u
"clear_mmio_p2m_entry: gfn_to_mfn failed! gfn=%08lx\n", gfn);
return 0;
}
- rc = set_p2m_entry(d, gfn, _mfn(INVALID_MFN), 0);
+ rc = set_p2m_entry(d, gfn, _mfn(INVALID_MFN), NORMAL_PAGE_ORDER, 0);
return rc;
}
diff -r 810d8c3ac992 -r 583dca746efb xen/common/grant_table.c
--- a/xen/common/grant_table.c Thu May 08 16:58:33 2008 +0100
+++ b/xen/common/grant_table.c Mon May 12 04:06:34 2008 -0500
@@ -1159,7 +1159,7 @@ gnttab_transfer(
spin_lock(&e->grant_table->lock);
sha = &shared_entry(e->grant_table, gop.ref);
- guest_physmap_add_page(e, sha->frame, mfn);
+ guest_physmap_add_page(e, sha->frame, mfn, NORMAL_PAGE_ORDER);
sha->frame = mfn;
wmb();
sha->flags |= GTF_transfer_completed;
diff -r 810d8c3ac992 -r 583dca746efb xen/common/memory.c
--- a/xen/common/memory.c Thu May 08 16:58:33 2008 +0100
+++ b/xen/common/memory.c Mon May 12 04:06:34 2008 -0500
@@ -114,34 +114,60 @@ static void populate_physmap(struct memo
page = alloc_domheap_pages(
d, a->extent_order, a->memflags | MEMF_node(node));
- if ( unlikely(page == NULL) )
- {
- gdprintk(XENLOG_INFO, "Could not allocate order=%d extent: "
- "id=%d memflags=%x (%ld of %d)\n",
- a->extent_order, d->domain_id, a->memflags,
- i, a->nr_extents);
- goto out;
- }
-
- mfn = page_to_mfn(page);
-
- if ( unlikely(paging_mode_translate(d)) )
- {
+
+ if ( unlikely(page == NULL) )
+ {
+ /* fail if it is not under translate mode */
+ if ( !paging_mode_translate(d) )
+ {
+ gdprintk(XENLOG_INFO, "Could not allocate order=%d extent: "
+ "id=%d memflags=%x (%ld of %d)\n",
+ a->extent_order, d->domain_id, a->memflags,
+ i, a->nr_extents);
+ goto out;
+ }
+
+ /* try to allocate using 4KB page instead */
for ( j = 0; j < (1 << a->extent_order); j++ )
- if ( guest_physmap_add_page(d, gpfn + j, mfn + j) )
+ {
+ page = alloc_domheap_pages(d, 0,
+ a->memflags | MEMF_node(node));
+ if ( page == NULL )
+ {
+ gdprintk(XENLOG_INFO, "Could not allocate order=%d extent:"
+ "id=%d memflags=%x (%ld of %d)\n",
+ 0, d->domain_id, a->memflags, i, a->nr_extents);
goto out;
- }
- else
- {
- for ( j = 0; j < (1 << a->extent_order); j++ )
- set_gpfn_from_mfn(mfn + j, gpfn + j);
-
- /* Inform the domain of the new page's machine address. */
- if ( unlikely(__copy_to_guest_offset(a->extent_list, i, &mfn, 1)) )
- goto out;
- }
- }
-
+ }
+
+ mfn = page_to_mfn(page);
+
+ if ( guest_physmap_add_page(d, gpfn+j, mfn,
+ NORMAL_PAGE_ORDER) )
+ goto out;
+ }
+ }
+ else /* successful in allocating page of extent_order */
+ {
+ mfn = page_to_mfn(page);
+
+ if ( unlikely(paging_mode_translate(d)) )
+ {
+ if ( guest_physmap_add_page(d, gpfn, mfn, a->extent_order) )
+ goto out;
+ }
+ else
+ {
+ for ( j = 0; j < (1 << a->extent_order); j++ )
+ set_gpfn_from_mfn(mfn + j, gpfn + j);
+
+ /* Inform the domain of the new page's machine address. */
+ if ( unlikely(__copy_to_guest_offset(a->extent_list, i, &mfn,
+ 1)) )
+ goto out;
+ }
+ }
+ }
out:
a->nr_done = i;
}
@@ -172,7 +198,7 @@ int guest_remove_page(struct domain *d,
if ( test_and_clear_bit(_PGC_allocated, &page->count_info) )
put_page(page);
- guest_physmap_remove_page(d, gmfn, mfn);
+ guest_physmap_remove_page(d, gmfn, mfn, NORMAL_PAGE_ORDER);
put_page(page);
@@ -419,7 +445,8 @@ static long memory_exchange(XEN_GUEST_HA
if ( !test_and_clear_bit(_PGC_allocated, &page->count_info) )
BUG();
mfn = page_to_mfn(page);
- guest_physmap_remove_page(d, mfn_to_gmfn(d, mfn), mfn);
+ guest_physmap_remove_page(d, mfn_to_gmfn(d, mfn), mfn,
+ NORMAL_PAGE_ORDER);
put_page(page);
}
@@ -441,8 +468,8 @@ static long memory_exchange(XEN_GUEST_HA
if ( unlikely(paging_mode_translate(d)) )
{
/* Ignore failure here. There's nothing we can do. */
- for ( k = 0; k < (1UL << exch.out.extent_order); k++ )
- (void)guest_physmap_add_page(d, gpfn + k, mfn + k);
+ (void)guest_physmap_add_page(d, gpfn, mfn,
+ exch.out.extent_order);
}
else
{
diff -r 810d8c3ac992 -r 583dca746efb xen/include/asm-ia64/shadow.h
--- a/xen/include/asm-ia64/shadow.h Thu May 08 16:58:33 2008 +0100
+++ b/xen/include/asm-ia64/shadow.h Mon May 12 04:06:34 2008 -0500
@@ -40,8 +40,10 @@
* Utilities to change relationship of gpfn->mfn for designated domain,
* which is required by gnttab transfer, balloon, device model and etc.
*/
-int guest_physmap_add_page(struct domain *d, unsigned long gpfn, unsigned long mfn);
-void guest_physmap_remove_page(struct domain *d, unsigned long gpfn, unsigned long mfn);
+int guest_physmap_add_page(struct domain *d, unsigned long gpfn,
+ unsigned long mfn, unsigned int page_order);
+void guest_physmap_remove_page(struct domain *d, unsigned long gpfn,
+ unsigned long mfn, unsigned int page_order);
static inline int
shadow_mode_enabled(struct domain *d)
diff -r 810d8c3ac992 -r 583dca746efb xen/include/asm-x86/mm.h
--- a/xen/include/asm-x86/mm.h Thu May 08 16:58:33 2008 +0100
+++ b/xen/include/asm-x86/mm.h Mon May 12 04:06:34 2008 -0500
@@ -124,6 +124,14 @@ static inline u32 pickle_domptr(struct d
/* The order of the largest allocation unit we use for shadow pages */
#define SHADOW_MAX_ORDER 2 /* Need up to 16k allocs for 32-bit on PAE/64 */
+/* The order of continuously allocated super page frames */
+#define NORMAL_PAGE_ORDER 0 /* 4KB page */
+#if CONFIG_PAGING_LEVELS == 2
+#define SUPER_PAGE_ORDER 10 /* 4MB page */
+#else
+#define SUPER_PAGE_ORDER 9 /* 2MB page */
+#endif
+
#define page_get_owner(_p) (unpickle_domptr((_p)->u.inuse._domain))
#define page_set_owner(_p,_d) ((_p)->u.inuse._domain = pickle_domptr(_d))
diff -r 810d8c3ac992 -r 583dca746efb xen/include/asm-x86/p2m.h
--- a/xen/include/asm-x86/p2m.h Thu May 08 16:58:33 2008 +0100
+++ b/xen/include/asm-x86/p2m.h Mon May 12 04:06:34 2008 -0500
@@ -102,7 +102,8 @@ struct p2m_domain {
void (*free_page )(struct domain *d,
struct page_info *pg);
int (*set_entry )(struct domain *d, unsigned long gfn,
- mfn_t mfn, p2m_type_t p2mt);
+ mfn_t mfn, unsigned int page_order,
+ p2m_type_t p2mt);
mfn_t (*get_entry )(struct domain *d, unsigned long gfn,
p2m_type_t *p2mt);
mfn_t (*get_entry_current)(unsigned long gfn,
@@ -203,21 +204,23 @@ void p2m_final_teardown(struct domain *d
/* Add a page to a domain's p2m table */
int guest_physmap_add_entry(struct domain *d, unsigned long gfn,
- unsigned long mfn, p2m_type_t t);
+ unsigned long mfn, unsigned int page_order,
+ p2m_type_t t);
/* Untyped version for RAM only, for compatibility
*
* Return 0 for success
*/
static inline int guest_physmap_add_page(struct domain *d, unsigned long gfn,
- unsigned long mfn)
-{
- return guest_physmap_add_entry(d, gfn, mfn, p2m_ram_rw);
+ unsigned long mfn,
+ unsigned int page_order)
+{
+ return guest_physmap_add_entry(d, gfn, mfn, page_order, p2m_ram_rw);
}
/* Remove a page from a domain's p2m table */
void guest_physmap_remove_page(struct domain *d, unsigned long gfn,
- unsigned long mfn);
+ unsigned long mfn, unsigned int page_order);
/* Change types across all p2m entries in a domain */
void p2m_change_type_global(struct domain *d, p2m_type_t ot, p2m_type_t nt);
diff -r 810d8c3ac992 -r 583dca746efb xen/include/xen/paging.h
--- a/xen/include/xen/paging.h Thu May 08 16:58:33 2008 +0100
+++ b/xen/include/xen/paging.h Mon May 12 04:06:34 2008 -0500
@@ -18,8 +18,8 @@
#else
#define paging_mode_translate(d) (0)
-#define guest_physmap_add_page(d, p, m) (0)
-#define guest_physmap_remove_page(d, p, m) ((void)0)
+#define guest_physmap_add_page(d, p, m, o) (0)
+#define guest_physmap_remove_page(d, p, m, o) ((void)0)
#endif
[-- Attachment #4: Type: text/plain, Size: 138 bytes --]
_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xensource.com
http://lists.xensource.com/xen-devel
^ permalink raw reply [flat|nested] 14+ messages in thread
* Re: [PATCH] patch to support super page (2M) with EPT
2008-05-12 17:28 ` Huang2, Wei
@ 2008-05-13 8:46 ` Keir Fraser
2008-05-13 13:36 ` Huang2, Wei
0 siblings, 1 reply; 14+ messages in thread
From: Keir Fraser @ 2008-05-13 8:46 UTC (permalink / raw)
To: Huang2, Wei, Xin, Xiaohui, xen-devel
[-- Attachment #1.1: Type: text/plain, Size: 4862 bytes --]
Overall your changes to common code seem a bit more invasive than in the
Intel patch. In particular I don't understand why you made such changes to
common/memory.c. The other patch makes far fewer changes (and even some of
those would go away since they have erroneously changed the populate_physmap
interface). So my feeling is that the Intel patch is a slightly more elegant
base to start with: extra changes that your patch makes really need to be
accounted for.
-- Keir
On 12/5/08 18:28, "Huang2, Wei" <Wei.Huang2@amd.com> wrote:
> Here is a revised version. I get rid of 4MB support, as suggested. I did not
> see the issue mentioned by Xiaohui related to splitting last 2M into 4KB
> pages. But anyway, I attached two versions for your reference. Keir, please
> let me know if you have comments.
>
>
> -Wei
>
>
> From: Keir Fraser [mailto:keir.fraser@eu.citrix.com]
> Sent: Monday, May 12, 2008 2:03 AM
> To: Xin, Xiaohui; Huang2, Wei; xen-devel@lists.xensource.com
> Subject: Re: [Xen-devel][PATCH] patch to support super page (2M) with EPT
>
> Yes, absolutely no need for 4M page support. We do not support the 32-bit
> non-PAE build target any more.
>
> -- Keir
>
> On 12/5/08 06:04, "Xin, Xiaohui" <xiaohui.xin@intel.com> wrote:
>
>> Some comments here:
>> 1) Basically 4M pages allocations is not hardware naturally for EPT, we only
>> use 2M super pages now.
>> I remembered that Keir said that 2M pages allocation is sufficient, and he
>> removed all the pure 32bit support already.
>> 2) If we don't allocate the last 2M area with 4kb pages, the EPT will meet
>> some problem. Xen will set one of the 4k page
>> there to be invalid, logically that means we should invalid the all the
>> 2M page if we allocate it with 2M, and then the
>> special pages Xen used in the high end of the guest memory can not be
>> used then. May we know how you cope with that?
>>
>> Thanks
>> Xiaohui
>>
>>
>>
>>
>>
>> From: xen-devel-bounces@lists.xensource.com
>> [mailto:xen-devel-bounces@lists.xensource.com] On Behalf Of Huang2, Wei
>> Sent: Monday, May 12, 2008 12:36 PM
>> To: Xin, Xiaohui; xen-devel@lists.xensource.com
>> Subject: RE: [Xen-devel][PATCH] patch to support super page (2M) with EPT
>>
>> This is the latest one I created. Please review it and I will re-submit.
>>
>> 1. It includes the patch for p2m-ept.c, directly from your previous
>> patch.
>>
>> 2. Xc_hvm_create.c is based on my original approach. It includes
>> support for both 2MB and 4MB pages. Also it considers the case of odd page
>> size (such as 255MB). But I did not allocate the last 2MB area using 4KB
>> pages. Let me know if it is a big issue.
>>
>> 3. The rest are pretty similar.
>>
>>
>> Thanks,
>>
>> -Wei
>>
>>
>> From: xen-devel-bounces@lists.xensource.com
>> [mailto:xen-devel-bounces@lists.xensource.com] On Behalf Of Huang2, Wei
>> Sent: Sunday, May 11, 2008 3:34 PM
>> To: Xin, Xiaohui; xen-devel@lists.xensource.com
>> Subject: RE: [Xen-devel][PATCH] patch to support super page (2M) with EPT
>>
>> Could we work together for a common solution? As far as I can see, it
>> largely overlaps with my super page patch. The major difference is between
>> p2m.c and p2m-ept.c.
>>
>> -Wei
>>
>>
>> From: xen-devel-bounces@lists.xensource.com
>> [mailto:xen-devel-bounces@lists.xensource.com] On Behalf Of Xin, Xiaohui
>> Sent: Friday, May 09, 2008 4:11 AM
>> To: xen-devel@lists.xensource.com
>> Subject: [Xen-devel][PATCH] patch to support super page (2M) with EPT
>>
>> Attached are the patches to support super page with EPT. We only support 2M
>> size. And shadow may still work fine with 4K pages.
>> The patches can be split into 3 parts. Apply order is as attached.
>>
>> tool.diff
>> To allocate 2M physical contiguous memory in guest except the first 2M and
>> the last 2M.
>> The first 2M covers special memory, and Xen use the last few pages in guest
>> memory to do special things.
>> We let them to be 4K pages as normal.
>> super_page_common.patch
>> To modify the p2m interfaces by adding an order parameter, such as
>> guest_physmap_add_page(), p2m_set_entry(), etc.
>> p2m-ept-file.patch
>> To handle the EPT tables to support super page.
>>
>>
>> Signed-off-by: Xin Xiaohui <xiaohui.xin@intel.com>
>> Signed-off-by: Li Xin, B <xin.b.li@intel.com>
>>
>>
>>
>>
>>
>>
>> _______________________________________________
>> Xen-devel mailing list
>> Xen-devel@lists.xensource.com
>> http://lists.xensource.com/xen-devel
>
>
>
> _______________________________________________
> Xen-devel mailing list
> Xen-devel@lists.xensource.com
> http://lists.xensource.com/xen-devel
[-- Attachment #1.2: Type: text/html, Size: 10039 bytes --]
[-- Attachment #2: Type: text/plain, Size: 138 bytes --]
_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xensource.com
http://lists.xensource.com/xen-devel
^ permalink raw reply [flat|nested] 14+ messages in thread
* RE: [PATCH] patch to support super page (2M) with EPT
2008-05-13 8:46 ` Keir Fraser
@ 2008-05-13 13:36 ` Huang2, Wei
2008-05-13 13:39 ` Li, Xin B
0 siblings, 1 reply; 14+ messages in thread
From: Huang2, Wei @ 2008-05-13 13:36 UTC (permalink / raw)
To: Keir Fraser, Xin, Xiaohui, xen-devel
[-- Attachment #1.1: Type: text/plain, Size: 5184 bytes --]
Memory.c looks more invasive because it takes care of failure cases
using 4KB pages. Xiaohui's patch tries to allocate pages using
extent_order. But if this request fails for any reason, the guest cannot
be started anymore.
-Wei
From: Keir Fraser [mailto:keir.fraser@eu.citrix.com]
Sent: Tuesday, May 13, 2008 3:47 AM
To: Huang2, Wei; Xin, Xiaohui; xen-devel@lists.xensource.com
Subject: Re: [Xen-devel][PATCH] patch to support super page (2M) with
EPT
Overall your changes to common code seem a bit more invasive than in the
Intel patch. In particular I don't understand why you made such changes
to common/memory.c. The other patch makes far fewer changes (and even
some of those would go away since they have erroneously changed the
populate_physmap interface). So my feeling is that the Intel patch is a
slightly more elegant base to start with: extra changes that your patch
makes really need to be accounted for.
-- Keir
On 12/5/08 18:28, "Huang2, Wei" <Wei.Huang2@amd.com> wrote:
Here is a revised version. I get rid of 4MB support, as suggested. I did
not see the issue mentioned by Xiaohui related to splitting last 2M into
4KB pages. But anyway, I attached two versions for your reference. Keir,
please let me know if you have comments.
-Wei
________________________________
From: Keir Fraser [mailto:keir.fraser@eu.citrix.com]
Sent: Monday, May 12, 2008 2:03 AM
To: Xin, Xiaohui; Huang2, Wei; xen-devel@lists.xensource.com
Subject: Re: [Xen-devel][PATCH] patch to support super page (2M) with
EPT
Yes, absolutely no need for 4M page support. We do not support the
32-bit non-PAE build target any more.
-- Keir
On 12/5/08 06:04, "Xin, Xiaohui" <xiaohui.xin@intel.com> wrote:
Some comments here:
1) Basically 4M pages allocations is not hardware naturally for EPT, we
only use 2M super pages now.
I remembered that Keir said that 2M pages allocation is sufficient, and
he removed all the pure 32bit support already.
2) If we don't allocate the last 2M area with 4kb pages, the EPT will
meet some problem. Xen will set one of the 4k page
there to be invalid, logically that means we should invalid the
all the 2M page if we allocate it with 2M, and then the
special pages Xen used in the high end of the guest memory can not
be used then. May we know how you cope with that?
Thanks
Xiaohui
________________________________
From: xen-devel-bounces@lists.xensource.com
[mailto:xen-devel-bounces@lists.xensource.com] On Behalf Of Huang2, Wei
Sent: Monday, May 12, 2008 12:36 PM
To: Xin, Xiaohui; xen-devel@lists.xensource.com
Subject: RE: [Xen-devel][PATCH] patch to support super page (2M) with
EPT
This is the latest one I created. Please review it and I will
re-submit.
1. It includes the patch for p2m-ept.c, directly from your
previous patch.
2. Xc_hvm_create.c is based on my original approach. It includes
support for both 2MB and 4MB pages. Also it considers the case of odd
page size (such as 255MB). But I did not allocate the last 2MB area
using 4KB pages. Let me know if it is a big issue.
3. The rest are pretty similar.
Thanks,
-Wei
From: xen-devel-bounces@lists.xensource.com
[mailto:xen-devel-bounces@lists.xensource.com] On Behalf Of Huang2, Wei
Sent: Sunday, May 11, 2008 3:34 PM
To: Xin, Xiaohui; xen-devel@lists.xensource.com
Subject: RE: [Xen-devel][PATCH] patch to support super page (2M) with
EPT
Could we work together for a common solution? As far as I can see, it
largely overlaps with my super page patch. The major difference is
between p2m.c and p2m-ept.c.
-Wei
From: xen-devel-bounces@lists.xensource.com
[mailto:xen-devel-bounces@lists.xensource.com] On Behalf Of Xin,
Xiaohui
Sent: Friday, May 09, 2008 4:11 AM
To: xen-devel@lists.xensource.com
Subject: [Xen-devel][PATCH] patch to support super page (2M) with EPT
Attached are the patches to support super page with EPT. We only
support 2M size. And shadow may still work fine with 4K pages.
The patches can be split into 3 parts. Apply order is as attached.
tool.diff
To allocate 2M physical contiguous memory in guest except the first 2M
and the last 2M.
The first 2M covers special memory, and Xen use the last few pages in
guest memory to do special things.
We let them to be 4K pages as normal.
super_page_common.patch
To modify the p2m interfaces by adding an order parameter, such as
guest_physmap_add_page(), p2m_set_entry(), etc.
p2m-ept-file.patch
To handle the EPT tables to support super page.
Signed-off-by: Xin Xiaohui <xiaohui.xin@intel.com>
Signed-off-by: Li Xin, B <xin.b.li@intel.com>
________________________________
_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xensource.com
http://lists.xensource.com/xen-devel
________________________________
_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xensource.com
http://lists.xensource.com/xen-devel
[-- Attachment #1.2: Type: text/html, Size: 13865 bytes --]
[-- Attachment #2: Type: text/plain, Size: 138 bytes --]
_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xensource.com
http://lists.xensource.com/xen-devel
^ permalink raw reply [flat|nested] 14+ messages in thread
* RE: [PATCH] patch to support super page (2M) with EPT
2008-05-13 13:36 ` Huang2, Wei
@ 2008-05-13 13:39 ` Li, Xin B
2008-05-13 13:51 ` Keir Fraser
0 siblings, 1 reply; 14+ messages in thread
From: Li, Xin B @ 2008-05-13 13:39 UTC (permalink / raw)
To: Huang2, Wei, Keir Fraser, Xin, Xiaohui, xen-devel
[-- Attachment #1.1: Type: text/plain, Size: 5700 bytes --]
if 2M page allocation fails, the domain builder will try to use 4K allocation instead.
-Xin
________________________________
From: xen-devel-bounces@lists.xensource.com [mailto:xen-devel-bounces@lists.xensource.com] On Behalf Of Huang2, Wei
Sent: 2008年5月13日 21:37
To: Keir Fraser; Xin, Xiaohui; xen-devel@lists.xensource.com
Subject: RE: [Xen-devel][PATCH] patch to support super page (2M) with EPT
Memory.c looks more invasive because it takes care of failure cases using 4KB pages. Xiaohui’s patch tries to allocate pages using extent_order. But if this request fails for any reason, the guest cannot be started anymore.
-Wei
From: Keir Fraser [mailto:keir.fraser@eu.citrix.com]
Sent: Tuesday, May 13, 2008 3:47 AM
To: Huang2, Wei; Xin, Xiaohui; xen-devel@lists.xensource.com
Subject: Re: [Xen-devel][PATCH] patch to support super page (2M) with EPT
Overall your changes to common code seem a bit more invasive than in the Intel patch. In particular I don’t understand why you made such changes to common/memory.c. The other patch makes far fewer changes (and even some of those would go away since they have erroneously changed the populate_physmap interface). So my feeling is that the Intel patch is a slightly more elegant base to start with: extra changes that your patch makes really need to be accounted for.
-- Keir
On 12/5/08 18:28, "Huang2, Wei" <Wei.Huang2@amd.com> wrote:
Here is a revised version. I get rid of 4MB support, as suggested. I did not see the issue mentioned by Xiaohui related to splitting last 2M into 4KB pages. But anyway, I attached two versions for your reference. Keir, please let me know if you have comments.
-Wei
________________________________
From: Keir Fraser [mailto:keir.fraser@eu.citrix.com]
Sent: Monday, May 12, 2008 2:03 AM
To: Xin, Xiaohui; Huang2, Wei; xen-devel@lists.xensource.com
Subject: Re: [Xen-devel][PATCH] patch to support super page (2M) with EPT
Yes, absolutely no need for 4M page support. We do not support the 32-bit non-PAE build target any more.
-- Keir
On 12/5/08 06:04, "Xin, Xiaohui" <xiaohui.xin@intel.com> wrote:
Some comments here:
1) Basically 4M pages allocations is not hardware naturally for EPT, we only use 2M super pages now.
I remembered that Keir said that 2M pages allocation is sufficient, and he removed all the pure 32bit support already.
2) If we don’t allocate the last 2M area with 4kb pages, the EPT will meet some problem. Xen will set one of the 4k page
there to be invalid, logically that means we should invalid the all the 2M page if we allocate it with 2M, and then the
special pages Xen used in the high end of the guest memory can not be used then. May we know how you cope with that?
Thanks
Xiaohui
________________________________
From: xen-devel-bounces@lists.xensource.com [mailto:xen-devel-bounces@lists.xensource.com] On Behalf Of Huang2, Wei
Sent: Monday, May 12, 2008 12:36 PM
To: Xin, Xiaohui; xen-devel@lists.xensource.com
Subject: RE: [Xen-devel][PATCH] patch to support super page (2M) with EPT
This is the latest one I created. Please review it and I will re-submit.
1. It includes the patch for p2m-ept.c, directly from your previous patch.
2. Xc_hvm_create.c is based on my original approach. It includes support for both 2MB and 4MB pages. Also it considers the case of odd page size (such as 255MB). But I did not allocate the last 2MB area using 4KB pages. Let me know if it is a big issue.
3. The rest are pretty similar.
Thanks,
-Wei
From: xen-devel-bounces@lists.xensource.com [mailto:xen-devel-bounces@lists.xensource.com] On Behalf Of Huang2, Wei
Sent: Sunday, May 11, 2008 3:34 PM
To: Xin, Xiaohui; xen-devel@lists.xensource.com
Subject: RE: [Xen-devel][PATCH] patch to support super page (2M) with EPT
Could we work together for a common solution? As far as I can see, it largely overlaps with my super page patch. The major difference is between p2m.c and p2m-ept.c.
-Wei
From: xen-devel-bounces@lists.xensource.com [mailto:xen-devel-bounces@lists.xensource.com] On Behalf Of Xin, Xiaohui
Sent: Friday, May 09, 2008 4:11 AM
To: xen-devel@lists.xensource.com
Subject: [Xen-devel][PATCH] patch to support super page (2M) with EPT
Attached are the patches to support super page with EPT. We only support 2M size. And shadow may still work fine with 4K pages.
The patches can be split into 3 parts. Apply order is as attached.
tool.diff
To allocate 2M physical contiguous memory in guest except the first 2M and the last 2M.
The first 2M covers special memory, and Xen use the last few pages in guest memory to do special things.
We let them to be 4K pages as normal.
super_page_common.patch
To modify the p2m interfaces by adding an order parameter, such as guest_physmap_add_page(), p2m_set_entry(), etc.
p2m-ept-file.patch
To handle the EPT tables to support super page.
Signed-off-by: Xin Xiaohui <xiaohui.xin@intel.com>
Signed-off-by: Li Xin, B <xin.b.li@intel.com>
________________________________
_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xensource.com
http://lists.xensource.com/xen-devel
________________________________
_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xensource.com
http://lists.xensource.com/xen-devel
[-- Attachment #1.2: Type: text/html, Size: 15381 bytes --]
[-- Attachment #2: Type: text/plain, Size: 138 bytes --]
_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xensource.com
http://lists.xensource.com/xen-devel
^ permalink raw reply [flat|nested] 14+ messages in thread
* Re: [PATCH] patch to support super page (2M) with EPT
2008-05-13 13:39 ` Li, Xin B
@ 2008-05-13 13:51 ` Keir Fraser
2008-05-13 15:49 ` Huang2, Wei
0 siblings, 1 reply; 14+ messages in thread
From: Keir Fraser @ 2008-05-13 13:51 UTC (permalink / raw)
To: Li, Xin B, Huang2, Wei, Xin, Xiaohui, xen-devel
[-- Attachment #1.1: Type: text/plain, Size: 6468 bytes --]
Exactly. The interface for populate_physmap() is clear — if you ask for
order-9 allocations then that is what you must get. Otherwise the allocation
fails. It is up to the caller to retry with order-0 allocations _if_ that is
a suitable fallback.
-- Keir
On 13/5/08 14:39, "Li, Xin B" <xin.b.li@intel.com> wrote:
> if 2M page allocation fails, the domain builder will try to use 4K allocation
> instead.
> -Xin
>
>>
>>
>>
>> From: xen-devel-bounces@lists.xensource.com
>> [mailto:xen-devel-bounces@lists.xensource.com] On Behalf Of Huang2, Wei
>> Sent: 2008年5月13日 21:37
>> To: Keir Fraser; Xin, Xiaohui; xen-devel@lists.xensource.com
>> Subject: RE: [Xen-devel][PATCH] patch to support super page (2M) with EPT
>>
>>
>>
>>
>>
>> Memory.c looks more invasive because it takes care of failure cases using
>> 4KB pages. Xiaohui’s patch tries to allocate pages using extend_order. But
>> if this request fails for any reason, the guest cannot be started anymore.
>>
>>
>>
>> -Wei
>>
>>
>>
>>
>>
>>
>> From: Keir Fraser [mailto:keir.fraser@eu.citrix.com]
>> Sent: Tuesday, May 13, 2008 3:47 AM
>> To: Huang2, Wei; Xin, Xiaohui; xen-devel@lists.xensource.com
>> Subject: Re: [Xen-devel][PATCH] patch to support super page (2M) with EPT
>>
>>
>>
>> Overall your changes to common code seem a bit more invasive than in the
>> Intel patch. In particular I don’t understand why you made such changes to
>> common/memory.c. The other patch makes far fewer changes (and even some of
>> those would go away since they have erroneously changed the populate_physmap
>> interface). So my feeling is that the Intel patch is a slightly more elegant
>> base to start with: extra changes that your patch makes really need to be
>> accounted for.
>>
>> -- Keir
>>
>> On 12/5/08 18:28, "Huang2, Wei" <Wei.Huang2@amd.com> wrote:
>>
>> Here is a revised version. I get rid of 4MB support, as suggested. I did not
>> see the issue mentioned by Xiaohui related to splitting last 2M into 4KB
>> pages. But anyway, I attached two versions for your reference. Keir, please
>> let me know if you have comments.
>>
>>
>> -Wei
>>
>>
>>
>>
>>
>>
>> From: Keir Fraser [mailto:keir.fraser@eu.citrix.com]
>> Sent: Monday, May 12, 2008 2:03 AM
>> To: Xin, Xiaohui; Huang2, Wei; xen-devel@lists.xensource.com
>> Subject: Re: [Xen-devel][PATCH] patch to support super page (2M) with EPT
>>
>> Yes, absolutely no need for 4M page support. We do not support the 32-bit
>> non-PAE build target any more.
>>
>> -- Keir
>>
>> On 12/5/08 06:04, "Xin, Xiaohui" <xiaohui.xin@intel.com> wrote:
>>
>> Some comments here:
>> 1) Basically 4M pages allocations is not hardware naturally for EPT, we
>> only use 2M super pages now.
>> I remembered that Keir said that 2M pages allocation is sufficient, and he
>> removed all the pure 32bit support already.
>> 2) If we don’t allocate the last 2M area with 4kb pages, the EPT will meet
>> some problem. Xen will set one of the 4k page
>> there to be invalid, logically that means we should invalid the all
>> the 2M page if we allocate it with 2M, and then the
>> special pages Xen used in the high end of the guest memory can not be
>> used then. May we know how you cope with that?
>>
>> Thanks
>> Xiaohui
>>
>>
>>
>>
>>
>>
>>
>>
>>
>> From: xen-devel-bounces@lists.xensource.com
>> [mailto:xen-devel-bounces@lists.xensource.com] On Behalf Of Huang2, Wei
>> Sent: Monday, May 12, 2008 12:36 PM
>> To: Xin, Xiaohui; xen-devel@lists.xensource.com
>> Subject: RE: [Xen-devel][PATCH] patch to support super page (2M) with EPT
>>
>> This is the latest one I created. Please review it and I will re-submit.
>>
>> 1. It includes the patch for p2m-ept.c, directly from your previous
>> patch.
>>
>> 2. Xc_hvm_create.c is based on my original approach. It includes
>> support for both 2MB and 4MB pages. Also it considers the case of odd page
>> size (such as 255MB). But I did not allocate the last 2MB area using 4KB
>> pages. Let me know if it is a big issue.
>>
>> 3. The rest are pretty similar.
>>
>>
>> Thanks,
>>
>> -Wei
>>
>>
>> From: xen-devel-bounces@lists.xensource.com
>> [mailto:xen-devel-bounces@lists.xensource.com] On Behalf Of Huang2, Wei
>> Sent: Sunday, May 11, 2008 3:34 PM
>> To: Xin, Xiaohui; xen-devel@lists.xensource.com
>> Subject: RE: [Xen-devel][PATCH] patch to support super page (2M) with EPT
>>
>> Could we work together for a common solution? As far as I can see, it
>> largely overlaps with my super page patch. The major difference is between
>> p2m.c and p2m-ept.c.
>>
>> -Wei
>>
>>
>> From: xen-devel-bounces@lists.xensource.com
>> [mailto:xen-devel-bounces@lists.xensource.com] On Behalf Of Xin, Xiaohui
>> Sent: Friday, May 09, 2008 4:11 AM
>> To: xen-devel@lists.xensource.com
>> Subject: [Xen-devel][PATCH] patch to support super page (2M) with EPT
>>
>> Attached are the patches to support super page with EPT. We only support 2M
>> size. And shadow may still work fine with 4K pages.
>> The patches can be split into 3 parts. Apply order is as attached.
>>
>> tool.diff
>> To allocate 2M physical contiguous memory in guest except the first 2M and
>> the last 2M.
>> The first 2M covers special memory, and Xen use the last few pages in guest
>> memory to do special things.
>> We let them to be 4K pages as normal.
>> super_page_common.patch
>> To modify the p2m interfaces by adding an order parameter, such as
>> guest_physmap_add_page(), p2m_set_entry(), etc.
>> p2m-ept-file.patch
>> To handle the EPT tables to support super page.
>>
>>
>> Signed-off-by: Xin Xiaohui <xiaohui.xin@intel.com>
>> Signed-off-by: Li Xin, B <xin.b.li@intel.com>
>>
>>
>>
>>
>>
>>
>>
>>
>>
>>
>>
>> _______________________________________________
>> Xen-devel mailing list
>> Xen-devel@lists.xensource.com
>> http://lists.xensource.com/xen-devel
>>
>>
>>
>>
>>
>>
>>
>>
>> _______________________________________________
>> Xen-devel mailing list
>> Xen-devel@lists.xensource.com
>> http://lists.xensource.com/xen-devel
>>
>>
>
[-- Attachment #1.2: Type: text/html, Size: 13253 bytes --]
[-- Attachment #2: Type: text/plain, Size: 138 bytes --]
_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xensource.com
http://lists.xensource.com/xen-devel
^ permalink raw reply [flat|nested] 14+ messages in thread
* RE: [PATCH] patch to support super page (2M) with EPT
2008-05-13 13:51 ` Keir Fraser
@ 2008-05-13 15:49 ` Huang2, Wei
2008-05-14 8:40 ` Keir Fraser
0 siblings, 1 reply; 14+ messages in thread
From: Huang2, Wei @ 2008-05-13 15:49 UTC (permalink / raw)
To: Keir Fraser, Li, Xin B, Xin, Xiaohui, xen-devel
[-- Attachment #1.1: Type: text/plain, Size: 6560 bytes --]
Re-submit. It fixes the issues based on your comments.
Thanks,
-Wei
________________________________
From: Keir Fraser [mailto:keir.fraser@eu.citrix.com]
Sent: Tuesday, May 13, 2008 8:51 AM
To: Li, Xin B; Huang2, Wei; Xin, Xiaohui; xen-devel@lists.xensource.com
Subject: Re: [Xen-devel][PATCH] patch to support super page (2M) with EPT
Exactly. The interface for populate_physmap() is clear - if you ask for order-9 allocations then that is what you must get. Otherwise the allocation fails. It is up to the caller to retry with order-0 allocations _if_ that is a suitable fallback.
-- Keir
On 13/5/08 14:39, "Li, Xin B" <xin.b.li@intel.com> wrote:
if 2M page allocation fails, the domain builder will try to use 4K allocation instead.
-Xin
________________________________
From: xen-devel-bounces@lists.xensource.com [mailto:xen-devel-bounces@lists.xensource.com] On Behalf Of Huang2, Wei
Sent: 2008年5月13日 21:37
To: Keir Fraser; Xin, Xiaohui; xen-devel@lists.xensource.com
Subject: RE: [Xen-devel][PATCH] patch to support super page (2M) with EPT
Memory.c looks more invasive because it takes care of failure cases using 4KB pages. Xiaohui’s patch tries to allocate pages using extend_order. But if this request fails for any reason, the guest cannot be started anymore.
-Wei
From: Keir Fraser [mailto:keir.fraser@eu.citrix.com]
Sent: Tuesday, May 13, 2008 3:47 AM
To: Huang2, Wei; Xin, Xiaohui; xen-devel@lists.xensource.com
Subject: Re: [Xen-devel][PATCH] patch to support super page (2M) with EPT
Overall your changes to common code seem a bit more invasive than in the Intel patch. In particular I don’t understand why you made such changes to common/memory.c. The other patch makes far fewer changes (and even some of those would go away since they have erroneously changed the populate_physmap interface). So my feeling is that the Intel patch is a slightly more elegant base to start with: extra changes that your patch makes really need to be accounted for.
-- Keir
On 12/5/08 18:28, "Huang2, Wei" <Wei.Huang2@amd.com> wrote:
Here is a revised version. I got rid of 4MB support, as suggested. I did not see the issue mentioned by Xiaohui related to splitting the last 2M into 4KB pages. But anyway, I attached two versions for your reference. Keir, please let me know if you have comments.
-Wei
________________________________
From: Keir Fraser [mailto:keir.fraser@eu.citrix.com]
Sent: Monday, May 12, 2008 2:03 AM
To: Xin, Xiaohui; Huang2, Wei; xen-devel@lists.xensource.com
Subject: Re: [Xen-devel][PATCH] patch to support super page (2M) with EPT
Yes, absolutely no need for 4M page support. We do not support the 32-bit non-PAE build target any more.
-- Keir
On 12/5/08 06:04, "Xin, Xiaohui" <xiaohui.xin@intel.com> wrote:
Some comments here:
1) Basically, 4M page allocation is not natively supported by the hardware for EPT, so we only use 2M super pages now.
I remember that Keir said that 2M page allocation is sufficient, and he has already removed all the pure 32-bit support.
2) If we don’t allocate the last 2M area with 4KB pages, EPT will run into a problem. Xen will set one of the 4K pages
there to be invalid; logically that means we would have to invalidate the whole 2M page if we allocated it as a 2M page, and then the
special pages Xen uses at the high end of the guest memory could no longer be used. May we know how you cope with that?
Thanks
Xiaohui
________________________________
From: xen-devel-bounces@lists.xensource.com [mailto:xen-devel-bounces@lists.xensource.com] On Behalf Of Huang2, Wei
Sent: Monday, May 12, 2008 12:36 PM
To: Xin, Xiaohui; xen-devel@lists.xensource.com
Subject: RE: [Xen-devel][PATCH] patch to support super page (2M) with EPT
This is the latest one I created. Please review it and I will re-submit.
1. It includes the patch for p2m-ept.c, directly from your previous patch.
2. Xc_hvm_create.c is based on my original approach. It includes support for both 2MB and 4MB pages. Also it considers the case of odd page size (such as 255MB). But I did not allocate the last 2MB area using 4KB pages. Let me know if it is a big issue.
3. The rest are pretty similar.
Thanks,
-Wei
From: xen-devel-bounces@lists.xensource.com [mailto:xen-devel-bounces@lists.xensource.com] On Behalf Of Huang2, Wei
Sent: Sunday, May 11, 2008 3:34 PM
To: Xin, Xiaohui; xen-devel@lists.xensource.com
Subject: RE: [Xen-devel][PATCH] patch to support super page (2M) with EPT
Could we work together for a common solution? As far as I can see, it largely overlaps with my super page patch. The major difference is between p2m.c and p2m-ept.c.
-Wei
From: xen-devel-bounces@lists.xensource.com [mailto:xen-devel-bounces@lists.xensource.com] On Behalf Of Xin, Xiaohui
Sent: Friday, May 09, 2008 4:11 AM
To: xen-devel@lists.xensource.com
Subject: [Xen-devel][PATCH] patch to support super page (2M) with EPT
Attached are the patches to support super page with EPT. We only support 2M size. And shadow may still work fine with 4K pages.
The patches can be split into 3 parts. Apply order is as attached.
tool.diff
To allocate 2M physical contiguous memory in guest except the first 2M and the last 2M.
The first 2M covers special memory, and Xen uses the last few pages in guest memory to do special things.
We leave them as 4K pages, as normal.
super_page_common.patch
To modify the p2m interfaces by adding an order parameter, such as guest_physmap_add_page(), p2m_set_entry(), etc.
p2m-ept-file.patch
To handle the EPT tables to support super page.
Signed-off-by: Xin Xiaohui <xiaohui.xin@intel.com>
Signed-off-by: Li Xin, B <xin.b.li@intel.com>
________________________________
_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xensource.com
http://lists.xensource.com/xen-devel
________________________________
_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xensource.com
http://lists.xensource.com/xen-devel
[-- Attachment #1.2: Type: text/html, Size: 14876 bytes --]
[-- Attachment #2: super_page_patch_new.txt --]
[-- Type: text/plain, Size: 41054 bytes --]
diff -r 810d8c3ac992 tools/libxc/xc_hvm_build.c
--- a/tools/libxc/xc_hvm_build.c Thu May 08 16:58:33 2008 +0100
+++ b/tools/libxc/xc_hvm_build.c Tue May 13 03:39:27 2008 -0500
@@ -165,7 +165,7 @@ static int setup_guest(int xc_handle,
uint32_t *ident_pt;
struct elf_binary elf;
uint64_t v_start, v_end;
- int rc;
+ int rc, left;
xen_capabilities_info_t caps;
/* An HVM guest must be initialised with at least 2MB memory. */
@@ -213,19 +213,64 @@ static int setup_guest(int xc_handle,
* Allocate memory for HVM guest, skipping VGA hole 0xA0000-0xC0000.
* We allocate pages in batches of no more than 2048 to ensure that
* we can be preempted and hence dom0 remains responsive.
- */
+ * 1) Allocate 4K pages for the first 2M guest memory;
+ * 2) try to allocate 2M contiguous pages for the remaining guest memory
+ * or use 4K pages;
+ * 3) Since the last page of the guest memory will be dereserved at last,
+ * we try just allocate 4K pages for the last 2M guest memory.
+ */
+
rc = xc_domain_memory_populate_physmap(
xc_handle, dom, 0xa0, 0, 0, &page_array[0x00]);
cur_pages = 0xc0;
- while ( (rc == 0) && (nr_pages > cur_pages) )
+
+ if ( rc == 0 )
+ rc = xc_domain_memory_populate_physmap(
+ xc_handle, dom, 0x200-0xc0, 0, 0, &page_array[0xc0]);
+
+ cur_pages = 0x200;
+
+ left = nr_pages - ((nr_pages >> 9 ) << 9 );
+
+ while ( (rc == 0) && ( (left ? nr_pages : (nr_pages - 0x200)) > cur_pages) )
{
unsigned long count = nr_pages - cur_pages;
if ( count > 2048 )
+ {
count = 2048;
+ rc = xc_domain_memory_populate_physmap(
+ xc_handle, dom, 4, 9, 0, &page_array[cur_pages]);
+ if ( rc != 0 )
+ {
+ PERROR("Cannot allocate more 2M pages for HVM guest.\n");
+ rc = xc_domain_memory_populate_physmap(
+ xc_handle, dom, count, 0, 0, &page_array[cur_pages]);
+ if ( rc != 0 )
+ {
+ PERROR("Could not allocate memory for HVM guest.\n");
+ goto error_out;
+ }
+ }
+ }
+ else
+ {
+ rc = xc_domain_memory_populate_physmap(
+ xc_handle, dom, count, 0, 0, &page_array[cur_pages]);
+
+ if ( rc != 0 )
+ {
+ PERROR("Could not allocate memory for HVM guest.\n");
+ goto error_out;
+ }
+ }
+
+ cur_pages += count;
+ }
+
+ if ( !left )
rc = xc_domain_memory_populate_physmap(
- xc_handle, dom, count, 0, 0, &page_array[cur_pages]);
- cur_pages += count;
- }
+ xc_handle, dom, nr_pages - cur_pages, 0, 0, &page_array[cur_pages]);
+
if ( rc != 0 )
{
PERROR("Could not allocate memory for HVM guest.\n");
diff -r 810d8c3ac992 xen/arch/ia64/xen/mm.c
--- a/xen/arch/ia64/xen/mm.c Thu May 08 16:58:33 2008 +0100
+++ b/xen/arch/ia64/xen/mm.c Tue May 13 03:39:03 2008 -0500
@@ -2415,7 +2415,7 @@ steal_page(struct domain *d, struct page
int
guest_physmap_add_page(struct domain *d, unsigned long gpfn,
- unsigned long mfn)
+ unsigned long mfn, unsigned int page_order)
{
BUG_ON(!mfn_valid(mfn));
BUG_ON(mfn_to_page(mfn)->count_info != (PGC_allocated | 1));
@@ -2432,7 +2432,7 @@ guest_physmap_add_page(struct domain *d,
void
guest_physmap_remove_page(struct domain *d, unsigned long gpfn,
- unsigned long mfn)
+ unsigned long mfn, unsigned int page_order)
{
BUG_ON(mfn == 0);//XXX
zap_domain_page_one(d, gpfn << PAGE_SHIFT, 0, mfn);
@@ -2838,7 +2838,7 @@ arch_memory_op(int op, XEN_GUEST_HANDLE(
if (prev_mfn && mfn_valid(prev_mfn)) {
if (is_xen_heap_mfn(prev_mfn))
/* Xen heap frames are simply unhooked from this phys slot. */
- guest_physmap_remove_page(d, xatp.gpfn, prev_mfn);
+ guest_physmap_remove_page(d, xatp.gpfn, prev_mfn, 0);
else
/* Normal domain memory is freed, to avoid leaking memory. */
guest_remove_page(d, xatp.gpfn);
@@ -2847,10 +2847,10 @@ arch_memory_op(int op, XEN_GUEST_HANDLE(
/* Unmap from old location, if any. */
gpfn = get_gpfn_from_mfn(mfn);
if (gpfn != INVALID_M2P_ENTRY)
- guest_physmap_remove_page(d, gpfn, mfn);
+ guest_physmap_remove_page(d, gpfn, mfn, 0);
/* Map at new location. */
- guest_physmap_add_page(d, xatp.gpfn, mfn);
+ guest_physmap_add_page(d, xatp.gpfn, mfn, 0);
out:
domain_unlock(d);
diff -r 810d8c3ac992 xen/arch/x86/mm.c
--- a/xen/arch/x86/mm.c Thu May 08 16:58:33 2008 +0100
+++ b/xen/arch/x86/mm.c Tue May 13 03:39:03 2008 -0500
@@ -3287,7 +3287,8 @@ long arch_memory_op(int op, XEN_GUEST_HA
{
if ( is_xen_heap_mfn(prev_mfn) )
/* Xen heap frames are simply unhooked from this phys slot. */
- guest_physmap_remove_page(d, xatp.gpfn, prev_mfn);
+ guest_physmap_remove_page(d, xatp.gpfn, prev_mfn,
+ NORMAL_PAGE_ORDER);
else
/* Normal domain memory is freed, to avoid leaking memory. */
guest_remove_page(d, xatp.gpfn);
@@ -3296,10 +3297,10 @@ long arch_memory_op(int op, XEN_GUEST_HA
/* Unmap from old location, if any. */
gpfn = get_gpfn_from_mfn(mfn);
if ( gpfn != INVALID_M2P_ENTRY )
- guest_physmap_remove_page(d, gpfn, mfn);
+ guest_physmap_remove_page(d, gpfn, mfn, NORMAL_PAGE_ORDER);
/* Map at new location. */
- guest_physmap_add_page(d, xatp.gpfn, mfn);
+ guest_physmap_add_page(d, xatp.gpfn, mfn, NORMAL_PAGE_ORDER);
domain_unlock(d);
diff -r 810d8c3ac992 xen/arch/x86/mm/hap/p2m-ept.c
--- a/xen/arch/x86/mm/hap/p2m-ept.c Thu May 08 16:58:33 2008 +0100
+++ b/xen/arch/x86/mm/hap/p2m-ept.c Tue May 13 03:39:03 2008 -0500
@@ -20,6 +20,7 @@
#include <xen/domain_page.h>
#include <xen/sched.h>
#include <asm/current.h>
+#include <asm/paging.h>
#include <asm/types.h>
#include <asm/domain.h>
#include <asm/p2m.h>
@@ -46,6 +47,9 @@ static void ept_p2m_type_to_flags(ept_en
}
}
+#define GUEST_TABLE_NORMAL_PAGE 1
+#define GUEST_TABLE_SUPER_PAGE 2
+
static int ept_next_level(struct domain *d, bool_t read_only,
ept_entry_t **table, unsigned long *gfn_remainder,
u32 shift)
@@ -54,7 +58,6 @@ static int ept_next_level(struct domain
u32 index;
index = *gfn_remainder >> shift;
- *gfn_remainder &= (1UL << shift) - 1;
ept_entry = (*table) + index;
@@ -83,31 +86,53 @@ static int ept_next_level(struct domain
ept_entry->r = ept_entry->w = ept_entry->x = 1;
}
- next = map_domain_page(ept_entry->mfn);
- unmap_domain_page(*table);
- *table = next;
-
- return 1;
+ if ( !ept_entry->sp_avail )
+ {
+ *gfn_remainder &= (1UL << shift) - 1;
+ next = map_domain_page(ept_entry->mfn);
+ unmap_domain_page(*table);
+ *table = next;
+ return GUEST_TABLE_NORMAL_PAGE;
+ }
+ else
+ return GUEST_TABLE_SUPER_PAGE;
}
static int
-ept_set_entry(struct domain *d, unsigned long gfn, mfn_t mfn, p2m_type_t p2mt)
-{
- ept_entry_t *table =
- map_domain_page(mfn_x(pagetable_get_mfn(d->arch.phys_table)));
- unsigned long gfn_remainder = gfn;
+ept_set_entry(struct domain *d, unsigned long gfn, mfn_t mfn,
+ unsigned int order, p2m_type_t p2mt)
+{
+ ept_entry_t *table = NULL;
+ unsigned long gfn_remainder = gfn, offset = 0;
ept_entry_t *ept_entry = NULL;
u32 index;
- int i, rv = 0;
+ int i, rv = 0, ret = 0;
+ int walk_level = order / EPT_TABLE_ORDER;
/* Should check if gfn obeys GAW here */
- for ( i = EPT_DEFAULT_GAW; i > 0; i-- )
- if ( !ept_next_level(d, 0, &table, &gfn_remainder,
- i * EPT_TABLE_ORDER) )
+ if ( order != 0 )
+ if ( (gfn & ((1UL << order) - 1)) )
+ return 1;
+
+ table = map_domain_page(mfn_x(pagetable_get_mfn(d->arch.phys_table)));
+
+ ASSERT(table != NULL);
+
+ for ( i = EPT_DEFAULT_GAW; i > walk_level; i-- )
+ {
+ ret = ept_next_level(d, 0, &table, &gfn_remainder,
+ i * EPT_TABLE_ORDER);
+ if ( !ret )
goto out;
-
- index = gfn_remainder;
+ else if ( ret == GUEST_TABLE_SUPER_PAGE )
+ break;
+ }
+
+ index = gfn_remainder >> ( i ? (i * EPT_TABLE_ORDER): order);
+ walk_level = ( i ? ( i * EPT_TABLE_ORDER) : order) / EPT_TABLE_ORDER;
+ offset = (gfn_remainder & ( ((1 << (i*EPT_TABLE_ORDER)) - 1)));
+
ept_entry = table + index;
if ( mfn_valid(mfn_x(mfn)) || (p2mt == p2m_mmio_direct) )
@@ -117,9 +142,20 @@ ept_set_entry(struct domain *d, unsigned
d->arch.p2m->max_mapped_pfn = gfn;
ept_entry->emt = EPT_DEFAULT_MT;
- ept_entry->sp_avail = 0;
+ ept_entry->sp_avail = walk_level ? 1 : 0;
+
+ if ( ret == GUEST_TABLE_SUPER_PAGE )
+ {
+ ept_entry->mfn = mfn_x(mfn) - offset;
+ if ( ept_entry->avail1 == p2m_ram_logdirty &&
+ p2mt == p2m_ram_rw )
+ for ( i = 0; i < 512; i++ )
+ paging_mark_dirty(d, mfn_x(mfn)-offset+i);
+ }
+ else
+ ept_entry->mfn = mfn_x(mfn);
+
ept_entry->avail1 = p2mt;
- ept_entry->mfn = mfn_x(mfn);
ept_entry->rsvd = 0;
ept_entry->avail2 = 0;
/* last step */
@@ -132,14 +168,42 @@ ept_set_entry(struct domain *d, unsigned
/* Success */
rv = 1;
- out:
+out:
unmap_domain_page(table);
ept_sync_domain(d);
+ /* Now the p2m table is not shared with vt-d page table */
+
+ if ( iommu_enabled && is_hvm_domain(d) )
+ {
+ if ( p2mt == p2m_ram_rw )
+ {
+ if ( ret == GUEST_TABLE_SUPER_PAGE )
+ {
+ for ( i = 0; i < 512; i++ )
+ iommu_map_page(d, gfn-offset+i, mfn_x(mfn)-offset+i);
+ }
+ else if ( ret )
+ iommu_map_page(d, gfn, mfn_x(mfn));
+ }
+ else
+ {
+ if ( ret == GUEST_TABLE_SUPER_PAGE )
+ {
+ for ( i = 0; i < 512; i++ )
+ iommu_unmap_page(d, gfn-offset+i);
+ }
+ else if ( ret )
+ iommu_unmap_page(d, gfn);
+ }
+ }
+
+#ifdef P2M_SHARE_WITH_VTD_PAGE_TABLE
/* If p2m table is shared with vtd page-table. */
if ( iommu_enabled && is_hvm_domain(d) && (p2mt == p2m_mmio_direct) )
iommu_flush(d, gfn, (u64*)ept_entry);
+#endif
return rv;
}
@@ -152,7 +216,7 @@ static mfn_t ept_get_entry(struct domain
unsigned long gfn_remainder = gfn;
ept_entry_t *ept_entry;
u32 index;
- int i;
+ int i, ret=0;
mfn_t mfn = _mfn(INVALID_MFN);
*t = p2m_mmio_dm;
@@ -164,17 +228,31 @@ static mfn_t ept_get_entry(struct domain
/* Should check if gfn obeys GAW here. */
for ( i = EPT_DEFAULT_GAW; i > 0; i-- )
- if ( !ept_next_level(d, 1, &table, &gfn_remainder,
- i * EPT_TABLE_ORDER) )
+ {
+ ret = ept_next_level(d, 1, &table, &gfn_remainder,
+ i * EPT_TABLE_ORDER);
+ if ( !ret )
goto out;
-
- index = gfn_remainder;
+ else if ( ret == GUEST_TABLE_SUPER_PAGE )
+ break;
+ }
+
+ index = gfn_remainder >> ( i * EPT_TABLE_ORDER);
ept_entry = table + index;
if ( ept_entry->avail1 != p2m_invalid )
{
*t = ept_entry->avail1;
mfn = _mfn(ept_entry->mfn);
+ if ( i )
+ {
+ /* we may meet super pages, and to split into 4k pages
+ * to emulate p2m table
+ */
+ unsigned long split_mfn =
+ mfn_x(mfn) + (gfn_remainder & ( ((1 << (i*EPT_TABLE_ORDER)) - 1 )));
+ mfn = _mfn(split_mfn);
+ }
}
out:
@@ -205,33 +283,63 @@ static void ept_change_entry_type_global
l4e = map_domain_page(mfn_x(pagetable_get_mfn(d->arch.phys_table)));
for (i4 = 0; i4 < EPT_PAGETABLE_ENTRIES; i4++ )
{
- if ( !l4e[i4].epte || l4e[i4].sp_avail )
+ if ( !l4e[i4].epte )
continue;
- l3e = map_domain_page(l4e[i4].mfn);
- for ( i3 = 0; i3 < EPT_PAGETABLE_ENTRIES; i3++ )
- {
- if ( !l3e[i3].epte || l3e[i3].sp_avail )
+ if ( !l4e[i4].sp_avail )
+ {
+ l3e = map_domain_page(l4e[i4].mfn);
+ for ( i3 = 0; i3 < EPT_PAGETABLE_ENTRIES; i3++ )
+ {
+ if ( !l3e[i3].epte )
+ continue;
+ if ( !l3e[i3].sp_avail )
+ {
+ l2e = map_domain_page(l3e[i3].mfn);
+ for ( i2 = 0; i2 < EPT_PAGETABLE_ENTRIES; i2++ )
+ {
+ if ( !l2e[i2].epte )
+ continue;
+ if ( !l2e[i2].sp_avail )
+ {
+ l1e = map_domain_page(l2e[i2].mfn);
+ for ( i1 = 0; i1 < EPT_PAGETABLE_ENTRIES; i1++ )
+ {
+ if ( !l1e[i1].epte )
+ continue;
+ if ( l1e[i1].avail1 != ot )
+ continue;
+ l1e[i1].avail1 = nt;
+ ept_p2m_type_to_flags(l1e+i1, nt);
+ }
+ unmap_domain_page(l1e);
+ }
+ else
+ {
+ if ( l2e[i2].avail1 != ot )
+ continue;
+ l2e[i2].avail1 = nt;
+ ept_p2m_type_to_flags(l2e+i2, nt);
+ }
+ }
+ unmap_domain_page(l2e);
+ }
+ else
+ {
+ if ( l3e[i3].avail1 != ot )
+ continue;
+ l3e[i3].avail1 = nt;
+ ept_p2m_type_to_flags(l3e+i3, nt);
+ }
+ }
+ unmap_domain_page(l3e);
+ }
+ else
+ {
+ if ( l4e[i4].avail1 != ot )
continue;
- l2e = map_domain_page(l3e[i3].mfn);
- for ( i2 = 0; i2 < EPT_PAGETABLE_ENTRIES; i2++ )
- {
- if ( !l2e[i2].epte || l2e[i2].sp_avail )
- continue;
- l1e = map_domain_page(l2e[i2].mfn);
- for ( i1 = 0; i1 < EPT_PAGETABLE_ENTRIES; i1++ )
- {
- if ( !l1e[i1].epte )
- continue;
- if ( l1e[i1].avail1 != ot )
- continue;
- l1e[i1].avail1 = nt;
- ept_p2m_type_to_flags(l1e+i1, nt);
- }
- unmap_domain_page(l1e);
- }
- unmap_domain_page(l2e);
- }
- unmap_domain_page(l3e);
+ l4e[i4].avail1 = nt;
+ ept_p2m_type_to_flags(l4e+i4, nt);
+ }
}
unmap_domain_page(l4e);
diff -r 810d8c3ac992 xen/arch/x86/mm/p2m.c
--- a/xen/arch/x86/mm/p2m.c Thu May 08 16:58:33 2008 +0100
+++ b/xen/arch/x86/mm/p2m.c Tue May 13 04:28:16 2008 -0500
@@ -151,9 +151,11 @@ p2m_next_level(struct domain *d, mfn_t *
unsigned long *gfn_remainder, unsigned long gfn, u32 shift,
u32 max, unsigned long type)
{
+ l1_pgentry_t *l1_entry;
l1_pgentry_t *p2m_entry;
l1_pgentry_t new_entry;
void *next;
+ int i;
ASSERT(d->arch.p2m->alloc_page);
if ( !(p2m_entry = p2m_find_entry(*table, gfn_remainder, gfn,
@@ -194,6 +196,44 @@ p2m_next_level(struct domain *d, mfn_t *
break;
}
}
+
+ ASSERT(l1e_get_flags(*p2m_entry) & _PAGE_PRESENT);
+
+ /* split single large page into 4KB page in P2M table */
+ if ( type == PGT_l1_page_table && (l1e_get_flags(*p2m_entry) & _PAGE_PSE) )
+ {
+ unsigned long flags, pfn;
+ struct page_info *pg = d->arch.p2m->alloc_page(d);
+ if ( pg == NULL )
+ return 0;
+ list_add_tail(&pg->list, &d->arch.p2m->pages);
+ pg->u.inuse.type_info = PGT_l1_page_table | 1 | PGT_validated;
+ pg->count_info = 1;
+
+ /* New splintered mappings inherit the flags of the old superpage,
+ * with a little reorganisation for the _PAGE_PSE_PAT bit. */
+ flags = l1e_get_flags(*p2m_entry);
+ pfn = l1e_get_pfn(*p2m_entry);
+ if ( pfn & 1 ) /* ==> _PAGE_PSE_PAT was set */
+ pfn -= 1; /* Clear it; _PAGE_PSE becomes _PAGE_PAT */
+ else
+ flags &= ~_PAGE_PSE; /* Clear _PAGE_PSE (== _PAGE_PAT) */
+
+ l1_entry = map_domain_page(mfn_x(page_to_mfn(pg)));
+ for ( i = 0; i < L1_PAGETABLE_ENTRIES; i++ )
+ {
+ new_entry = l1e_from_pfn(pfn + i, flags);
+ paging_write_p2m_entry(d, gfn,
+ l1_entry+i, *table_mfn, new_entry, 1);
+ }
+ unmap_domain_page(l1_entry);
+
+ new_entry = l1e_from_pfn(mfn_x(page_to_mfn(pg)),
+ __PAGE_HYPERVISOR|_PAGE_USER);
+ paging_write_p2m_entry(d, gfn,
+ p2m_entry, *table_mfn, new_entry, 2);
+ }
+
*table_mfn = _mfn(l1e_get_pfn(*p2m_entry));
next = map_domain_page(mfn_x(*table_mfn));
unmap_domain_page(*table);
@@ -204,7 +244,8 @@ p2m_next_level(struct domain *d, mfn_t *
// Returns 0 on error (out of memory)
static int
-p2m_set_entry(struct domain *d, unsigned long gfn, mfn_t mfn, p2m_type_t p2mt)
+p2m_set_entry(struct domain *d, unsigned long gfn, mfn_t mfn,
+ unsigned int page_order, p2m_type_t p2mt)
{
// XXX -- this might be able to be faster iff current->domain == d
mfn_t table_mfn = pagetable_get_mfn(d->arch.phys_table);
@@ -212,6 +253,7 @@ p2m_set_entry(struct domain *d, unsigned
unsigned long gfn_remainder = gfn;
l1_pgentry_t *p2m_entry;
l1_pgentry_t entry_content;
+ l2_pgentry_t l2e_content;
int rv=0;
#if CONFIG_PAGING_LEVELS >= 4
@@ -235,26 +277,53 @@ p2m_set_entry(struct domain *d, unsigned
PGT_l2_page_table) )
goto out;
- if ( !p2m_next_level(d, &table_mfn, &table, &gfn_remainder, gfn,
- L2_PAGETABLE_SHIFT - PAGE_SHIFT,
- L2_PAGETABLE_ENTRIES, PGT_l1_page_table) )
- goto out;
-
- p2m_entry = p2m_find_entry(table, &gfn_remainder, gfn,
- 0, L1_PAGETABLE_ENTRIES);
- ASSERT(p2m_entry);
+ if ( page_order == NORMAL_PAGE_ORDER )
+ {
+ if ( !p2m_next_level(d, &table_mfn, &table, &gfn_remainder, gfn,
+ L2_PAGETABLE_SHIFT - PAGE_SHIFT,
+ L2_PAGETABLE_ENTRIES, PGT_l1_page_table) )
+ goto out;
+
+ p2m_entry = p2m_find_entry(table, &gfn_remainder, gfn,
+ 0, L1_PAGETABLE_ENTRIES);
+ ASSERT(p2m_entry);
+
+ if ( mfn_valid(mfn) || (p2mt == p2m_mmio_direct) )
+ entry_content = l1e_from_pfn(mfn_x(mfn), p2m_type_to_flags(p2mt));
+ else
+ entry_content = l1e_empty();
+
+ /* level 1 entry */
+ paging_write_p2m_entry(d, gfn, p2m_entry, table_mfn, entry_content, 1);
+ }
+ else
+ {
+ p2m_entry = p2m_find_entry(table, &gfn_remainder, gfn,
+ L2_PAGETABLE_SHIFT - PAGE_SHIFT,
+ L2_PAGETABLE_ENTRIES);
+ ASSERT(p2m_entry);
+
+ if ( (l1e_get_flags(*p2m_entry) & _PAGE_PRESENT) &&
+ !(l1e_get_flags(*p2m_entry) & _PAGE_PSE) )
+ {
+ P2M_ERROR("configure P2M table 4KB L2 entry with large page\n");
+ domain_crash(d);
+ goto out;
+ }
+
+ if ( mfn_valid(mfn) )
+ l2e_content = l2e_from_pfn(mfn_x(mfn),
+ p2m_type_to_flags(p2mt) | _PAGE_PSE);
+ else
+ l2e_content = l2e_empty();
+
+ entry_content.l1 = l2e_content.l2;
+ paging_write_p2m_entry(d, gfn, p2m_entry, table_mfn, entry_content, 2);
+ }
/* Track the highest gfn for which we have ever had a valid mapping */
if ( mfn_valid(mfn) && (gfn > d->arch.p2m->max_mapped_pfn) )
d->arch.p2m->max_mapped_pfn = gfn;
-
- if ( mfn_valid(mfn) || (p2mt == p2m_mmio_direct) )
- entry_content = l1e_from_pfn(mfn_x(mfn), p2m_type_to_flags(p2mt));
- else
- entry_content = l1e_empty();
-
- /* level 1 entry */
- paging_write_p2m_entry(d, gfn, p2m_entry, table_mfn, entry_content, 1);
if ( iommu_enabled && is_hvm_domain(d) )
{
@@ -335,6 +404,16 @@ p2m_gfn_to_mfn(struct domain *d, unsigne
unmap_domain_page(l2e);
return _mfn(INVALID_MFN);
}
+ else if ( (l2e_get_flags(*l2e) & _PAGE_PSE) )
+ {
+ mfn = _mfn(l2e_get_pfn(*l2e) + l1_table_offset(addr));
+ *t = p2m_flags_to_type(l2e_get_flags(*l2e));
+ unmap_domain_page(l2e);
+
+ ASSERT(mfn_valid(mfn) || !p2m_is_ram(*t));
+ return (p2m_is_valid(*t)) ? mfn : _mfn(INVALID_MFN);
+ }
+
mfn = _mfn(l2e_get_pfn(*l2e));
unmap_domain_page(l2e);
@@ -358,6 +437,7 @@ static mfn_t p2m_gfn_to_mfn_current(unsi
{
mfn_t mfn = _mfn(INVALID_MFN);
p2m_type_t p2mt = p2m_mmio_dm;
+ paddr_t addr = ((paddr_t)gfn) << PAGE_SHIFT;
/* XXX This is for compatibility with the old model, where anything not
* XXX marked as RAM was considered to be emulated MMIO space.
* XXX Once we start explicitly registering MMIO regions in the p2m
@@ -366,25 +446,44 @@ static mfn_t p2m_gfn_to_mfn_current(unsi
if ( gfn <= current->domain->arch.p2m->max_mapped_pfn )
{
l1_pgentry_t l1e = l1e_empty();
+ l2_pgentry_t l2e = l2e_empty();
int ret;
ASSERT(gfn < (RO_MPT_VIRT_END - RO_MPT_VIRT_START)
/ sizeof(l1_pgentry_t));
- /* Need to __copy_from_user because the p2m is sparse and this
- * part might not exist */
- ret = __copy_from_user(&l1e,
- &phys_to_machine_mapping[gfn],
- sizeof(l1e));
-
- if ( ret == 0 ) {
- p2mt = p2m_flags_to_type(l1e_get_flags(l1e));
- ASSERT(l1e_get_pfn(l1e) != INVALID_MFN || !p2m_is_ram(p2mt));
+ ret = __copy_from_user(&l2e,
+ &__linear_l1_table[l1_linear_offset(RO_MPT_VIRT_START) + l2_linear_offset(addr)],
+ sizeof(l2e));
+
+ if ( (ret == 0) && (l2e_get_flags(l2e) & _PAGE_PRESENT) &&
+ (l2e_get_flags(l2e) & _PAGE_PSE) )
+ {
+ p2mt = p2m_flags_to_type(l2e_get_flags(l2e));
+ ASSERT(l2e_get_pfn(l2e) != INVALID_MFN || !p2m_is_ram(p2mt));
if ( p2m_is_valid(p2mt) )
- mfn = _mfn(l1e_get_pfn(l1e));
- else
- /* XXX see above */
+ mfn = _mfn(l2e_get_pfn(l2e) + l1_table_offset(addr));
+ else
p2mt = p2m_mmio_dm;
+ }
+ else
+ {
+
+ /* Need to __copy_from_user because the p2m is sparse and this
+ * part might not exist */
+ ret = __copy_from_user(&l1e,
+ &phys_to_machine_mapping[gfn],
+ sizeof(l1e));
+
+ if ( ret == 0 ) {
+ p2mt = p2m_flags_to_type(l1e_get_flags(l1e));
+ ASSERT(l1e_get_pfn(l1e) != INVALID_MFN || !p2m_is_ram(p2mt));
+ if ( p2m_is_valid(p2mt) )
+ mfn = _mfn(l1e_get_pfn(l1e));
+ else
+ /* XXX see above */
+ p2mt = p2m_mmio_dm;
+ }
}
}
@@ -430,9 +529,10 @@ void p2m_change_entry_type_global(struct
}
static inline
-int set_p2m_entry(struct domain *d, unsigned long gfn, mfn_t mfn, p2m_type_t p2mt)
-{
- return d->arch.p2m->set_entry(d, gfn, mfn, p2mt);
+int set_p2m_entry(struct domain *d, unsigned long gfn, mfn_t mfn,
+ unsigned int page_order, p2m_type_t p2mt)
+{
+ return d->arch.p2m->set_entry(d, gfn, mfn, page_order, p2mt);
}
// Allocate a new p2m table for a domain.
@@ -493,7 +593,8 @@ int p2m_alloc_table(struct domain *d,
P2M_PRINTK("populating p2m table\n");
/* Initialise physmap tables for slot zero. Other code assumes this. */
- if ( !set_p2m_entry(d, 0, _mfn(INVALID_MFN), p2m_invalid) )
+ if ( !set_p2m_entry(d, 0, _mfn(INVALID_MFN), NORMAL_PAGE_ORDER,
+ p2m_invalid) )
goto error;
/* Copy all existing mappings from the page list and m2p */
@@ -512,7 +613,7 @@ int p2m_alloc_table(struct domain *d,
(gfn != 0x55555555L)
#endif
&& gfn != INVALID_M2P_ENTRY
- && !set_p2m_entry(d, gfn, mfn, p2m_ram_rw) )
+ && !set_p2m_entry(d, gfn, mfn, NORMAL_PAGE_ORDER, p2m_ram_rw) )
goto error;
}
@@ -688,6 +789,28 @@ static void audit_p2m(struct domain *d)
gfn += 1 << (L2_PAGETABLE_SHIFT - PAGE_SHIFT);
continue;
}
+
+ /* check for super page */
+ if ( l2e_get_flags(l2e[i2]) & _PAGE_PSE )
+ {
+ mfn = l2e_get_pfn(l2e[i2]);
+ ASSERT(mfn_valid(_mfn(mfn)));
+ for ( i1 = 0; i1 < L1_PAGETABLE_ENTRIES; i1++)
+ {
+ m2pfn = get_gpfn_from_mfn(mfn+i1);
+ if ( m2pfn != (gfn + i) )
+ {
+ pmbad++;
+ P2M_PRINTK("mismatch: gfn %#lx -> mfn %#lx"
+ " -> gfn %#lx\n", gfn+i, mfn+i,
+ m2pfn);
+ BUG();
+ }
+ }
+ gfn += 1 << (L2_PAGETABLE_SHIFT - PAGE_SHIFT);
+ continue;
+ }
+
l1e = map_domain_page(mfn_x(_mfn(l2e_get_pfn(l2e[i2]))));
for ( i1 = 0; i1 < L1_PAGETABLE_ENTRIES; i1++, gfn++ )
@@ -737,35 +860,40 @@ static void audit_p2m(struct domain *d)
static void
-p2m_remove_page(struct domain *d, unsigned long gfn, unsigned long mfn)
-{
+p2m_remove_page(struct domain *d, unsigned long gfn, unsigned long mfn,
+ unsigned int page_order)
+{
+ int i;
if ( !paging_mode_translate(d) )
return;
P2M_DEBUG("removing gfn=%#lx mfn=%#lx\n", gfn, mfn);
- set_p2m_entry(d, gfn, _mfn(INVALID_MFN), p2m_invalid);
- set_gpfn_from_mfn(mfn, INVALID_M2P_ENTRY);
+ set_p2m_entry(d, gfn, _mfn(INVALID_MFN), page_order, p2m_invalid);
+ for ( i = 0; i < (1UL << page_order); i++ )
+ set_gpfn_from_mfn(mfn+i, INVALID_M2P_ENTRY);
}
void
guest_physmap_remove_page(struct domain *d, unsigned long gfn,
- unsigned long mfn)
+ unsigned long mfn, unsigned int page_order)
{
p2m_lock(d->arch.p2m);
audit_p2m(d);
- p2m_remove_page(d, gfn, mfn);
+ p2m_remove_page(d, gfn, mfn, page_order);
audit_p2m(d);
p2m_unlock(d->arch.p2m);
}
int
guest_physmap_add_entry(struct domain *d, unsigned long gfn,
- unsigned long mfn, p2m_type_t t)
+ unsigned long mfn, unsigned int page_order,
+ p2m_type_t t)
{
unsigned long ogfn;
p2m_type_t ot;
mfn_t omfn;
int rc = 0;
+ int i;
if ( !paging_mode_translate(d) )
return -EINVAL;
@@ -795,7 +923,8 @@ guest_physmap_add_entry(struct domain *d
if ( p2m_is_ram(ot) )
{
ASSERT(mfn_valid(omfn));
- set_gpfn_from_mfn(mfn_x(omfn), INVALID_M2P_ENTRY);
+ for ( i = 0; i < (1UL << page_order); i++ )
+ set_gpfn_from_mfn(mfn_x(omfn)+i, INVALID_M2P_ENTRY);
}
ogfn = mfn_to_gfn(d, _mfn(mfn));
@@ -818,21 +947,23 @@ guest_physmap_add_entry(struct domain *d
P2M_DEBUG("old gfn=%#lx -> mfn %#lx\n",
ogfn , mfn_x(omfn));
if ( mfn_x(omfn) == mfn )
- p2m_remove_page(d, ogfn, mfn);
+ p2m_remove_page(d, ogfn, mfn, page_order);
}
}
if ( mfn_valid(_mfn(mfn)) )
{
- if ( !set_p2m_entry(d, gfn, _mfn(mfn), t) )
+ if ( !set_p2m_entry(d, gfn, _mfn(mfn), page_order, t) )
rc = -EINVAL;
- set_gpfn_from_mfn(mfn, gfn);
+ for ( i = 0; i < (1UL << page_order); i++ )
+ set_gpfn_from_mfn(mfn+i, gfn+i);
}
else
{
gdprintk(XENLOG_WARNING, "Adding bad mfn to p2m map (%#lx -> %#lx)\n",
gfn, mfn);
- if ( !set_p2m_entry(d, gfn, _mfn(INVALID_MFN), p2m_invalid) )
+ if ( !set_p2m_entry(d, gfn, _mfn(INVALID_MFN), page_order,
+ p2m_invalid) )
rc = -EINVAL;
}
@@ -851,7 +982,7 @@ void p2m_change_type_global(struct domai
l1_pgentry_t l1e_content;
l1_pgentry_t *l1e;
l2_pgentry_t *l2e;
- mfn_t l1mfn;
+ mfn_t l1mfn, l2mfn;
int i1, i2;
l3_pgentry_t *l3e;
int i3;
@@ -891,11 +1022,26 @@ void p2m_change_type_global(struct domai
{
continue;
}
+ l2mfn = _mfn(l3e_get_pfn(l3e[i3]));
l2e = map_domain_page(l3e_get_pfn(l3e[i3]));
for ( i2 = 0; i2 < L2_PAGETABLE_ENTRIES; i2++ )
{
if ( !(l2e_get_flags(l2e[i2]) & _PAGE_PRESENT) )
{
+ continue;
+ }
+
+ if ( (l2e_get_flags(l2e[i2]) & _PAGE_PSE) )
+ {
+ flags = l2e_get_flags(l2e[i2]);
+ if ( p2m_flags_to_type(flags) != ot )
+ continue;
+ mfn = l2e_get_pfn(l2e[i2]);
+ gfn = get_gpfn_from_mfn(mfn);
+ flags = p2m_flags_to_type(nt);
+ l1e_content = l1e_from_pfn(mfn, flags | _PAGE_PSE);
+ paging_write_p2m_entry(d, gfn, (l1_pgentry_t *)&l2e[i2],
+ l2mfn, l1e_content, 2);
continue;
}
@@ -944,7 +1090,7 @@ p2m_type_t p2m_change_type(struct domain
mfn = gfn_to_mfn(d, gfn, &pt);
if ( pt == ot )
- set_p2m_entry(d, gfn, mfn, nt);
+ set_p2m_entry(d, gfn, mfn, NORMAL_PAGE_ORDER, nt);
p2m_unlock(d->arch.p2m);
@@ -968,7 +1114,7 @@ set_mmio_p2m_entry(struct domain *d, uns
set_gpfn_from_mfn(mfn_x(omfn), INVALID_M2P_ENTRY);
}
- rc = set_p2m_entry(d, gfn, mfn, p2m_mmio_direct);
+ rc = set_p2m_entry(d, gfn, mfn, NORMAL_PAGE_ORDER, p2m_mmio_direct);
if ( 0 == rc )
gdprintk(XENLOG_ERR,
"set_mmio_p2m_entry: set_p2m_entry failed! mfn=%08lx\n",
@@ -992,7 +1138,7 @@ clear_mmio_p2m_entry(struct domain *d, u
"clear_mmio_p2m_entry: gfn_to_mfn failed! gfn=%08lx\n", gfn);
return 0;
}
- rc = set_p2m_entry(d, gfn, _mfn(INVALID_MFN), 0);
+ rc = set_p2m_entry(d, gfn, _mfn(INVALID_MFN), NORMAL_PAGE_ORDER, 0);
return rc;
}
diff -r 810d8c3ac992 xen/common/grant_table.c
--- a/xen/common/grant_table.c Thu May 08 16:58:33 2008 +0100
+++ b/xen/common/grant_table.c Tue May 13 03:39:03 2008 -0500
@@ -1159,7 +1159,7 @@ gnttab_transfer(
spin_lock(&e->grant_table->lock);
sha = &shared_entry(e->grant_table, gop.ref);
- guest_physmap_add_page(e, sha->frame, mfn);
+ guest_physmap_add_page(e, sha->frame, mfn, NORMAL_PAGE_ORDER);
sha->frame = mfn;
wmb();
sha->flags |= GTF_transfer_completed;
diff -r 810d8c3ac992 xen/common/memory.c
--- a/xen/common/memory.c Thu May 08 16:58:33 2008 +0100
+++ b/xen/common/memory.c Tue May 13 03:49:48 2008 -0500
@@ -109,8 +109,12 @@ static void populate_physmap(struct memo
goto out;
}
- if ( unlikely(__copy_from_guest_offset(&gpfn, a->extent_list, i, 1)) )
- goto out;
+ if ( unlikely(__copy_from_guest_offset(&gpfn, a->extent_list,
+ i << a->extent_order, 1)) )
+ {
+ printk("copy_from_guest failed.\n");
+ goto out;
+ }
page = alloc_domheap_pages(
d, a->extent_order, a->memflags | MEMF_node(node));
@@ -126,11 +130,7 @@ static void populate_physmap(struct memo
mfn = page_to_mfn(page);
if ( unlikely(paging_mode_translate(d)) )
- {
- for ( j = 0; j < (1 << a->extent_order); j++ )
- if ( guest_physmap_add_page(d, gpfn + j, mfn + j) )
- goto out;
- }
+ guest_physmap_add_page(d, gpfn, mfn, a->extent_order);
else
{
for ( j = 0; j < (1 << a->extent_order); j++ )
@@ -172,7 +172,7 @@ int guest_remove_page(struct domain *d,
if ( test_and_clear_bit(_PGC_allocated, &page->count_info) )
put_page(page);
- guest_physmap_remove_page(d, gmfn, mfn);
+ guest_physmap_remove_page(d, gmfn, mfn, 0);
put_page(page);
@@ -419,7 +419,7 @@ static long memory_exchange(XEN_GUEST_HA
if ( !test_and_clear_bit(_PGC_allocated, &page->count_info) )
BUG();
mfn = page_to_mfn(page);
- guest_physmap_remove_page(d, mfn_to_gmfn(d, mfn), mfn);
+ guest_physmap_remove_page(d, mfn_to_gmfn(d, mfn), mfn, 0);
put_page(page);
}
@@ -441,8 +441,7 @@ static long memory_exchange(XEN_GUEST_HA
if ( unlikely(paging_mode_translate(d)) )
{
/* Ignore failure here. There's nothing we can do. */
- for ( k = 0; k < (1UL << exch.out.extent_order); k++ )
- (void)guest_physmap_add_page(d, gpfn + k, mfn + k);
+ (void)guest_physmap_add_page(d, gpfn, mfn, exch.out.extent_order);
}
else
{
diff -r 810d8c3ac992 xen/include/asm-ia64/grant_table.h
--- a/xen/include/asm-ia64/grant_table.h Thu May 08 16:58:33 2008 +0100
+++ b/xen/include/asm-ia64/grant_table.h Tue May 13 04:38:40 2008 -0500
@@ -13,7 +13,7 @@ int replace_grant_host_mapping(unsigned
int replace_grant_host_mapping(unsigned long gpaddr, unsigned long mfn, unsigned long new_gpaddr, unsigned int flags);
// for grant transfer
-int guest_physmap_add_page(struct domain *d, unsigned long gpfn, unsigned long mfn);
+int guest_physmap_add_page(struct domain *d, unsigned long gpfn, unsigned long mfn, int order);
/* XXX
* somewhere appropriate
diff -r 810d8c3ac992 xen/include/asm-ia64/shadow.h
--- a/xen/include/asm-ia64/shadow.h Thu May 08 16:58:33 2008 +0100
+++ b/xen/include/asm-ia64/shadow.h Tue May 13 03:39:03 2008 -0500
@@ -40,8 +40,10 @@
* Utilities to change relationship of gpfn->mfn for designated domain,
* which is required by gnttab transfer, balloon, device model and etc.
*/
-int guest_physmap_add_page(struct domain *d, unsigned long gpfn, unsigned long mfn);
-void guest_physmap_remove_page(struct domain *d, unsigned long gpfn, unsigned long mfn);
+int guest_physmap_add_page(struct domain *d, unsigned long gpfn,
+ unsigned long mfn, unsigned int page_order);
+void guest_physmap_remove_page(struct domain *d, unsigned long gpfn,
+ unsigned long mfn, unsigned int page_order);
static inline int
shadow_mode_enabled(struct domain *d)
diff -r 810d8c3ac992 xen/include/asm-x86/mm.h
--- a/xen/include/asm-x86/mm.h Thu May 08 16:58:33 2008 +0100
+++ b/xen/include/asm-x86/mm.h Tue May 13 03:39:03 2008 -0500
@@ -124,6 +124,14 @@ static inline u32 pickle_domptr(struct d
/* The order of the largest allocation unit we use for shadow pages */
#define SHADOW_MAX_ORDER 2 /* Need up to 16k allocs for 32-bit on PAE/64 */
+/* The order of continuously allocated super page frames */
+#define NORMAL_PAGE_ORDER 0 /* 4KB page */
+#if CONFIG_PAGING_LEVELS == 2
+#define SUPER_PAGE_ORDER 10 /* 4MB page */
+#else
+#define SUPER_PAGE_ORDER 9 /* 2MB page */
+#endif
+
#define page_get_owner(_p) (unpickle_domptr((_p)->u.inuse._domain))
#define page_set_owner(_p,_d) ((_p)->u.inuse._domain = pickle_domptr(_d))
diff -r 810d8c3ac992 xen/include/asm-x86/p2m.h
--- a/xen/include/asm-x86/p2m.h Thu May 08 16:58:33 2008 +0100
+++ b/xen/include/asm-x86/p2m.h Tue May 13 03:39:03 2008 -0500
@@ -102,7 +102,8 @@ struct p2m_domain {
void (*free_page )(struct domain *d,
struct page_info *pg);
int (*set_entry )(struct domain *d, unsigned long gfn,
- mfn_t mfn, p2m_type_t p2mt);
+ mfn_t mfn, unsigned int page_order,
+ p2m_type_t p2mt);
mfn_t (*get_entry )(struct domain *d, unsigned long gfn,
p2m_type_t *p2mt);
mfn_t (*get_entry_current)(unsigned long gfn,
@@ -203,21 +204,23 @@ void p2m_final_teardown(struct domain *d
/* Add a page to a domain's p2m table */
int guest_physmap_add_entry(struct domain *d, unsigned long gfn,
- unsigned long mfn, p2m_type_t t);
+ unsigned long mfn, unsigned int page_order,
+ p2m_type_t t);
/* Untyped version for RAM only, for compatibility
*
* Return 0 for success
*/
static inline int guest_physmap_add_page(struct domain *d, unsigned long gfn,
- unsigned long mfn)
-{
- return guest_physmap_add_entry(d, gfn, mfn, p2m_ram_rw);
+ unsigned long mfn,
+ unsigned int page_order)
+{
+ return guest_physmap_add_entry(d, gfn, mfn, page_order, p2m_ram_rw);
}
/* Remove a page from a domain's p2m table */
void guest_physmap_remove_page(struct domain *d, unsigned long gfn,
- unsigned long mfn);
+ unsigned long mfn, unsigned int page_order);
/* Change types across all p2m entries in a domain */
void p2m_change_type_global(struct domain *d, p2m_type_t ot, p2m_type_t nt);
diff -r 810d8c3ac992 xen/include/xen/paging.h
--- a/xen/include/xen/paging.h Thu May 08 16:58:33 2008 +0100
+++ b/xen/include/xen/paging.h Tue May 13 03:39:03 2008 -0500
@@ -18,8 +18,8 @@
#else
#define paging_mode_translate(d) (0)
-#define guest_physmap_add_page(d, p, m) (0)
-#define guest_physmap_remove_page(d, p, m) ((void)0)
+#define guest_physmap_add_page(d, p, m, o) (0)
+#define guest_physmap_remove_page(d, p, m, o) ((void)0)
#endif
[-- Attachment #3: Type: text/plain, Size: 138 bytes --]
_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xensource.com
http://lists.xensource.com/xen-devel
^ permalink raw reply [flat|nested] 14+ messages in thread
* Re: [PATCH] patch to support super page (2M) with EPT
2008-05-13 15:49 ` Huang2, Wei
@ 2008-05-14 8:40 ` Keir Fraser
2008-05-14 21:22 ` Huang2, Wei
0 siblings, 1 reply; 14+ messages in thread
From: Keir Fraser @ 2008-05-14 8:40 UTC (permalink / raw)
To: Huang2, Wei, Li, Xin B, Xin, Xiaohui, xen-devel
[-- Attachment #1.1: Type: text/plain, Size: 7568 bytes --]
You now break the populate_physmap interface like the original Intel patch
did. You cannot change the index argument to __copy_from_guest_offset().
Also you add in a bogus printk in the same chunk of the patch.
You do not use SUPER_PAGE_ORDER anywhere, and NORMAL_PAGE_ORDER only in some
places. You may as well remove both macros.
-- Keir
On 13/5/08 16:49, "Huang2, Wei" <Wei.Huang2@amd.com> wrote:
> Re-submit. It fixes the issues based on your comments.
>
> Thanks,
>
> -Wei
>
>
> From: Keir Fraser [mailto:keir.fraser@eu.citrix.com]
> Sent: Tuesday, May 13, 2008 8:51 AM
> To: Li, Xin B; Huang2, Wei; Xin, Xiaohui; xen-devel@lists.xensource.com
> Subject: Re: [Xen-devel][PATCH] patch to support super page (2M) with EPT
>
> Exactly. The interface for populate_physmap() is clear — if you ask for
> order-9 allocations then that is what you must get. Otherwise the allocation
> fails. It is up to the caller to retry with order-0 allocations _if_ that is a
> suitable fallback.
>
> -- Keir
>
> On 13/5/08 14:39, "Li, Xin B" <xin.b.li@intel.com> wrote:
>
>> if 2M page allocation fails, the domain builer will try to use 4K allocation
>> instead.
>> -Xin
>>
>>
>>>
>>>
>>>
>>>
>>> From: xen-devel-bounces@lists.xensource.com
>>> [mailto:xen-devel-bounces@lists.xensource.com] On Behalf Of Huang2, Wei
>>> Sent: 2008年5月13日 21:37
>>> To: Keir Fraser; Xin, Xiaohui; xen-devel@lists.xensource.com
>>> Subject: RE: [Xen-devel][PATCH] patch to support super page (2M) with EPT
>>>
>>>
>>>
>>>
>>>
>>> Memory.c looks more invasive because it takes care of failure cases using
>>> 4KB pages. Xiaohui’s patch tries to allocate pages using extend_order. But
>>> if this request fails for any reason, the guest cannot be started anymore.
>>>
>>>
>>>
>>> -Wei
>>>
>>>
>>>
>>>
>>>
>>>
>>> From: Keir Fraser [mailto:keir.fraser@eu.citrix.com]
>>> Sent: Tuesday, May 13, 2008 3:47 AM
>>> To: Huang2, Wei; Xin, Xiaohui; xen-devel@lists.xensource.com
>>> Subject: Re: [Xen-devel][PATCH] patch to support super page (2M) with EPT
>>>
>>>
>>>
>>> Overall your changes to common code seem a bit more invasive than in the
>>> Intel patch. In particular I don’t understand why you made such changes to
>>> common/memory.c. The other patch makes far fewer changes (and even some of
>>> those would go away since they have erroneously changed the
>>> populate_physmap interface). So my feeling is that the Intel patch is a
>>> slightly more elegant base to start with: extra changes that your patch
>>> makes really need to be accounted for.
>>>
>>> -- Keir
>>>
>>> On 12/5/08 18:28, "Huang2, Wei" <Wei.Huang2@amd.com> wrote:
>>>
>>> Here is a revised version. I get rid of 4MB support, as suggested. I did
>>> not see the issue mentioned by Xiaohui related to splitting last 2M into
>>> 4KB pages. But anyway, I attached two versions for your reference. Keir,
>>> please let me know if you have comments.
>>>
>>>
>>> -Wei
>>>
>>>
>>>
>>>
>>>
>>>
>>>
>>> From: Keir Fraser [mailto:keir.fraser@eu.citrix.com]
>>> Sent: Monday, May 12, 2008 2:03 AM
>>> To: Xin, Xiaohui; Huang2, Wei; xen-devel@lists.xensource.com
>>> Subject: Re: [Xen-devel][PATCH] patch to support super page (2M) with EPT
>>>
>>> Yes, absolutely no need for 4M page support. We do not support the 32-bit
>>> non-PAE build target any more.
>>>
>>> -- Keir
>>>
>>> On 12/5/08 06:04, "Xin, Xiaohui" <xiaohui.xin@intel.com> wrote:
>>>
>>> Some comments here:
>>> 1) Basically 4M pages allocations is not hardware naturally for EPT, we
>>> only use 2M super pages now.
>>> I remembered that Keir said that 2M pages allocation is sufficient, and he
>>> removed all the pure 32bit support already.
>>> 2) If we don’t allocate the last 2M area with 4kb pages, the EPT will
>>> meet some problem. Xen will set one of the 4k page
>>> there to be invalid, logically that means we should invalid the all
>>> the 2M page if we allocate it with 2M, and then the
>>> special pages Xen used in the high end of the guest memory can not
>>> be used then. May we know how you cope with that?
>>>
>>> Thanks
>>> Xiaohui
>>>
>>>
>>>
>>>
>>>
>>>
>>>
>>>
>>>
>>>
>>> From: xen-devel-bounces@lists.xensource.com
>>> [mailto:xen-devel-bounces@lists.xensource.com] On Behalf Of Huang2, Wei
>>> Sent: Monday, May 12, 2008 12:36 PM
>>> To: Xin, Xiaohui; xen-devel@lists.xensource.com
>>> Subject: RE: [Xen-devel][PATCH] patch to support super page (2M) with
>>> EPT
>>>
>>> This is the latest one I created. Please review it and I will
>>> re-submit.
>>>
>>> 1. It includes the patch for p2m-ept.c, directly from your
>>> previous patch.
>>>
>>> 2. Xc_hvm_create.c is based on my original approach. It includes
>>> support for both 2MB and 4MB pages. Also it considers the case of odd
>>> page size (such as 255MB). But I did not allocate the last 2MB area using
>>> 4KB pages. Let me know if it is a big issue.
>>>
>>> 3. The rest are pretty similar.
>>>
>>>
>>> Thanks,
>>>
>>> -Wei
>>>
>>>
>>> From: xen-devel-bounces@lists.xensource.com
>>> [mailto:xen-devel-bounces@lists.xensource.com] On Behalf Of Huang2, Wei
>>> Sent: Sunday, May 11, 2008 3:34 PM
>>> To: Xin, Xiaohui; xen-devel@lists.xensource.com
>>> Subject: RE: [Xen-devel][PATCH] patch to support super page (2M) with
>>> EPT
>>>
>>> Could we work together for a common solution? As far as I can see, it
>>> largely overlaps with my super page patch. The major difference is
>>> between p2m.c and p2m-ept.c.
>>>
>>> -Wei
>>>
>>>
>>> From: xen-devel-bounces@lists.xensource.com
>>> [mailto:xen-devel-bounces@lists.xensource.com] On Behalf Of Xin, Xiaohui
>>> Sent: Friday, May 09, 2008 4:11 AM
>>> To: xen-devel@lists.xensource.com
>>> Subject: [Xen-devel][PATCH] patch to support super page (2M) with EPT
>>>
>>> Attached are the patches to support super page with EPT. We only support
>>> 2M size. And shadow may still work fine with 4K pages.
>>> The patches can be split into 3 parts. Apply order is as attached.
>>>
>>> tool.diff
>>> To allocate 2M physical contiguous memory in guest except the first 2M
>>> and the last 2M.
>>> The first 2M covers special memory, and Xen use the last few pages in
>>> guest memory to do special things.
>>> We let them to be 4K pages as normal.
>>> super_page_common.patch
>>> To modify the p2m interfaces by adding an order parameter, such as
>>> guest_physmap_add_page(), p2m_set_entry(), etc.
>>> p2m-ept-file.patch
>>> To handle the EPT tables to support super page.
>>>
>>>
>>> Signed-off-by: Xin Xiaohui <xiaohui.xin@intel.com>
>>> Signed-off-by: Li Xin, B <xin.b.li@intel.com>
>>>
>>>
>>>
>>>
>>>
>>>
>>>
>>>
>>>
>>>
>>>
>>>
>>> _______________________________________________
>>> Xen-devel mailing list
>>> Xen-devel@lists.xensource.com
>>> http://lists.xensource.com/xen-devel
>>>
>>>
>>>
>>>
>>>
>>>
>>>
>>>
>>>
>>> _______________________________________________
>>> Xen-devel mailing list
>>> Xen-devel@lists.xensource.com
>>> http://lists.xensource.com/xen-devel
>>>
>>>
>>
>
>
[-- Attachment #1.2: Type: text/html, Size: 15502 bytes --]
[-- Attachment #2: Type: text/plain, Size: 138 bytes --]
_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xensource.com
http://lists.xensource.com/xen-devel
^ permalink raw reply [flat|nested] 14+ messages in thread
* RE: [PATCH] patch to support super page (2M) with EPT
2008-05-14 8:40 ` Keir Fraser
@ 2008-05-14 21:22 ` Huang2, Wei
0 siblings, 0 replies; 14+ messages in thread
From: Huang2, Wei @ 2008-05-14 21:22 UTC (permalink / raw)
To: Keir Fraser, Li, Xin B, Xin, Xiaohui, xen-devel
[-- Attachment #1.1: Type: text/plain, Size: 7962 bytes --]
Keir,
Here is the latest version. The changes include:
1. NORMAL_PAGE_ORDER and SUPER_PAGE_ORDER are removed
2. Changes to __copy_from_guest_offset() index are removed from populate_physmap() function
3. Because of (2), a super_page_array is created and passed to xc_domain_memory_populate_physmap() for allocating 2M pages
4. When 2M requests can not be satisfied, use 4K pages instead.
Thanks,
-Wei
________________________________
From: Keir Fraser [mailto:keir.fraser@eu.citrix.com]
Sent: Wednesday, May 14, 2008 3:40 AM
To: Huang2, Wei; Li, Xin B; Xin, Xiaohui; xen-devel@lists.xensource.com
Subject: Re: [Xen-devel][PATCH] patch to support super page (2M) with EPT
You now break the populate_physmap interface like the original Intel patch did. You cannot change the index argument to __copy_from_guest_offset(). Also you add in a bogus printk in the same chunk of the patch.
You do not use SUPER_PAGE_ORDER anywhere, and NORMAL_PAGE_ORDER only in some places. You may as well remove both macros.
-- Keir
On 13/5/08 16:49, "Huang2, Wei" <Wei.Huang2@amd.com> wrote:
Re-submit. It fixes the issues based on your comments.
Thanks,
-Wei
________________________________
From: Keir Fraser [mailto:keir.fraser@eu.citrix.com]
Sent: Tuesday, May 13, 2008 8:51 AM
To: Li, Xin B; Huang2, Wei; Xin, Xiaohui; xen-devel@lists.xensource.com
Subject: Re: [Xen-devel][PATCH] patch to support super page (2M) with EPT
Exactly. The interface for populate_physmap() is clear - if you ask for order-9 allocations then that is what you must get. Otherwise the allocation fails. It is up to the caller to retry with order-0 allocations _if_ that is a suitable fallback.
-- Keir
On 13/5/08 14:39, "Li, Xin B" <xin.b.li@intel.com> wrote:
if 2M page allocation fails, the domain builder will try to use 4K allocation instead.
-Xin
________________________________
From: xen-devel-bounces@lists.xensource.com [mailto:xen-devel-bounces@lists.xensource.com] On Behalf Of Huang2, Wei
Sent: 2008年5月13日 21:37
To: Keir Fraser; Xin, Xiaohui; xen-devel@lists.xensource.com
Subject: RE: [Xen-devel][PATCH] patch to support super page (2M) with EPT
Memory.c looks more invasive because it takes care of failure cases using 4KB pages. Xiaohui’s patch tries to allocate pages using extent_order. But if this request fails for any reason, the guest cannot be started anymore.
-Wei
From: Keir Fraser [mailto:keir.fraser@eu.citrix.com]
Sent: Tuesday, May 13, 2008 3:47 AM
To: Huang2, Wei; Xin, Xiaohui; xen-devel@lists.xensource.com
Subject: Re: [Xen-devel][PATCH] patch to support super page (2M) with EPT
Overall your changes to common code seem a bit more invasive than in the Intel patch. In particular I don’t understand why you made such changes to common/memory.c. The other patch makes far fewer changes (and even some of those would go away since they have erroneously changed the populate_physmap interface). So my feeling is that the Intel patch is a slightly more elegant base to start with: extra changes that your patch makes really need to be accounted for.
-- Keir
On 12/5/08 18:28, "Huang2, Wei" <Wei.Huang2@amd.com> wrote:
Here is a revised version. I get rid of 4MB support, as suggested. I did not see the issue mentioned by Xiaohui related to splitting last 2M into 4KB pages. But anyway, I attached two versions for your reference. Keir, please let me know if you have comments.
-Wei
________________________________
From: Keir Fraser [mailto:keir.fraser@eu.citrix.com]
Sent: Monday, May 12, 2008 2:03 AM
To: Xin, Xiaohui; Huang2, Wei; xen-devel@lists.xensource.com
Subject: Re: [Xen-devel][PATCH] patch to support super page (2M) with EPT
Yes, absolutely no need for 4M page support. We do not support the 32-bit non-PAE build target any more.
-- Keir
On 12/5/08 06:04, "Xin, Xiaohui" <xiaohui.xin@intel.com> wrote:
Some comments here:
1) Basically, 4M is not a page size the hardware natively supports for EPT, so we only use 2M super pages now.
I remembered that Keir said that 2M pages allocation is sufficient, and he removed all the pure 32bit support already.
2) If we don’t allocate the last 2M area with 4KB pages, EPT will run into a problem. Xen will set one of the 4K pages
there to be invalid; logically, that means we would have to invalidate the whole 2M page if we allocated it as 2M, and then the
special pages Xen uses at the high end of the guest memory could not be used. May we know how you cope with that?
Thanks
Xiaohui
________________________________
From: xen-devel-bounces@lists.xensource.com [mailto:xen-devel-bounces@lists.xensource.com] On Behalf Of Huang2, Wei
Sent: Monday, May 12, 2008 12:36 PM
To: Xin, Xiaohui; xen-devel@lists.xensource.com
Subject: RE: [Xen-devel][PATCH] patch to support super page (2M) with EPT
This is the latest one I created. Please review it and I will re-submit.
1. It includes the patch for p2m-ept.c, directly from your previous patch.
2. Xc_hvm_create.c is based on my original approach. It includes support for both 2MB and 4MB pages. Also it considers the case of odd page size (such as 255MB). But I did not allocate the last 2MB area using 4KB pages. Let me know if it is a big issue.
3. The rest are pretty similar.
Thanks,
-Wei
From: xen-devel-bounces@lists.xensource.com [mailto:xen-devel-bounces@lists.xensource.com] On Behalf Of Huang2, Wei
Sent: Sunday, May 11, 2008 3:34 PM
To: Xin, Xiaohui; xen-devel@lists.xensource.com
Subject: RE: [Xen-devel][PATCH] patch to support super page (2M) with EPT
Could we work together for a common solution? As far as I can see, it largely overlaps with my super page patch. The major difference is between p2m.c and p2m-ept.c.
-Wei
From: xen-devel-bounces@lists.xensource.com [mailto:xen-devel-bounces@lists.xensource.com] On Behalf Of Xin, Xiaohui
Sent: Friday, May 09, 2008 4:11 AM
To: xen-devel@lists.xensource.com
Subject: [Xen-devel][PATCH] patch to support super page (2M) with EPT
Attached are the patches to support super page with EPT. We only support 2M size. And shadow may still work fine with 4K pages.
The patches can be split into 3 parts. Apply order is as attached.
tool.diff
To allocate 2M physical contiguous memory in guest except the first 2M and the last 2M.
The first 2M covers special memory, and Xen use the last few pages in guest memory to do special things.
We let them to be 4K pages as normal.
super_page_common.patch
To modify the p2m interfaces by adding an order parameter, such as guest_physmap_add_page(), p2m_set_entry(), etc.
p2m-ept-file.patch
To handle the EPT tables to support super page.
Signed-off-by: Xin Xiaohui <xiaohui.xin@intel.com>
Signed-off-by: Li Xin, B <xin.b.li@intel.com>
________________________________
_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xensource.com
http://lists.xensource.com/xen-devel
________________________________
_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xensource.com
http://lists.xensource.com/xen-devel
[-- Attachment #1.2: Type: text/html, Size: 18772 bytes --]
[-- Attachment #2: super_page_patch.txt --]
[-- Type: text/plain, Size: 41142 bytes --]
diff -r 53195719f762 tools/libxc/xc_hvm_build.c
--- a/tools/libxc/xc_hvm_build.c Tue May 13 15:08:17 2008 +0100
+++ b/tools/libxc/xc_hvm_build.c Wed May 14 10:37:02 2008 -0500
@@ -157,15 +157,17 @@ static int setup_guest(int xc_handle,
char *image, unsigned long image_size)
{
xen_pfn_t *page_array = NULL;
+ xen_pfn_t *super_array = NULL;
unsigned long i, nr_pages = (unsigned long)memsize << (20 - PAGE_SHIFT);
- unsigned long special_page_nr, entry_eip, cur_pages;
+ unsigned long nr_super_pages;
+ unsigned long special_page_nr, entry_eip, cur_super_pages;
struct xen_add_to_physmap xatp;
struct shared_info *shared_info;
void *e820_page;
uint32_t *ident_pt;
struct elf_binary elf;
uint64_t v_start, v_end;
- int rc;
+ int rc, left;
xen_capabilities_info_t caps;
/* An HVM guest must be initialised with at least 2MB memory. */
@@ -198,7 +200,13 @@ static int setup_guest(int xc_handle,
v_start, v_end,
elf_uval(&elf, elf.ehdr, e_entry));
- if ( (page_array = malloc(nr_pages * sizeof(xen_pfn_t))) == NULL )
+ /* Since Xen only supports PAE and 64bit, the super page size is 2MB
+ * and its order is 9.
+ */
+ nr_super_pages = (unsigned long)memsize >> 1;
+
+ if ( (page_array = malloc(nr_pages * sizeof(xen_pfn_t))) == NULL ||
+ (super_array = malloc(nr_super_pages * sizeof(xen_pfn_t))) == NULL )
{
PERROR("Could not allocate memory.\n");
goto error_out;
@@ -206,26 +214,64 @@ static int setup_guest(int xc_handle,
for ( i = 0; i < nr_pages; i++ )
page_array[i] = i;
+ for ( i = 0; i < nr_super_pages; i++ )
+ super_array[i] = i << 9;
for ( i = HVM_BELOW_4G_RAM_END >> PAGE_SHIFT; i < nr_pages; i++ )
page_array[i] += HVM_BELOW_4G_MMIO_LENGTH >> PAGE_SHIFT;
+ for ( i = HVM_BELOW_4G_RAM_END >> (PAGE_SHIFT + 9); i < nr_super_pages;
+ i++ )
+ super_array[i] += HVM_BELOW_4G_MMIO_LENGTH >> PAGE_SHIFT;
/*
* Allocate memory for HVM guest, skipping VGA hole 0xA0000-0xC0000.
- * We allocate pages in batches of no more than 2048 to ensure that
+ * We allocate pages in batches of no more than 8MB to ensure that
* we can be preempted and hence dom0 remains responsive.
+ *
+ * 1) Allocate 4K pages for the first 2M guest memory;
+ * 2) Try to allocate 2M contiguous pages for the remaining guest memory
+ * or use 4K pages;
+ * 3) Allocate the rest of the memory using 4K pages.
*/
rc = xc_domain_memory_populate_physmap(
xc_handle, dom, 0xa0, 0, 0, &page_array[0x00]);
- cur_pages = 0xc0;
- while ( (rc == 0) && (nr_pages > cur_pages) )
- {
- unsigned long count = nr_pages - cur_pages;
- if ( count > 2048 )
- count = 2048;
+ if ( rc == 0 )
rc = xc_domain_memory_populate_physmap(
- xc_handle, dom, count, 0, 0, &page_array[cur_pages]);
- cur_pages += count;
- }
+ xc_handle, dom, 0x200-0xc0, 0, 0, &page_array[0xc0]);
+
+ /* The amount of 4K pages left behind by super page allocation */
+ left = nr_pages - ((nr_pages >> 9 ) << 9 );
+ cur_super_pages = 1;
+
+ /* Start to allocate super pages */
+ while ( (rc == 0) && (nr_super_pages > cur_super_pages) )
+ {
+ unsigned long count = nr_super_pages - cur_super_pages;
+ if ( count > 4 )
+ count = 4;
+
+ rc = xc_domain_memory_populate_physmap(
+ xc_handle, dom, count, 9, 0, &super_array[cur_super_pages]);
+
+ if ( rc != 0 )
+ {
+ PERROR("Cannot allocate any more 2M pages for HVM guest.\n");
+ rc = xc_domain_memory_populate_physmap(
+ xc_handle, dom, count << 9, 0, 0,
+ &page_array[cur_super_pages << 9]);
+ if ( rc != 0 )
+ {
+ PERROR("Could not allocate memory for HVM guest.\n");
+ goto error_out;
+ }
+ }
+
+ cur_super_pages += count;
+ }
+
+ if ( rc == 0 && left )
+ rc = xc_domain_memory_populate_physmap(xc_handle, dom, left, 0, 0,
+ &page_array[cur_super_pages << 9]);
+
if ( rc != 0 )
{
PERROR("Could not allocate memory for HVM guest.\n");
@@ -314,10 +360,12 @@ static int setup_guest(int xc_handle,
}
free(page_array);
+ free(super_array);
return 0;
error_out:
free(page_array);
+ free(super_array);
return -1;
}
diff -r 53195719f762 xen/arch/ia64/xen/mm.c
--- a/xen/arch/ia64/xen/mm.c Tue May 13 15:08:17 2008 +0100
+++ b/xen/arch/ia64/xen/mm.c Wed May 14 09:47:50 2008 -0500
@@ -2415,7 +2415,7 @@ steal_page(struct domain *d, struct page
int
guest_physmap_add_page(struct domain *d, unsigned long gpfn,
- unsigned long mfn)
+ unsigned long mfn, unsigned int page_order)
{
BUG_ON(!mfn_valid(mfn));
BUG_ON(mfn_to_page(mfn)->count_info != (PGC_allocated | 1));
@@ -2432,7 +2432,7 @@ guest_physmap_add_page(struct domain *d,
void
guest_physmap_remove_page(struct domain *d, unsigned long gpfn,
- unsigned long mfn)
+ unsigned long mfn, unsigned int page_order)
{
BUG_ON(mfn == 0);//XXX
zap_domain_page_one(d, gpfn << PAGE_SHIFT, 0, mfn);
@@ -2838,7 +2838,7 @@ arch_memory_op(int op, XEN_GUEST_HANDLE(
if (prev_mfn && mfn_valid(prev_mfn)) {
if (is_xen_heap_mfn(prev_mfn))
/* Xen heap frames are simply unhooked from this phys slot. */
- guest_physmap_remove_page(d, xatp.gpfn, prev_mfn);
+ guest_physmap_remove_page(d, xatp.gpfn, prev_mfn, 0);
else
/* Normal domain memory is freed, to avoid leaking memory. */
guest_remove_page(d, xatp.gpfn);
@@ -2847,10 +2847,10 @@ arch_memory_op(int op, XEN_GUEST_HANDLE(
/* Unmap from old location, if any. */
gpfn = get_gpfn_from_mfn(mfn);
if (gpfn != INVALID_M2P_ENTRY)
- guest_physmap_remove_page(d, gpfn, mfn);
+ guest_physmap_remove_page(d, gpfn, mfn, 0);
/* Map at new location. */
- guest_physmap_add_page(d, xatp.gpfn, mfn);
+ guest_physmap_add_page(d, xatp.gpfn, mfn, 0);
out:
domain_unlock(d);
diff -r 53195719f762 xen/arch/x86/mm.c
--- a/xen/arch/x86/mm.c Tue May 13 15:08:17 2008 +0100
+++ b/xen/arch/x86/mm.c Wed May 14 09:54:38 2008 -0500
@@ -3297,7 +3297,7 @@ long arch_memory_op(int op, XEN_GUEST_HA
{
if ( is_xen_heap_mfn(prev_mfn) )
/* Xen heap frames are simply unhooked from this phys slot. */
- guest_physmap_remove_page(d, xatp.gpfn, prev_mfn);
+ guest_physmap_remove_page(d, xatp.gpfn, prev_mfn, 0);
else
/* Normal domain memory is freed, to avoid leaking memory. */
guest_remove_page(d, xatp.gpfn);
@@ -3306,10 +3306,10 @@ long arch_memory_op(int op, XEN_GUEST_HA
/* Unmap from old location, if any. */
gpfn = get_gpfn_from_mfn(mfn);
if ( gpfn != INVALID_M2P_ENTRY )
- guest_physmap_remove_page(d, gpfn, mfn);
+ guest_physmap_remove_page(d, gpfn, mfn, 0);
/* Map at new location. */
- guest_physmap_add_page(d, xatp.gpfn, mfn);
+ guest_physmap_add_page(d, xatp.gpfn, mfn, 0);
domain_unlock(d);
diff -r 53195719f762 xen/arch/x86/mm/hap/p2m-ept.c
--- a/xen/arch/x86/mm/hap/p2m-ept.c Tue May 13 15:08:17 2008 +0100
+++ b/xen/arch/x86/mm/hap/p2m-ept.c Wed May 14 09:47:50 2008 -0500
@@ -20,6 +20,7 @@
#include <xen/domain_page.h>
#include <xen/sched.h>
#include <asm/current.h>
+#include <asm/paging.h>
#include <asm/types.h>
#include <asm/domain.h>
#include <asm/p2m.h>
@@ -46,6 +47,9 @@ static void ept_p2m_type_to_flags(ept_en
}
}
+#define GUEST_TABLE_NORMAL_PAGE 1
+#define GUEST_TABLE_SUPER_PAGE 2
+
static int ept_next_level(struct domain *d, bool_t read_only,
ept_entry_t **table, unsigned long *gfn_remainder,
u32 shift)
@@ -54,7 +58,6 @@ static int ept_next_level(struct domain
u32 index;
index = *gfn_remainder >> shift;
- *gfn_remainder &= (1UL << shift) - 1;
ept_entry = (*table) + index;
@@ -83,31 +86,53 @@ static int ept_next_level(struct domain
ept_entry->r = ept_entry->w = ept_entry->x = 1;
}
- next = map_domain_page(ept_entry->mfn);
- unmap_domain_page(*table);
- *table = next;
-
- return 1;
+ if ( !ept_entry->sp_avail )
+ {
+ *gfn_remainder &= (1UL << shift) - 1;
+ next = map_domain_page(ept_entry->mfn);
+ unmap_domain_page(*table);
+ *table = next;
+ return GUEST_TABLE_NORMAL_PAGE;
+ }
+ else
+ return GUEST_TABLE_SUPER_PAGE;
}
static int
-ept_set_entry(struct domain *d, unsigned long gfn, mfn_t mfn, p2m_type_t p2mt)
-{
- ept_entry_t *table =
- map_domain_page(mfn_x(pagetable_get_mfn(d->arch.phys_table)));
- unsigned long gfn_remainder = gfn;
+ept_set_entry(struct domain *d, unsigned long gfn, mfn_t mfn,
+ unsigned int order, p2m_type_t p2mt)
+{
+ ept_entry_t *table = NULL;
+ unsigned long gfn_remainder = gfn, offset = 0;
ept_entry_t *ept_entry = NULL;
u32 index;
- int i, rv = 0;
+ int i, rv = 0, ret = 0;
+ int walk_level = order / EPT_TABLE_ORDER;
/* Should check if gfn obeys GAW here */
- for ( i = EPT_DEFAULT_GAW; i > 0; i-- )
- if ( !ept_next_level(d, 0, &table, &gfn_remainder,
- i * EPT_TABLE_ORDER) )
+ if ( order != 0 )
+ if ( (gfn & ((1UL << order) - 1)) )
+ return 1;
+
+ table = map_domain_page(mfn_x(pagetable_get_mfn(d->arch.phys_table)));
+
+ ASSERT(table != NULL);
+
+ for ( i = EPT_DEFAULT_GAW; i > walk_level; i-- )
+ {
+ ret = ept_next_level(d, 0, &table, &gfn_remainder,
+ i * EPT_TABLE_ORDER);
+ if ( !ret )
goto out;
-
- index = gfn_remainder;
+ else if ( ret == GUEST_TABLE_SUPER_PAGE )
+ break;
+ }
+
+ index = gfn_remainder >> ( i ? (i * EPT_TABLE_ORDER): order);
+ walk_level = ( i ? ( i * EPT_TABLE_ORDER) : order) / EPT_TABLE_ORDER;
+ offset = (gfn_remainder & ( ((1 << (i*EPT_TABLE_ORDER)) - 1)));
+
ept_entry = table + index;
if ( mfn_valid(mfn_x(mfn)) || (p2mt == p2m_mmio_direct) )
@@ -117,9 +142,20 @@ ept_set_entry(struct domain *d, unsigned
d->arch.p2m->max_mapped_pfn = gfn;
ept_entry->emt = EPT_DEFAULT_MT;
- ept_entry->sp_avail = 0;
+ ept_entry->sp_avail = walk_level ? 1 : 0;
+
+ if ( ret == GUEST_TABLE_SUPER_PAGE )
+ {
+ ept_entry->mfn = mfn_x(mfn) - offset;
+ if ( ept_entry->avail1 == p2m_ram_logdirty &&
+ p2mt == p2m_ram_rw )
+ for ( i = 0; i < 512; i++ )
+ paging_mark_dirty(d, mfn_x(mfn)-offset+i);
+ }
+ else
+ ept_entry->mfn = mfn_x(mfn);
+
ept_entry->avail1 = p2mt;
- ept_entry->mfn = mfn_x(mfn);
ept_entry->rsvd = 0;
ept_entry->avail2 = 0;
/* last step */
@@ -132,14 +168,42 @@ ept_set_entry(struct domain *d, unsigned
/* Success */
rv = 1;
- out:
+out:
unmap_domain_page(table);
ept_sync_domain(d);
+ /* Currently the p2m table is not shared with the VT-d page table */
+
+ if ( iommu_enabled && is_hvm_domain(d) )
+ {
+ if ( p2mt == p2m_ram_rw )
+ {
+ if ( ret == GUEST_TABLE_SUPER_PAGE )
+ {
+ for ( i = 0; i < 512; i++ )
+ iommu_map_page(d, gfn-offset+i, mfn_x(mfn)-offset+i);
+ }
+ else if ( ret )
+ iommu_map_page(d, gfn, mfn_x(mfn));
+ }
+ else
+ {
+ if ( ret == GUEST_TABLE_SUPER_PAGE )
+ {
+ for ( i = 0; i < 512; i++ )
+ iommu_unmap_page(d, gfn-offset+i);
+ }
+ else if ( ret )
+ iommu_unmap_page(d, gfn);
+ }
+ }
+
+#ifdef P2M_SHARE_WITH_VTD_PAGE_TABLE
/* If p2m table is shared with vtd page-table. */
if ( iommu_enabled && is_hvm_domain(d) && (p2mt == p2m_mmio_direct) )
iommu_flush(d, gfn, (u64*)ept_entry);
+#endif
return rv;
}
@@ -152,7 +216,7 @@ static mfn_t ept_get_entry(struct domain
unsigned long gfn_remainder = gfn;
ept_entry_t *ept_entry;
u32 index;
- int i;
+ int i, ret=0;
mfn_t mfn = _mfn(INVALID_MFN);
*t = p2m_mmio_dm;
@@ -164,17 +228,31 @@ static mfn_t ept_get_entry(struct domain
/* Should check if gfn obeys GAW here. */
for ( i = EPT_DEFAULT_GAW; i > 0; i-- )
- if ( !ept_next_level(d, 1, &table, &gfn_remainder,
- i * EPT_TABLE_ORDER) )
+ {
+ ret = ept_next_level(d, 1, &table, &gfn_remainder,
+ i * EPT_TABLE_ORDER);
+ if ( !ret )
goto out;
-
- index = gfn_remainder;
+ else if ( ret == GUEST_TABLE_SUPER_PAGE )
+ break;
+ }
+
+ index = gfn_remainder >> ( i * EPT_TABLE_ORDER);
ept_entry = table + index;
if ( ept_entry->avail1 != p2m_invalid )
{
*t = ept_entry->avail1;
mfn = _mfn(ept_entry->mfn);
+ if ( i )
+ {
+ /* We may have stopped at a super page entry; add the 4K offset
+ * within it so the result matches a per-page p2m lookup.
+ */
+ unsigned long split_mfn =
+ mfn_x(mfn) + (gfn_remainder & ( ((1 << (i*EPT_TABLE_ORDER)) - 1 )));
+ mfn = _mfn(split_mfn);
+ }
}
out:
@@ -205,33 +283,63 @@ static void ept_change_entry_type_global
l4e = map_domain_page(mfn_x(pagetable_get_mfn(d->arch.phys_table)));
for (i4 = 0; i4 < EPT_PAGETABLE_ENTRIES; i4++ )
{
- if ( !l4e[i4].epte || l4e[i4].sp_avail )
+ if ( !l4e[i4].epte )
continue;
- l3e = map_domain_page(l4e[i4].mfn);
- for ( i3 = 0; i3 < EPT_PAGETABLE_ENTRIES; i3++ )
- {
- if ( !l3e[i3].epte || l3e[i3].sp_avail )
+ if ( !l4e[i4].sp_avail )
+ {
+ l3e = map_domain_page(l4e[i4].mfn);
+ for ( i3 = 0; i3 < EPT_PAGETABLE_ENTRIES; i3++ )
+ {
+ if ( !l3e[i3].epte )
+ continue;
+ if ( !l3e[i3].sp_avail )
+ {
+ l2e = map_domain_page(l3e[i3].mfn);
+ for ( i2 = 0; i2 < EPT_PAGETABLE_ENTRIES; i2++ )
+ {
+ if ( !l2e[i2].epte )
+ continue;
+ if ( !l2e[i2].sp_avail )
+ {
+ l1e = map_domain_page(l2e[i2].mfn);
+ for ( i1 = 0; i1 < EPT_PAGETABLE_ENTRIES; i1++ )
+ {
+ if ( !l1e[i1].epte )
+ continue;
+ if ( l1e[i1].avail1 != ot )
+ continue;
+ l1e[i1].avail1 = nt;
+ ept_p2m_type_to_flags(l1e+i1, nt);
+ }
+ unmap_domain_page(l1e);
+ }
+ else
+ {
+ if ( l2e[i2].avail1 != ot )
+ continue;
+ l2e[i2].avail1 = nt;
+ ept_p2m_type_to_flags(l2e+i2, nt);
+ }
+ }
+ unmap_domain_page(l2e);
+ }
+ else
+ {
+ if ( l3e[i3].avail1 != ot )
+ continue;
+ l3e[i3].avail1 = nt;
+ ept_p2m_type_to_flags(l3e+i3, nt);
+ }
+ }
+ unmap_domain_page(l3e);
+ }
+ else
+ {
+ if ( l4e[i4].avail1 != ot )
continue;
- l2e = map_domain_page(l3e[i3].mfn);
- for ( i2 = 0; i2 < EPT_PAGETABLE_ENTRIES; i2++ )
- {
- if ( !l2e[i2].epte || l2e[i2].sp_avail )
- continue;
- l1e = map_domain_page(l2e[i2].mfn);
- for ( i1 = 0; i1 < EPT_PAGETABLE_ENTRIES; i1++ )
- {
- if ( !l1e[i1].epte )
- continue;
- if ( l1e[i1].avail1 != ot )
- continue;
- l1e[i1].avail1 = nt;
- ept_p2m_type_to_flags(l1e+i1, nt);
- }
- unmap_domain_page(l1e);
- }
- unmap_domain_page(l2e);
- }
- unmap_domain_page(l3e);
+ l4e[i4].avail1 = nt;
+ ept_p2m_type_to_flags(l4e+i4, nt);
+ }
}
unmap_domain_page(l4e);
diff -r 53195719f762 xen/arch/x86/mm/p2m.c
--- a/xen/arch/x86/mm/p2m.c Tue May 13 15:08:17 2008 +0100
+++ b/xen/arch/x86/mm/p2m.c Wed May 14 09:54:38 2008 -0500
@@ -151,9 +151,11 @@ p2m_next_level(struct domain *d, mfn_t *
unsigned long *gfn_remainder, unsigned long gfn, u32 shift,
u32 max, unsigned long type)
{
+ l1_pgentry_t *l1_entry;
l1_pgentry_t *p2m_entry;
l1_pgentry_t new_entry;
void *next;
+ int i;
ASSERT(d->arch.p2m->alloc_page);
if ( !(p2m_entry = p2m_find_entry(*table, gfn_remainder, gfn,
@@ -194,6 +196,44 @@ p2m_next_level(struct domain *d, mfn_t *
break;
}
}
+
+ ASSERT(l1e_get_flags(*p2m_entry) & _PAGE_PRESENT);
+
+ /* Split a single large page into 4KB pages in the P2M table */
+ if ( type == PGT_l1_page_table && (l1e_get_flags(*p2m_entry) & _PAGE_PSE) )
+ {
+ unsigned long flags, pfn;
+ struct page_info *pg = d->arch.p2m->alloc_page(d);
+ if ( pg == NULL )
+ return 0;
+ list_add_tail(&pg->list, &d->arch.p2m->pages);
+ pg->u.inuse.type_info = PGT_l1_page_table | 1 | PGT_validated;
+ pg->count_info = 1;
+
+ /* New splintered mappings inherit the flags of the old superpage,
+ * with a little reorganisation for the _PAGE_PSE_PAT bit. */
+ flags = l1e_get_flags(*p2m_entry);
+ pfn = l1e_get_pfn(*p2m_entry);
+ if ( pfn & 1 ) /* ==> _PAGE_PSE_PAT was set */
+ pfn -= 1; /* Clear it; _PAGE_PSE becomes _PAGE_PAT */
+ else
+ flags &= ~_PAGE_PSE; /* Clear _PAGE_PSE (== _PAGE_PAT) */
+
+ l1_entry = map_domain_page(mfn_x(page_to_mfn(pg)));
+ for ( i = 0; i < L1_PAGETABLE_ENTRIES; i++ )
+ {
+ new_entry = l1e_from_pfn(pfn + i, flags);
+ paging_write_p2m_entry(d, gfn,
+ l1_entry+i, *table_mfn, new_entry, 1);
+ }
+ unmap_domain_page(l1_entry);
+
+ new_entry = l1e_from_pfn(mfn_x(page_to_mfn(pg)),
+ __PAGE_HYPERVISOR|_PAGE_USER);
+ paging_write_p2m_entry(d, gfn,
+ p2m_entry, *table_mfn, new_entry, 2);
+ }
+
*table_mfn = _mfn(l1e_get_pfn(*p2m_entry));
next = map_domain_page(mfn_x(*table_mfn));
unmap_domain_page(*table);
@@ -204,7 +244,8 @@ p2m_next_level(struct domain *d, mfn_t *
// Returns 0 on error (out of memory)
static int
-p2m_set_entry(struct domain *d, unsigned long gfn, mfn_t mfn, p2m_type_t p2mt)
+p2m_set_entry(struct domain *d, unsigned long gfn, mfn_t mfn,
+ unsigned int page_order, p2m_type_t p2mt)
{
// XXX -- this might be able to be faster iff current->domain == d
mfn_t table_mfn = pagetable_get_mfn(d->arch.phys_table);
@@ -212,6 +253,7 @@ p2m_set_entry(struct domain *d, unsigned
unsigned long gfn_remainder = gfn;
l1_pgentry_t *p2m_entry;
l1_pgentry_t entry_content;
+ l2_pgentry_t l2e_content;
int rv=0;
#if CONFIG_PAGING_LEVELS >= 4
@@ -235,26 +277,53 @@ p2m_set_entry(struct domain *d, unsigned
PGT_l2_page_table) )
goto out;
- if ( !p2m_next_level(d, &table_mfn, &table, &gfn_remainder, gfn,
- L2_PAGETABLE_SHIFT - PAGE_SHIFT,
- L2_PAGETABLE_ENTRIES, PGT_l1_page_table) )
- goto out;
-
- p2m_entry = p2m_find_entry(table, &gfn_remainder, gfn,
- 0, L1_PAGETABLE_ENTRIES);
- ASSERT(p2m_entry);
+ if ( page_order == 0 )
+ {
+ if ( !p2m_next_level(d, &table_mfn, &table, &gfn_remainder, gfn,
+ L2_PAGETABLE_SHIFT - PAGE_SHIFT,
+ L2_PAGETABLE_ENTRIES, PGT_l1_page_table) )
+ goto out;
+
+ p2m_entry = p2m_find_entry(table, &gfn_remainder, gfn,
+ 0, L1_PAGETABLE_ENTRIES);
+ ASSERT(p2m_entry);
+
+ if ( mfn_valid(mfn) || (p2mt == p2m_mmio_direct) )
+ entry_content = l1e_from_pfn(mfn_x(mfn), p2m_type_to_flags(p2mt));
+ else
+ entry_content = l1e_empty();
+
+ /* level 1 entry */
+ paging_write_p2m_entry(d, gfn, p2m_entry, table_mfn, entry_content, 1);
+ }
+ else
+ {
+ p2m_entry = p2m_find_entry(table, &gfn_remainder, gfn,
+ L2_PAGETABLE_SHIFT - PAGE_SHIFT,
+ L2_PAGETABLE_ENTRIES);
+ ASSERT(p2m_entry);
+
+ if ( (l1e_get_flags(*p2m_entry) & _PAGE_PRESENT) &&
+ !(l1e_get_flags(*p2m_entry) & _PAGE_PSE) )
+ {
+ P2M_ERROR("configure P2M table 4KB L2 entry with large page\n");
+ domain_crash(d);
+ goto out;
+ }
+
+ if ( mfn_valid(mfn) )
+ l2e_content = l2e_from_pfn(mfn_x(mfn),
+ p2m_type_to_flags(p2mt) | _PAGE_PSE);
+ else
+ l2e_content = l2e_empty();
+
+ entry_content.l1 = l2e_content.l2;
+ paging_write_p2m_entry(d, gfn, p2m_entry, table_mfn, entry_content, 2);
+ }
/* Track the highest gfn for which we have ever had a valid mapping */
if ( mfn_valid(mfn) && (gfn > d->arch.p2m->max_mapped_pfn) )
d->arch.p2m->max_mapped_pfn = gfn;
-
- if ( mfn_valid(mfn) || (p2mt == p2m_mmio_direct) )
- entry_content = l1e_from_pfn(mfn_x(mfn), p2m_type_to_flags(p2mt));
- else
- entry_content = l1e_empty();
-
- /* level 1 entry */
- paging_write_p2m_entry(d, gfn, p2m_entry, table_mfn, entry_content, 1);
if ( iommu_enabled && is_hvm_domain(d) )
{
@@ -335,6 +404,16 @@ p2m_gfn_to_mfn(struct domain *d, unsigne
unmap_domain_page(l2e);
return _mfn(INVALID_MFN);
}
+ else if ( (l2e_get_flags(*l2e) & _PAGE_PSE) )
+ {
+ mfn = _mfn(l2e_get_pfn(*l2e) + l1_table_offset(addr));
+ *t = p2m_flags_to_type(l2e_get_flags(*l2e));
+ unmap_domain_page(l2e);
+
+ ASSERT(mfn_valid(mfn) || !p2m_is_ram(*t));
+ return (p2m_is_valid(*t)) ? mfn : _mfn(INVALID_MFN);
+ }
+
mfn = _mfn(l2e_get_pfn(*l2e));
unmap_domain_page(l2e);
@@ -358,6 +437,7 @@ static mfn_t p2m_gfn_to_mfn_current(unsi
{
mfn_t mfn = _mfn(INVALID_MFN);
p2m_type_t p2mt = p2m_mmio_dm;
+ paddr_t addr = ((paddr_t)gfn) << PAGE_SHIFT;
/* XXX This is for compatibility with the old model, where anything not
* XXX marked as RAM was considered to be emulated MMIO space.
* XXX Once we start explicitly registering MMIO regions in the p2m
@@ -366,25 +446,44 @@ static mfn_t p2m_gfn_to_mfn_current(unsi
if ( gfn <= current->domain->arch.p2m->max_mapped_pfn )
{
l1_pgentry_t l1e = l1e_empty();
+ l2_pgentry_t l2e = l2e_empty();
int ret;
ASSERT(gfn < (RO_MPT_VIRT_END - RO_MPT_VIRT_START)
/ sizeof(l1_pgentry_t));
- /* Need to __copy_from_user because the p2m is sparse and this
- * part might not exist */
- ret = __copy_from_user(&l1e,
- &phys_to_machine_mapping[gfn],
- sizeof(l1e));
-
- if ( ret == 0 ) {
- p2mt = p2m_flags_to_type(l1e_get_flags(l1e));
- ASSERT(l1e_get_pfn(l1e) != INVALID_MFN || !p2m_is_ram(p2mt));
+ ret = __copy_from_user(&l2e,
+ &__linear_l1_table[l1_linear_offset(RO_MPT_VIRT_START) + l2_linear_offset(addr)],
+ sizeof(l2e));
+
+ if ( (ret == 0) && (l2e_get_flags(l2e) & _PAGE_PRESENT) &&
+ (l2e_get_flags(l2e) & _PAGE_PSE) )
+ {
+ p2mt = p2m_flags_to_type(l2e_get_flags(l2e));
+ ASSERT(l2e_get_pfn(l2e) != INVALID_MFN || !p2m_is_ram(p2mt));
if ( p2m_is_valid(p2mt) )
- mfn = _mfn(l1e_get_pfn(l1e));
- else
- /* XXX see above */
+ mfn = _mfn(l2e_get_pfn(l2e) + l1_table_offset(addr));
+ else
p2mt = p2m_mmio_dm;
+ }
+ else
+ {
+
+ /* Need to __copy_from_user because the p2m is sparse and this
+ * part might not exist */
+ ret = __copy_from_user(&l1e,
+ &phys_to_machine_mapping[gfn],
+ sizeof(l1e));
+
+ if ( ret == 0 ) {
+ p2mt = p2m_flags_to_type(l1e_get_flags(l1e));
+ ASSERT(l1e_get_pfn(l1e) != INVALID_MFN || !p2m_is_ram(p2mt));
+ if ( p2m_is_valid(p2mt) )
+ mfn = _mfn(l1e_get_pfn(l1e));
+ else
+ /* XXX see above */
+ p2mt = p2m_mmio_dm;
+ }
}
}
@@ -430,9 +529,10 @@ void p2m_change_entry_type_global(struct
}
static inline
-int set_p2m_entry(struct domain *d, unsigned long gfn, mfn_t mfn, p2m_type_t p2mt)
-{
- return d->arch.p2m->set_entry(d, gfn, mfn, p2mt);
+int set_p2m_entry(struct domain *d, unsigned long gfn, mfn_t mfn,
+ unsigned int page_order, p2m_type_t p2mt)
+{
+ return d->arch.p2m->set_entry(d, gfn, mfn, page_order, p2mt);
}
// Allocate a new p2m table for a domain.
@@ -493,7 +593,8 @@ int p2m_alloc_table(struct domain *d,
P2M_PRINTK("populating p2m table\n");
/* Initialise physmap tables for slot zero. Other code assumes this. */
- if ( !set_p2m_entry(d, 0, _mfn(INVALID_MFN), p2m_invalid) )
+ if ( !set_p2m_entry(d, 0, _mfn(INVALID_MFN), 0,
+ p2m_invalid) )
goto error;
/* Copy all existing mappings from the page list and m2p */
@@ -512,7 +613,7 @@ int p2m_alloc_table(struct domain *d,
(gfn != 0x55555555L)
#endif
&& gfn != INVALID_M2P_ENTRY
- && !set_p2m_entry(d, gfn, mfn, p2m_ram_rw) )
+ && !set_p2m_entry(d, gfn, mfn, 0, p2m_ram_rw) )
goto error;
}
@@ -688,6 +789,28 @@ static void audit_p2m(struct domain *d)
gfn += 1 << (L2_PAGETABLE_SHIFT - PAGE_SHIFT);
continue;
}
+
+ /* check for super page */
+ if ( l2e_get_flags(l2e[i2]) & _PAGE_PSE )
+ {
+ mfn = l2e_get_pfn(l2e[i2]);
+ ASSERT(mfn_valid(_mfn(mfn)));
+ for ( i1 = 0; i1 < L1_PAGETABLE_ENTRIES; i1++)
+ {
+ m2pfn = get_gpfn_from_mfn(mfn+i1);
+ if ( m2pfn != (gfn + i) )
+ {
+ pmbad++;
+ P2M_PRINTK("mismatch: gfn %#lx -> mfn %#lx"
+ " -> gfn %#lx\n", gfn+i, mfn+i,
+ m2pfn);
+ BUG();
+ }
+ }
+ gfn += 1 << (L2_PAGETABLE_SHIFT - PAGE_SHIFT);
+ continue;
+ }
+
l1e = map_domain_page(mfn_x(_mfn(l2e_get_pfn(l2e[i2]))));
for ( i1 = 0; i1 < L1_PAGETABLE_ENTRIES; i1++, gfn++ )
@@ -737,35 +860,40 @@ static void audit_p2m(struct domain *d)
static void
-p2m_remove_page(struct domain *d, unsigned long gfn, unsigned long mfn)
-{
+p2m_remove_page(struct domain *d, unsigned long gfn, unsigned long mfn,
+ unsigned int page_order)
+{
+ int i;
if ( !paging_mode_translate(d) )
return;
P2M_DEBUG("removing gfn=%#lx mfn=%#lx\n", gfn, mfn);
- set_p2m_entry(d, gfn, _mfn(INVALID_MFN), p2m_invalid);
- set_gpfn_from_mfn(mfn, INVALID_M2P_ENTRY);
+ set_p2m_entry(d, gfn, _mfn(INVALID_MFN), page_order, p2m_invalid);
+ for ( i = 0; i < (1UL << page_order); i++ )
+ set_gpfn_from_mfn(mfn+i, INVALID_M2P_ENTRY);
}
void
guest_physmap_remove_page(struct domain *d, unsigned long gfn,
- unsigned long mfn)
+ unsigned long mfn, unsigned int page_order)
{
p2m_lock(d->arch.p2m);
audit_p2m(d);
- p2m_remove_page(d, gfn, mfn);
+ p2m_remove_page(d, gfn, mfn, page_order);
audit_p2m(d);
p2m_unlock(d->arch.p2m);
}
int
guest_physmap_add_entry(struct domain *d, unsigned long gfn,
- unsigned long mfn, p2m_type_t t)
+ unsigned long mfn, unsigned int page_order,
+ p2m_type_t t)
{
unsigned long ogfn;
p2m_type_t ot;
mfn_t omfn;
int rc = 0;
+ int i;
if ( !paging_mode_translate(d) )
return -EINVAL;
@@ -795,7 +923,8 @@ guest_physmap_add_entry(struct domain *d
if ( p2m_is_ram(ot) )
{
ASSERT(mfn_valid(omfn));
- set_gpfn_from_mfn(mfn_x(omfn), INVALID_M2P_ENTRY);
+ for ( i = 0; i < (1UL << page_order); i++ )
+ set_gpfn_from_mfn(mfn_x(omfn)+i, INVALID_M2P_ENTRY);
}
ogfn = mfn_to_gfn(d, _mfn(mfn));
@@ -818,21 +947,23 @@ guest_physmap_add_entry(struct domain *d
P2M_DEBUG("old gfn=%#lx -> mfn %#lx\n",
ogfn , mfn_x(omfn));
if ( mfn_x(omfn) == mfn )
- p2m_remove_page(d, ogfn, mfn);
+ p2m_remove_page(d, ogfn, mfn, page_order);
}
}
if ( mfn_valid(_mfn(mfn)) )
{
- if ( !set_p2m_entry(d, gfn, _mfn(mfn), t) )
+ if ( !set_p2m_entry(d, gfn, _mfn(mfn), page_order, t) )
rc = -EINVAL;
- set_gpfn_from_mfn(mfn, gfn);
+ for ( i = 0; i < (1UL << page_order); i++ )
+ set_gpfn_from_mfn(mfn+i, gfn+i);
}
else
{
gdprintk(XENLOG_WARNING, "Adding bad mfn to p2m map (%#lx -> %#lx)\n",
gfn, mfn);
- if ( !set_p2m_entry(d, gfn, _mfn(INVALID_MFN), p2m_invalid) )
+ if ( !set_p2m_entry(d, gfn, _mfn(INVALID_MFN), page_order,
+ p2m_invalid) )
rc = -EINVAL;
}
@@ -851,7 +982,7 @@ void p2m_change_type_global(struct domai
l1_pgentry_t l1e_content;
l1_pgentry_t *l1e;
l2_pgentry_t *l2e;
- mfn_t l1mfn;
+ mfn_t l1mfn, l2mfn;
int i1, i2;
l3_pgentry_t *l3e;
int i3;
@@ -891,11 +1022,26 @@ void p2m_change_type_global(struct domai
{
continue;
}
+ l2mfn = _mfn(l3e_get_pfn(l3e[i3]));
l2e = map_domain_page(l3e_get_pfn(l3e[i3]));
for ( i2 = 0; i2 < L2_PAGETABLE_ENTRIES; i2++ )
{
if ( !(l2e_get_flags(l2e[i2]) & _PAGE_PRESENT) )
{
+ continue;
+ }
+
+ if ( (l2e_get_flags(l2e[i2]) & _PAGE_PSE) )
+ {
+ flags = l2e_get_flags(l2e[i2]);
+ if ( p2m_flags_to_type(flags) != ot )
+ continue;
+ mfn = l2e_get_pfn(l2e[i2]);
+ gfn = get_gpfn_from_mfn(mfn);
+ flags = p2m_flags_to_type(nt);
+ l1e_content = l1e_from_pfn(mfn, flags | _PAGE_PSE);
+ paging_write_p2m_entry(d, gfn, (l1_pgentry_t *)&l2e[i2],
+ l2mfn, l1e_content, 2);
continue;
}
@@ -944,7 +1090,7 @@ p2m_type_t p2m_change_type(struct domain
mfn = gfn_to_mfn(d, gfn, &pt);
if ( pt == ot )
- set_p2m_entry(d, gfn, mfn, nt);
+ set_p2m_entry(d, gfn, mfn, 0, nt);
p2m_unlock(d->arch.p2m);
@@ -968,7 +1114,7 @@ set_mmio_p2m_entry(struct domain *d, uns
set_gpfn_from_mfn(mfn_x(omfn), INVALID_M2P_ENTRY);
}
- rc = set_p2m_entry(d, gfn, mfn, p2m_mmio_direct);
+ rc = set_p2m_entry(d, gfn, mfn, 0, p2m_mmio_direct);
if ( 0 == rc )
gdprintk(XENLOG_ERR,
"set_mmio_p2m_entry: set_p2m_entry failed! mfn=%08lx\n",
@@ -992,7 +1138,7 @@ clear_mmio_p2m_entry(struct domain *d, u
"clear_mmio_p2m_entry: gfn_to_mfn failed! gfn=%08lx\n", gfn);
return 0;
}
- rc = set_p2m_entry(d, gfn, _mfn(INVALID_MFN), 0);
+ rc = set_p2m_entry(d, gfn, _mfn(INVALID_MFN), 0, 0);
return rc;
}
diff -r 53195719f762 xen/common/grant_table.c
--- a/xen/common/grant_table.c Tue May 13 15:08:17 2008 +0100
+++ b/xen/common/grant_table.c Wed May 14 09:54:38 2008 -0500
@@ -1159,7 +1159,7 @@ gnttab_transfer(
spin_lock(&e->grant_table->lock);
sha = &shared_entry(e->grant_table, gop.ref);
- guest_physmap_add_page(e, sha->frame, mfn);
+ guest_physmap_add_page(e, sha->frame, mfn, 0);
sha->frame = mfn;
wmb();
sha->flags |= GTF_transfer_completed;
diff -r 53195719f762 xen/common/memory.c
--- a/xen/common/memory.c Tue May 13 15:08:17 2008 +0100
+++ b/xen/common/memory.c Wed May 14 09:54:38 2008 -0500
@@ -126,11 +126,7 @@ static void populate_physmap(struct memo
mfn = page_to_mfn(page);
if ( unlikely(paging_mode_translate(d)) )
- {
- for ( j = 0; j < (1 << a->extent_order); j++ )
- if ( guest_physmap_add_page(d, gpfn + j, mfn + j) )
- goto out;
- }
+ guest_physmap_add_page(d, gpfn, mfn, a->extent_order);
else
{
for ( j = 0; j < (1 << a->extent_order); j++ )
@@ -172,7 +168,7 @@ int guest_remove_page(struct domain *d,
if ( test_and_clear_bit(_PGC_allocated, &page->count_info) )
put_page(page);
- guest_physmap_remove_page(d, gmfn, mfn);
+ guest_physmap_remove_page(d, gmfn, mfn, 0);
put_page(page);
@@ -419,7 +415,7 @@ static long memory_exchange(XEN_GUEST_HA
if ( !test_and_clear_bit(_PGC_allocated, &page->count_info) )
BUG();
mfn = page_to_mfn(page);
- guest_physmap_remove_page(d, mfn_to_gmfn(d, mfn), mfn);
+ guest_physmap_remove_page(d, mfn_to_gmfn(d, mfn), mfn, 0);
put_page(page);
}
@@ -441,8 +437,7 @@ static long memory_exchange(XEN_GUEST_HA
if ( unlikely(paging_mode_translate(d)) )
{
/* Ignore failure here. There's nothing we can do. */
- for ( k = 0; k < (1UL << exch.out.extent_order); k++ )
- (void)guest_physmap_add_page(d, gpfn + k, mfn + k);
+ (void)guest_physmap_add_page(d, gpfn, mfn, exch.out.extent_order);
}
else
{
diff -r 53195719f762 xen/include/asm-ia64/grant_table.h
--- a/xen/include/asm-ia64/grant_table.h Tue May 13 15:08:17 2008 +0100
+++ b/xen/include/asm-ia64/grant_table.h Wed May 14 09:47:50 2008 -0500
@@ -13,7 +13,7 @@ int replace_grant_host_mapping(unsigned
int replace_grant_host_mapping(unsigned long gpaddr, unsigned long mfn, unsigned long new_gpaddr, unsigned int flags);
// for grant transfer
-int guest_physmap_add_page(struct domain *d, unsigned long gpfn, unsigned long mfn);
+int guest_physmap_add_page(struct domain *d, unsigned long gpfn, unsigned long mfn, int order);
/* XXX
* somewhere appropriate
diff -r 53195719f762 xen/include/asm-ia64/shadow.h
--- a/xen/include/asm-ia64/shadow.h Tue May 13 15:08:17 2008 +0100
+++ b/xen/include/asm-ia64/shadow.h Wed May 14 09:47:50 2008 -0500
@@ -40,8 +40,10 @@
* Utilities to change relationship of gpfn->mfn for designated domain,
* which is required by gnttab transfer, balloon, device model and etc.
*/
-int guest_physmap_add_page(struct domain *d, unsigned long gpfn, unsigned long mfn);
-void guest_physmap_remove_page(struct domain *d, unsigned long gpfn, unsigned long mfn);
+int guest_physmap_add_page(struct domain *d, unsigned long gpfn,
+ unsigned long mfn, unsigned int page_order);
+void guest_physmap_remove_page(struct domain *d, unsigned long gpfn,
+ unsigned long mfn, unsigned int page_order);
static inline int
shadow_mode_enabled(struct domain *d)
diff -r 53195719f762 xen/include/asm-x86/p2m.h
--- a/xen/include/asm-x86/p2m.h Tue May 13 15:08:17 2008 +0100
+++ b/xen/include/asm-x86/p2m.h Wed May 14 09:47:50 2008 -0500
@@ -102,7 +102,8 @@ struct p2m_domain {
void (*free_page )(struct domain *d,
struct page_info *pg);
int (*set_entry )(struct domain *d, unsigned long gfn,
- mfn_t mfn, p2m_type_t p2mt);
+ mfn_t mfn, unsigned int page_order,
+ p2m_type_t p2mt);
mfn_t (*get_entry )(struct domain *d, unsigned long gfn,
p2m_type_t *p2mt);
mfn_t (*get_entry_current)(unsigned long gfn,
@@ -203,21 +204,23 @@ void p2m_final_teardown(struct domain *d
/* Add a page to a domain's p2m table */
int guest_physmap_add_entry(struct domain *d, unsigned long gfn,
- unsigned long mfn, p2m_type_t t);
+ unsigned long mfn, unsigned int page_order,
+ p2m_type_t t);
/* Untyped version for RAM only, for compatibility
*
* Return 0 for success
*/
static inline int guest_physmap_add_page(struct domain *d, unsigned long gfn,
- unsigned long mfn)
-{
- return guest_physmap_add_entry(d, gfn, mfn, p2m_ram_rw);
+ unsigned long mfn,
+ unsigned int page_order)
+{
+ return guest_physmap_add_entry(d, gfn, mfn, page_order, p2m_ram_rw);
}
/* Remove a page from a domain's p2m table */
void guest_physmap_remove_page(struct domain *d, unsigned long gfn,
- unsigned long mfn);
+ unsigned long mfn, unsigned int page_order);
/* Change types across all p2m entries in a domain */
void p2m_change_type_global(struct domain *d, p2m_type_t ot, p2m_type_t nt);
diff -r 53195719f762 xen/include/xen/paging.h
--- a/xen/include/xen/paging.h Tue May 13 15:08:17 2008 +0100
+++ b/xen/include/xen/paging.h Wed May 14 09:47:50 2008 -0500
@@ -18,8 +18,8 @@
#else
#define paging_mode_translate(d) (0)
-#define guest_physmap_add_page(d, p, m) (0)
-#define guest_physmap_remove_page(d, p, m) ((void)0)
+#define guest_physmap_add_page(d, p, m, o) (0)
+#define guest_physmap_remove_page(d, p, m, o) ((void)0)
#endif
[-- Attachment #3: Type: text/plain, Size: 138 bytes --]
_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xensource.com
http://lists.xensource.com/xen-devel
^ permalink raw reply [flat|nested] 14+ messages in thread
end of thread, other threads:[~2008-05-14 21:22 UTC | newest]
Thread overview: 14+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2008-05-12 12:14 [PATCH] patch to support super page (2M) with EPT Huang2, Wei
-- strict thread matches above, loose matches on Subject: below --
2008-05-09 9:10 Xin, Xiaohui
2008-05-11 20:33 ` Huang2, Wei
2008-05-12 4:36 ` Huang2, Wei
2008-05-12 5:04 ` Xin, Xiaohui
2008-05-12 7:03 ` Keir Fraser
2008-05-12 17:28 ` Huang2, Wei
2008-05-13 8:46 ` Keir Fraser
2008-05-13 13:36 ` Huang2, Wei
2008-05-13 13:39 ` Li, Xin B
2008-05-13 13:51 ` Keir Fraser
2008-05-13 15:49 ` Huang2, Wei
2008-05-14 8:40 ` Keir Fraser
2008-05-14 21:22 ` Huang2, Wei
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.