* [patch] pagetable cleanups
@ 2005-04-12 18:58 Gerd Knorr
2005-04-14 12:25 ` Michael A Fetterman
2005-04-14 16:47 ` Christian Limpach
0 siblings, 2 replies; 13+ messages in thread
From: Gerd Knorr @ 2005-04-12 18:58 UTC (permalink / raw)
To: xen-devel
Hi,
Next version of the pagetable cleanup patch. Builds and boots
domain 0 on x86_32. Changes from the last version:
* macro names are changed.
* adapted to the new shadow code checked in last week.
* new macro: l1e_has_changed() to compare page table
entries.
Open issues:
* I'm not sure how best to handle the debug printks. At the moment
they use the l1e_get_value() macro to get the raw bits and print
them as an unsigned long hex value. I'd like to get rid of the
l1e_get_value() macro altogether, though ...
* x86_64 build needs fixing, will look into this tomorrow.
Enjoy,
Gerd
Index: xen/arch/x86/shadow.c
===================================================================
--- xen.orig/arch/x86/shadow.c 2005-04-12 17:01:41.000000000 +0200
+++ xen/arch/x86/shadow.c 2005-04-12 17:33:01.000000000 +0200
@@ -307,7 +307,7 @@ free_shadow_l1_table(struct domain *d, u
for ( i = min; i <= max; i++ )
{
put_page_from_l1e(pl1e[i], d);
- pl1e[i] = mk_l1_pgentry(0);
+ l1e_clear(pl1e[i]);
}
unmap_domain_mem(pl1e);
@@ -333,9 +333,8 @@ free_shadow_hl2_table(struct domain *d,
for ( i = 0; i < limit; i++ )
{
- unsigned long hl2e = l1_pgentry_val(hl2[i]);
- if ( hl2e & _PAGE_PRESENT )
- put_page(pfn_to_page(hl2e >> PAGE_SHIFT));
+ if ( l1e_get_flags(hl2[i]) & _PAGE_PRESENT )
+ put_page(pfn_to_page(l1e_get_pfn(hl2[i])));
}
unmap_domain_mem(hl2);
@@ -614,13 +613,14 @@ static void alloc_monitor_pagetable(stru
HYPERVISOR_ENTRIES_PER_L2_PAGETABLE * sizeof(l2_pgentry_t));
#endif
- mpl2e[l2_table_offset(PERDOMAIN_VIRT_START)] =
- mk_l2_pgentry((__pa(d->arch.mm_perdomain_pt) & PAGE_MASK)
- | __PAGE_HYPERVISOR);
+ l2e_init_phys(mpl2e[l2_table_offset(PERDOMAIN_VIRT_START)],
+ __pa(d->arch.mm_perdomain_pt),
+ __PAGE_HYPERVISOR);
// map the phys_to_machine map into the Read-Only MPT space for this domain
- mpl2e[l2_table_offset(RO_MPT_VIRT_START)] =
- mk_l2_pgentry(pagetable_val(d->arch.phys_table) | __PAGE_HYPERVISOR);
+ l2e_init_phys(mpl2e[l2_table_offset(RO_MPT_VIRT_START)],
+ pagetable_val(d->arch.phys_table),
+ __PAGE_HYPERVISOR);
ed->arch.monitor_table = mk_pagetable(mmfn << PAGE_SHIFT);
ed->arch.monitor_vtable = mpl2e;
@@ -644,7 +644,7 @@ void free_monitor_pagetable(struct exec_
*/
hl2e = mpl2e[LINEAR_PT_VIRT_START >> L2_PAGETABLE_SHIFT];
- ASSERT(l2_pgentry_val(hl2e) & _PAGE_PRESENT);
+ ASSERT(l2e_get_flags(hl2e) & _PAGE_PRESENT);
- mfn = l2_pgentry_val(hl2e) >> PAGE_SHIFT;
+ mfn = l2e_get_pfn(hl2e);
ASSERT(mfn);
put_shadow_ref(mfn);
@@ -672,7 +672,8 @@ set_p2m_entry(struct domain *d, unsigned
ASSERT( phystab );
l2 = map_domain_mem(phystab);
- if ( !l2_pgentry_val(l2e = l2[l2_table_offset(va)]) )
+ l2e = l2[l2_table_offset(va)];
+ if ( !l2e_get_value(l2e) ) /* FIXME: check present bit? */
{
l1page = alloc_domheap_page(NULL);
if ( !l1page )
@@ -682,15 +683,14 @@ set_p2m_entry(struct domain *d, unsigned
memset(l1, 0, PAGE_SIZE);
unmap_domain_mem(l1);
- l2e = l2[l2_table_offset(va)] =
- mk_l2_pgentry((page_to_pfn(l1page) << PAGE_SHIFT) |
- __PAGE_HYPERVISOR);
+ l2e_init_pfn(l2e, page_to_pfn(l1page), __PAGE_HYPERVISOR);
+ l2[l2_table_offset(va)] = l2e;
}
unmap_domain_mem(l2);
- l1 = map_domain_mem(l2_pgentry_val(l2e) & PAGE_MASK);
- l1[l1_table_offset(va)] = mk_l1_pgentry((mfn << PAGE_SHIFT) |
- __PAGE_HYPERVISOR);
+ l1 = map_domain_mem(l2e_get_phys(l2e));
+ l1e_init_pfn(l1[l1_table_offset(va)],
+ mfn, __PAGE_HYPERVISOR);
unmap_domain_mem(l1);
return 1;
@@ -966,13 +966,12 @@ translate_l1pgtable(struct domain *d, l1
for (i = 0; i < L1_PAGETABLE_ENTRIES; i++)
{
if ( is_guest_l1_slot(i) &&
- (l1_pgentry_val(l1[i]) & _PAGE_PRESENT) )
+ (l1e_get_flags(l1[i]) & _PAGE_PRESENT) )
{
- unsigned long mfn = l1_pgentry_val(l1[i]) >> PAGE_SHIFT;
+ unsigned long mfn = l1e_get_pfn(l1[i]);
unsigned long gpfn = __mfn_to_gpfn(d, mfn);
- ASSERT((l1_pgentry_val(p2m[gpfn]) >> PAGE_SHIFT) == mfn);
- l1[i] = mk_l1_pgentry((gpfn << PAGE_SHIFT) |
- (l1_pgentry_val(l1[i]) & ~PAGE_MASK));
+ ASSERT(l1e_get_pfn(p2m[gpfn]) == mfn);
+ l1e_init_pfn(l1[i], gpfn, l1e_get_flags(l1[i]));
}
}
unmap_domain_mem(l1);
@@ -994,13 +993,12 @@ translate_l2pgtable(struct domain *d, l1
for (i = 0; i < L2_PAGETABLE_ENTRIES; i++)
{
if ( is_guest_l2_slot(i) &&
- (l2_pgentry_val(l2[i]) & _PAGE_PRESENT) )
+ (l2e_get_flags(l2[i]) & _PAGE_PRESENT) )
{
- unsigned long mfn = l2_pgentry_val(l2[i]) >> PAGE_SHIFT;
+ unsigned long mfn = l2e_get_pfn(l2[i]);
unsigned long gpfn = __mfn_to_gpfn(d, mfn);
- ASSERT((l1_pgentry_val(p2m[gpfn]) >> PAGE_SHIFT) == mfn);
+ ASSERT(l1e_get_pfn(p2m[gpfn]) == mfn);
- l2[i] = mk_l2_pgentry((gpfn << PAGE_SHIFT) |
- (l2_pgentry_val(l2[i]) & ~PAGE_MASK));
+ l2e_init_pfn(l2[i], gpfn, l2e_get_flags(l2[i]));
translate_l1pgtable(d, p2m, mfn);
}
}
@@ -1266,13 +1264,13 @@ gpfn_to_mfn_foreign(struct domain *d, un
l2_pgentry_t *l2 = map_domain_mem(phystab);
l2_pgentry_t l2e = l2[l2_table_offset(va)];
unmap_domain_mem(l2);
- if ( !(l2_pgentry_val(l2e) & _PAGE_PRESENT) )
+ if ( !(l2e_get_flags(l2e) & _PAGE_PRESENT) )
{
printk("gpfn_to_mfn_foreign(d->id=%d, gpfn=%p) => 0 l2e=%p\n",
- d->id, gpfn, l2_pgentry_val(l2e));
+ d->id, gpfn, l2e_get_value(l2e));
return INVALID_MFN;
}
- unsigned long l1tab = l2_pgentry_val(l2e) & PAGE_MASK;
+ unsigned long l1tab = l2e_get_phys(l2e);
l1_pgentry_t *l1 = map_domain_mem(l1tab);
l1_pgentry_t l1e = l1[l1_table_offset(va)];
unmap_domain_mem(l1);
@@ -1282,14 +1280,14 @@ gpfn_to_mfn_foreign(struct domain *d, un
d->id, gpfn, l1_pgentry_val(l1e) >> PAGE_SHIFT, phystab, l2e, l1tab, l1e);
#endif
- if ( !(l1_pgentry_val(l1e) & _PAGE_PRESENT) )
+ if ( !(l1e_get_flags(l1e) & _PAGE_PRESENT) )
{
printk("gpfn_to_mfn_foreign(d->id=%d, gpfn=%p) => 0 l1e=%p\n",
- d->id, gpfn, l1_pgentry_val(l1e));
+ d->id, gpfn, l1e_get_value(l1e));
return INVALID_MFN;
}
- return l1_pgentry_val(l1e) >> PAGE_SHIFT;
+ return l1e_get_pfn(l1e);
}
static unsigned long
@@ -1330,12 +1328,12 @@ shadow_hl2_table(struct domain *d, unsig
// Setup easy access to the GL2, SL2, and HL2 frames.
//
- hl2[l2_table_offset(LINEAR_PT_VIRT_START)] =
- mk_l1_pgentry((gmfn << PAGE_SHIFT) | __PAGE_HYPERVISOR);
- hl2[l2_table_offset(SH_LINEAR_PT_VIRT_START)] =
- mk_l1_pgentry((smfn << PAGE_SHIFT) | __PAGE_HYPERVISOR);
- hl2[l2_table_offset(PERDOMAIN_VIRT_START)] =
- mk_l1_pgentry((hl2mfn << PAGE_SHIFT) | __PAGE_HYPERVISOR);
+ l1e_init_pfn(hl2[l2_table_offset(LINEAR_PT_VIRT_START)],
+ gmfn, __PAGE_HYPERVISOR);
+ l1e_init_pfn(hl2[l2_table_offset(SH_LINEAR_PT_VIRT_START)],
+ smfn, __PAGE_HYPERVISOR);
+ l1e_init_pfn(hl2[l2_table_offset(PERDOMAIN_VIRT_START)],
+ hl2mfn, __PAGE_HYPERVISOR);
}
unmap_domain_mem(hl2);
@@ -1383,21 +1381,20 @@ static unsigned long shadow_l2_table(
&idle_pg_table[DOMAIN_ENTRIES_PER_L2_PAGETABLE],
HYPERVISOR_ENTRIES_PER_L2_PAGETABLE * sizeof(l2_pgentry_t));
- spl2e[l2_table_offset(SH_LINEAR_PT_VIRT_START)] =
- mk_l2_pgentry((smfn << PAGE_SHIFT) | __PAGE_HYPERVISOR);
+ l2e_init_pfn(spl2e[l2_table_offset(SH_LINEAR_PT_VIRT_START)],
+ smfn, __PAGE_HYPERVISOR);
- spl2e[l2_table_offset(PERDOMAIN_VIRT_START)] =
- mk_l2_pgentry(__pa(page_get_owner(
- &frame_table[gmfn])->arch.mm_perdomain_pt) |
- __PAGE_HYPERVISOR);
+ l2e_init_phys(spl2e[l2_table_offset(PERDOMAIN_VIRT_START)],
+ __pa(page_get_owner(&frame_table[gmfn])->arch.mm_perdomain_pt),
+ __PAGE_HYPERVISOR);
if ( shadow_mode_translate(d) ) // NB: not external
{
unsigned long hl2mfn;
- spl2e[l2_table_offset(RO_MPT_VIRT_START)] =
- mk_l2_pgentry(pagetable_val(d->arch.phys_table) |
- __PAGE_HYPERVISOR);
+ l2e_init_phys(spl2e[l2_table_offset(RO_MPT_VIRT_START)],
+ pagetable_val(d->arch.phys_table),
+ __PAGE_HYPERVISOR);
if ( unlikely(!(hl2mfn = __shadow_status(d, gpfn, PGT_hl2_shadow))) )
hl2mfn = shadow_hl2_table(d, gpfn, gmfn, smfn);
@@ -1408,12 +1405,12 @@ static unsigned long shadow_l2_table(
if ( !get_shadow_ref(hl2mfn) )
BUG();
- spl2e[l2_table_offset(LINEAR_PT_VIRT_START)] =
- mk_l2_pgentry((hl2mfn << PAGE_SHIFT) | __PAGE_HYPERVISOR);
+ l2e_init_pfn(spl2e[l2_table_offset(LINEAR_PT_VIRT_START)],
+ hl2mfn, __PAGE_HYPERVISOR);
}
else
- spl2e[l2_table_offset(LINEAR_PT_VIRT_START)] =
- mk_l2_pgentry((gmfn << PAGE_SHIFT) | __PAGE_HYPERVISOR);
+ l2e_init_pfn(spl2e[l2_table_offset(LINEAR_PT_VIRT_START)],
+ gmfn, __PAGE_HYPERVISOR);
}
else
{
@@ -1430,12 +1427,14 @@ void shadow_map_l1_into_current_l2(unsig
{
struct exec_domain *ed = current;
struct domain *d = ed->domain;
- unsigned long *gpl1e, *spl1e, gl2e, sl2e, gl1pfn, gl1mfn, sl1mfn;
+ l1_pgentry_t *gpl1e, *spl1e;
+ l2_pgentry_t gl2e, sl2e;
+ unsigned long gl1pfn, gl1mfn, sl1mfn;
int i, init_table = 0;
__guest_get_l2e(ed, va, &gl2e);
- ASSERT(gl2e & _PAGE_PRESENT);
+ ASSERT(l2e_get_flags(gl2e) & _PAGE_PRESENT);
- gl1pfn = gl2e >> PAGE_SHIFT;
+ gl1pfn = l2e_get_pfn(gl2e);
if ( !(sl1mfn = __shadow_status(d, gl1pfn, PGT_l1_shadow)) )
{
@@ -1468,9 +1467,9 @@ void shadow_map_l1_into_current_l2(unsig
}
#ifndef NDEBUG
- unsigned long old_sl2e;
+ l2_pgentry_t old_sl2e;
__shadow_get_l2e(ed, va, &old_sl2e);
- ASSERT( !(old_sl2e & _PAGE_PRESENT) );
+ ASSERT( !(l2e_get_flags(old_sl2e) & _PAGE_PRESENT) );
#endif
if ( !get_shadow_ref(sl1mfn) )
@@ -1481,25 +1480,23 @@ void shadow_map_l1_into_current_l2(unsig
if ( init_table )
{
- gpl1e = (unsigned long *)
- &(linear_pg_table[l1_linear_offset(va) &
+ gpl1e = &(linear_pg_table[l1_linear_offset(va) &
~(L1_PAGETABLE_ENTRIES-1)]);
- spl1e = (unsigned long *)
- &(shadow_linear_pg_table[l1_linear_offset(va) &
+ spl1e = &(shadow_linear_pg_table[l1_linear_offset(va) &
~(L1_PAGETABLE_ENTRIES-1)]);
- unsigned long sl1e;
+ l1_pgentry_t sl1e;
int index = l1_table_offset(va);
int min = 1, max = 0;
for ( i = 0; i < L1_PAGETABLE_ENTRIES; i++ )
{
l1pte_propagate_from_guest(d, gpl1e[i], &sl1e);
- if ( (sl1e & _PAGE_PRESENT) &&
- !shadow_get_page_from_l1e(mk_l1_pgentry(sl1e), d) )
- sl1e = 0;
- if ( sl1e == 0 )
+ if ( (l1e_get_flags(sl1e) & _PAGE_PRESENT) &&
+ !shadow_get_page_from_l1e(sl1e, d) )
+ l1e_clear(sl1e);
+ if ( l1e_get_value(sl1e) == 0 ) /* FIXME: check flags? */
{
// First copy entries from 0 until first invalid.
// Then copy entries from index until first invalid.
@@ -1525,7 +1522,7 @@ void shadow_map_l1_into_current_l2(unsig
void shadow_invlpg(struct exec_domain *ed, unsigned long va)
{
struct domain *d = ed->domain;
- unsigned long gpte, spte;
+ l1_pgentry_t gpte, spte;
ASSERT(shadow_mode_enabled(d));
@@ -1538,8 +1535,8 @@ void shadow_invlpg(struct exec_domain *e
// It's not strictly necessary to update the shadow here,
// but it might save a fault later.
//
- if (__get_user(gpte, (unsigned long *)
- &linear_pg_table[va >> PAGE_SHIFT])) {
+ if (__copy_from_user(&gpte, &linear_pg_table[va >> PAGE_SHIFT],
+ sizeof(gpte))) {
perfc_incrc(shadow_invlpg_faults);
return;
}
@@ -1707,31 +1704,30 @@ void shadow_mark_va_out_of_sync(
{
struct out_of_sync_entry *entry =
shadow_mark_mfn_out_of_sync(ed, gpfn, mfn);
- unsigned long sl2e;
+ l2_pgentry_t sl2e;
// We need the address of shadow PTE that maps @va.
// It might not exist yet. Make sure it's there.
//
__shadow_get_l2e(ed, va, &sl2e);
- if ( !(sl2e & _PAGE_PRESENT) )
+ if ( !(l2e_get_flags(sl2e) & _PAGE_PRESENT) )
{
// either this L1 isn't shadowed yet, or the shadow isn't linked into
// the current L2.
shadow_map_l1_into_current_l2(va);
__shadow_get_l2e(ed, va, &sl2e);
}
- ASSERT(sl2e & _PAGE_PRESENT);
+ ASSERT(l2e_get_flags(sl2e) & _PAGE_PRESENT);
// NB: this is stored as a machine address.
entry->writable_pl1e =
- ((sl2e & PAGE_MASK) |
- (sizeof(l1_pgentry_t) * l1_table_offset(va)));
+ l2e_get_phys(sl2e) | (sizeof(l1_pgentry_t) * l1_table_offset(va));
ASSERT( !(entry->writable_pl1e & (sizeof(l1_pgentry_t)-1)) );
// Increment shadow's page count to represent the reference
// inherent in entry->writable_pl1e
//
- if ( !get_shadow_ref(sl2e >> PAGE_SHIFT) )
+ if ( !get_shadow_ref(l2e_get_pfn(sl2e)) )
BUG();
FSH_LOG("mark_out_of_sync(va=%p -> writable_pl1e=%p)",
@@ -1784,7 +1780,7 @@ int __shadow_out_of_sync(struct exec_dom
{
struct domain *d = ed->domain;
unsigned long l2mfn = pagetable_val(ed->arch.guest_table) >> PAGE_SHIFT;
- unsigned long l2e;
+ l2_pgentry_t l2e;
unsigned long l1mfn;
ASSERT(spin_is_locked(&d->arch.shadow_lock));
@@ -1796,10 +1792,10 @@ int __shadow_out_of_sync(struct exec_dom
return 1;
__guest_get_l2e(ed, va, &l2e);
- if ( !(l2e & _PAGE_PRESENT) )
+ if ( !(l2e_get_flags(l2e) & _PAGE_PRESENT) )
return 0;
- l1mfn = __gpfn_to_mfn(d, l2e >> PAGE_SHIFT);
+ l1mfn = __gpfn_to_mfn(d, l2e_get_pfn(l2e));
// If the l1 pfn is invalid, it can't be out of sync...
if ( !VALID_MFN(l1mfn) )
@@ -1866,31 +1862,31 @@ static u32 remove_all_write_access_in_pt
unsigned long readonly_gpfn, unsigned long readonly_gmfn,
u32 max_refs_to_find, unsigned long prediction)
{
- unsigned long *pt = map_domain_mem(pt_mfn << PAGE_SHIFT);
- unsigned long match =
- (readonly_gmfn << PAGE_SHIFT) | _PAGE_RW | _PAGE_PRESENT;
- unsigned long mask = PAGE_MASK | _PAGE_RW | _PAGE_PRESENT;
+ l1_pgentry_t *pt = map_domain_mem(pt_mfn << PAGE_SHIFT);
+ l1_pgentry_t match;
+ unsigned long flags = _PAGE_RW | _PAGE_PRESENT;
int i;
u32 found = 0;
int is_l1_shadow =
((frame_table[pt_mfn].u.inuse.type_info & PGT_type_mask) ==
PGT_l1_shadow);
-#define MATCH_ENTRY(_i) (((pt[_i] ^ match) & mask) == 0)
+ l1e_init_pfn(match, readonly_gmfn, flags);
// returns true if all refs have been found and fixed.
//
int fix_entry(int i)
{
- unsigned long old = pt[i];
- unsigned long new = old & ~_PAGE_RW;
+ l1_pgentry_t old = pt[i];
+ l1_pgentry_t new = old;
- if ( is_l1_shadow && !shadow_get_page_from_l1e(mk_l1_pgentry(new), d) )
+ l1e_remove_flags(new,_PAGE_RW);
+ if ( is_l1_shadow && !shadow_get_page_from_l1e(new, d) )
BUG();
found++;
pt[i] = new;
if ( is_l1_shadow )
- put_page_from_l1e(mk_l1_pgentry(old), d);
+ put_page_from_l1e(old, d);
#if 0
printk("removed write access to pfn=%p mfn=%p in smfn=%p entry %x "
@@ -1901,8 +1897,8 @@ static u32 remove_all_write_access_in_pt
return (found == max_refs_to_find);
}
- if ( MATCH_ENTRY(readonly_gpfn & (L1_PAGETABLE_ENTRIES - 1)) &&
- fix_entry(readonly_gpfn & (L1_PAGETABLE_ENTRIES - 1)) )
+ i = readonly_gpfn & (L1_PAGETABLE_ENTRIES - 1);
+ if ( !l1e_has_changed(pt[i], match, flags) && fix_entry(i) )
{
perfc_incrc(remove_write_fast_exit);
increase_writable_pte_prediction(d, readonly_gpfn, prediction);
@@ -1912,7 +1908,7 @@ static u32 remove_all_write_access_in_pt
for (i = 0; i < L1_PAGETABLE_ENTRIES; i++)
{
- if ( unlikely(MATCH_ENTRY(i)) && fix_entry(i) )
+ if ( unlikely(!l1e_has_changed(pt[i], match, flags)) && fix_entry(i) )
break;
}
@@ -2005,25 +2001,27 @@ int shadow_remove_all_write_access(
static u32 remove_all_access_in_page(
struct domain *d, unsigned long l1mfn, unsigned long forbidden_gmfn)
{
- unsigned long *pl1e = map_domain_mem(l1mfn << PAGE_SHIFT);
- unsigned long match = (forbidden_gmfn << PAGE_SHIFT) | _PAGE_PRESENT;
- unsigned long mask = PAGE_MASK | _PAGE_PRESENT;
+ l1_pgentry_t *pl1e = map_domain_mem(l1mfn << PAGE_SHIFT);
+ l1_pgentry_t match;
+ unsigned long flags = _PAGE_PRESENT;
int i;
u32 count = 0;
int is_l1_shadow =
((frame_table[l1mfn].u.inuse.type_info & PGT_type_mask) ==
PGT_l1_shadow);
+ l1e_init_pfn(match, forbidden_gmfn, flags);
+
for (i = 0; i < L1_PAGETABLE_ENTRIES; i++)
{
- if ( unlikely(((pl1e[i] ^ match) & mask) == 0) )
+ if ( unlikely(!l1e_has_changed(pl1e[i], match, flags)) )
{
- unsigned long ol2e = pl1e[i];
- pl1e[i] = 0;
+ l1_pgentry_t ol2e = pl1e[i];
+ l1e_clear(pl1e[i]);
count++;
if ( is_l1_shadow )
- put_page_from_l1e(mk_l1_pgentry(ol2e), d);
+ put_page_from_l1e(ol2e, d);
else /* must be an hl2 page */
put_page(&frame_table[forbidden_gmfn]);
}
@@ -2076,7 +2074,7 @@ static int resync_all(struct domain *d,
struct out_of_sync_entry *entry;
unsigned i;
unsigned long smfn;
- unsigned long *guest, *shadow, *snapshot;
+ void *guest, *shadow, *snapshot;
int need_flush = 0, external = shadow_mode_external(d);
int unshadow;
int changed;
@@ -2114,14 +2112,18 @@ static int resync_all(struct domain *d,
int min_snapshot = SHADOW_MIN(min_max_snapshot);
int max_snapshot = SHADOW_MAX(min_max_snapshot);
+ l1_pgentry_t *guest1 = guest;
+ l1_pgentry_t *shadow1 = shadow;
+ l1_pgentry_t *snapshot1 = snapshot;
+
changed = 0;
for ( i = min_shadow; i <= max_shadow; i++ )
{
if ( (i < min_snapshot) || (i > max_snapshot) ||
- (guest[i] != snapshot[i]) )
+ l1e_has_changed(guest1[i], snapshot1[i], PAGE_FLAG_MASK) )
{
- need_flush |= validate_pte_change(d, guest[i], &shadow[i]);
+ need_flush |= validate_pte_change(d, guest1[i], &shadow1[i]);
// can't update snapshots of linear page tables -- they
// are used multiple times...
@@ -2140,16 +2142,20 @@ static int resync_all(struct domain *d,
{
int max = -1;
+ l2_pgentry_t *guest2 = guest;
+ l2_pgentry_t *shadow2 = shadow;
+ l2_pgentry_t *snapshot2 = snapshot;
+
changed = 0;
for ( i = 0; i < L2_PAGETABLE_ENTRIES; i++ )
{
if ( !is_guest_l2_slot(i) && !external )
continue;
- unsigned long new_pde = guest[i];
- if ( new_pde != snapshot[i] )
+ l2_pgentry_t new_pde = guest2[i];
+ if ( l2e_has_changed(new_pde, snapshot2[i], PAGE_FLAG_MASK))
{
- need_flush |= validate_pde_change(d, new_pde, &shadow[i]);
+ need_flush |= validate_pde_change(d, new_pde, &shadow2[i]);
// can't update snapshots of linear page tables -- they
// are used multiple times...
@@ -2158,12 +2164,13 @@ static int resync_all(struct domain *d,
changed++;
}
- if ( new_pde != 0 )
+ if ( l2e_get_value(new_pde) != 0 ) /* FIXME: check flags? */
max = i;
// XXX - This hack works for linux guests.
// Need a better solution long term.
- if ( !(new_pde & _PAGE_PRESENT) && unlikely(new_pde != 0) &&
+ if ( !(l2e_get_flags(new_pde) & _PAGE_PRESENT) &&
+ unlikely(l2e_get_value(new_pde) != 0) &&
!unshadow &&
(frame_table[smfn].u.inuse.type_info & PGT_pinned) )
unshadow = 1;
@@ -2175,16 +2182,21 @@ static int resync_all(struct domain *d,
break;
}
case PGT_hl2_shadow:
+ {
+ l2_pgentry_t *guest2 = guest;
+ l2_pgentry_t *shadow2 = shadow;
+ l2_pgentry_t *snapshot2 = snapshot;
+
changed = 0;
for ( i = 0; i < L2_PAGETABLE_ENTRIES; i++ )
{
if ( !is_guest_l2_slot(i) && !external )
continue;
- unsigned long new_pde = guest[i];
- if ( new_pde != snapshot[i] )
+ l2_pgentry_t new_pde = guest2[i];
+ if ( l2e_has_changed(new_pde, snapshot2[i], PAGE_FLAG_MASK) )
{
- need_flush |= validate_hl2e_change(d, new_pde, &shadow[i]);
+ need_flush |= validate_hl2e_change(d, new_pde, &shadow2[i]);
// can't update snapshots of linear page tables -- they
// are used multiple times...
@@ -2197,6 +2209,7 @@ static int resync_all(struct domain *d,
perfc_incrc(resync_hl2);
perfc_incr_histo(shm_hl2_updates, changed, PT_UPDATES);
break;
+ }
default:
BUG();
}
@@ -2234,15 +2247,16 @@ void __shadow_sync_all(struct domain *d)
if ( entry->writable_pl1e & (sizeof(l1_pgentry_t)-1) )
continue;
- unsigned long *ppte = map_domain_mem(entry->writable_pl1e);
- unsigned long opte = *ppte;
- unsigned long npte = opte & ~_PAGE_RW;
+ l1_pgentry_t *ppte = map_domain_mem(entry->writable_pl1e);
+ l1_pgentry_t opte = *ppte;
+ l1_pgentry_t npte = opte;
+ l1e_remove_flags(npte, _PAGE_RW);
- if ( (npte & _PAGE_PRESENT) &&
- !shadow_get_page_from_l1e(mk_l1_pgentry(npte), d) )
+ if ( (l1e_get_flags(npte) & _PAGE_PRESENT) &&
+ !shadow_get_page_from_l1e(npte, d) )
BUG();
*ppte = npte;
- put_page_from_l1e(mk_l1_pgentry(opte), d);
+ put_page_from_l1e(opte, d);
unmap_domain_mem(ppte);
}
@@ -2277,10 +2291,12 @@ void __shadow_sync_all(struct domain *d)
int shadow_fault(unsigned long va, struct xen_regs *regs)
{
- unsigned long gpte, spte = 0, orig_gpte;
+ l1_pgentry_t gpte, spte, orig_gpte;
struct exec_domain *ed = current;
struct domain *d = ed->domain;
- unsigned long gpde;
+ l2_pgentry_t gpde;
+
+ l1e_clear(spte);
SH_VVLOG("shadow_fault( va=%p, code=%lu )", va, regs->error_code );
perfc_incrc(shadow_fault_calls);
@@ -2303,7 +2319,7 @@ int shadow_fault(unsigned long va, struc
* STEP 2. Check the guest PTE.
*/
__guest_get_l2e(ed, va, &gpde);
- if ( unlikely(!(gpde & _PAGE_PRESENT)) )
+ if ( unlikely(!(l2e_get_flags(gpde) & _PAGE_PRESENT)) )
{
SH_VVLOG("shadow_fault - EXIT: L1 not present" );
perfc_incrc(shadow_fault_bail_pde_not_present);
@@ -2314,8 +2330,8 @@ int shadow_fault(unsigned long va, struc
// the mapping is in-sync, so the check of the PDE's present bit, above,
// covers this access.
//
- orig_gpte = gpte = l1_pgentry_val(linear_pg_table[l1_linear_offset(va)]);
- if ( unlikely(!(gpte & _PAGE_PRESENT)) )
+ orig_gpte = gpte = linear_pg_table[l1_linear_offset(va)];
+ if ( unlikely(!(l1e_get_flags(gpte) & _PAGE_PRESENT)) )
{
SH_VVLOG("shadow_fault - EXIT: gpte not present (%lx)",gpte );
perfc_incrc(shadow_fault_bail_pte_not_present);
@@ -2325,7 +2341,7 @@ int shadow_fault(unsigned long va, struc
/* Write fault? */
if ( regs->error_code & 2 )
{
- if ( unlikely(!(gpte & _PAGE_RW)) )
+ if ( unlikely(!(l1e_get_flags(gpte) & _PAGE_RW)) )
{
/* Write fault on a read-only mapping. */
SH_VVLOG("shadow_fault - EXIT: wr fault on RO page (%lx)", gpte);
@@ -2357,8 +2373,8 @@ int shadow_fault(unsigned long va, struc
*/
/* XXX Watch out for read-only L2 entries! (not used in Linux). */
- if ( unlikely(__put_user(gpte, (unsigned long *)
- &linear_pg_table[l1_linear_offset(va)])) )
+ if ( unlikely(__copy_to_user(&linear_pg_table[l1_linear_offset(va)],
+ &gpte, sizeof(gpte))) )
{
printk("shadow_fault() failed, crashing domain %d "
"due to a read-only L2 page table (gpde=%p), va=%p\n",
@@ -2367,8 +2383,9 @@ int shadow_fault(unsigned long va, struc
}
// if necessary, record the page table page as dirty
- if ( unlikely(shadow_mode_log_dirty(d)) && (orig_gpte != gpte) )
- mark_dirty(d, __gpfn_to_mfn(d, gpde >> PAGE_SHIFT));
+ if ( unlikely(shadow_mode_log_dirty(d)) &&
+ l1e_has_changed(orig_gpte, gpte, PAGE_FLAG_MASK))
+ mark_dirty(d, __gpfn_to_mfn(d, l2e_get_pfn(gpde)));
shadow_set_l1e(va, spte, 1);
@@ -2490,11 +2507,11 @@ void __update_pagetables(struct exec_dom
BUG(); // ref counts for hl2mfn and smfn need to be maintained!
- mpl2e[l2_table_offset(LINEAR_PT_VIRT_START)] =
- mk_l2_pgentry((hl2mfn << PAGE_SHIFT) | __PAGE_HYPERVISOR);
+ l2e_init_pfn(mpl2e[l2_table_offset(LINEAR_PT_VIRT_START)],
+ hl2mfn, __PAGE_HYPERVISOR);
- mpl2e[l2_table_offset(SH_LINEAR_PT_VIRT_START)] =
- mk_l2_pgentry((smfn << PAGE_SHIFT) | __PAGE_HYPERVISOR);
+ l2e_init_pfn(mpl2e[l2_table_offset(SH_LINEAR_PT_VIRT_START)],
+ smfn, __PAGE_HYPERVISOR);
// XXX - maybe this can be optimized somewhat??
local_flush_tlb();
@@ -2515,10 +2532,9 @@ char * sh_check_name;
int shadow_status_noswap;
#define v2m(adr) ({ \
- unsigned long _a = (unsigned long)(adr); \
- unsigned long _pte = l1_pgentry_val( \
- shadow_linear_pg_table[_a >> PAGE_SHIFT]); \
- unsigned long _pa = _pte & PAGE_MASK; \
+ unsigned long _a = (unsigned long)(adr); \
+ l1_pgentry_t _pte = shadow_linear_pg_table[_a >> PAGE_SHIFT]; \
+ unsigned long _pa = l1e_get_phys(_pte); \
_pa | (_a & ~PAGE_MASK); \
})
@@ -2536,49 +2552,55 @@ int shadow_status_noswap;
} while ( 0 )
static int check_pte(
- struct domain *d, unsigned long *pgpte, unsigned long *pspte,
+ struct domain *d, l1_pgentry_t *pgpte, l1_pgentry_t *pspte,
int level, int l2_idx, int l1_idx, int oos_ptes)
{
- unsigned gpte = *pgpte;
- unsigned spte = *pspte;
+ l1_pgentry_t gpte = *pgpte;
+ l1_pgentry_t spte = *pspte;
unsigned long mask, gpfn, smfn, gmfn;
int errors = 0;
int page_table_page;
- if ( (spte == 0) || (spte == 0xdeadface) || (spte == 0x00000E00) )
+ if ( (l1e_get_value(spte) == 0) ||
+ (l1e_get_value(spte) == 0xdeadface) ||
+ (l1e_get_value(spte) == 0x00000E00) )
return errors; /* always safe */
- if ( !(spte & _PAGE_PRESENT) )
+ if ( !(l1e_get_flags(spte) & _PAGE_PRESENT) )
FAIL("Non zero not present spte");
if ( level == 2 ) sh_l2_present++;
if ( level == 1 ) sh_l1_present++;
- if ( !(gpte & _PAGE_PRESENT) )
+ if ( !(l1e_get_flags(gpte) & _PAGE_PRESENT) )
FAIL("Guest not present yet shadow is");
- mask = ~(_PAGE_DIRTY|_PAGE_ACCESSED|_PAGE_RW|PAGE_MASK);
+ mask = ~(_PAGE_DIRTY|_PAGE_ACCESSED|_PAGE_RW);
- if ( (spte & mask) != (gpte & mask) )
+ if ( l1e_has_changed(spte, gpte, mask) )
FAIL("Corrupt?");
if ( (level == 1) &&
- (spte & _PAGE_DIRTY ) && !(gpte & _PAGE_DIRTY) && !oos_ptes )
+ (l1e_get_flags(spte) & _PAGE_DIRTY ) &&
+ !(l1e_get_flags(gpte) & _PAGE_DIRTY) && !oos_ptes )
FAIL("Dirty coherence");
- if ( (spte & _PAGE_ACCESSED ) && !(gpte & _PAGE_ACCESSED) && !oos_ptes )
+ if ( (l1e_get_flags(spte) & _PAGE_ACCESSED ) &&
+ !(l1e_get_flags(gpte) & _PAGE_ACCESSED) && !oos_ptes )
FAIL("Accessed coherence");
- smfn = spte >> PAGE_SHIFT;
- gpfn = gpte >> PAGE_SHIFT;
+ smfn = l1e_get_pfn(spte);
+ gpfn = l1e_get_pfn(gpte);
gmfn = __gpfn_to_mfn(d, gpfn);
if ( !VALID_MFN(gmfn) )
- FAIL("invalid gpfn=%p gpte=%p\n", __func__, gpfn, gpte);
+ FAIL("invalid gpfn=%p gpte=%p\n", __func__, gpfn,
+ l1e_get_value(gpte));
page_table_page = mfn_is_page_table(gmfn);
- if ( (spte & _PAGE_RW ) && !(gpte & _PAGE_RW) && !oos_ptes )
+ if ( (l1e_get_flags(spte) & _PAGE_RW ) &&
+ !(l1e_get_flags(gpte) & _PAGE_RW) && !oos_ptes )
{
printk("gpfn=%p gmfn=%p smfn=%p t=0x%08x page_table_page=%d "
"oos_ptes=%d\n",
@@ -2589,8 +2611,9 @@ static int check_pte(
}
if ( (level == 1) &&
- (spte & _PAGE_RW ) &&
- !((gpte & _PAGE_RW) && (gpte & _PAGE_DIRTY)) &&
+ (l1e_get_flags(spte) & _PAGE_RW ) &&
+ !((l1e_get_flags(gpte) & _PAGE_RW) &&
+ (l1e_get_flags(gpte) & _PAGE_DIRTY)) &&
!oos_ptes )
{
printk("gpfn=%p gmfn=%p smfn=%p t=0x%08x page_table_page=%d "
@@ -2629,7 +2652,7 @@ static int check_l1_table(
unsigned long gmfn, unsigned long smfn, unsigned l2_idx)
{
int i;
- unsigned long *gpl1e, *spl1e;
+ l1_pgentry_t *gpl1e, *spl1e;
int errors = 0, oos_ptes = 0;
// First check to see if this guest page is currently the active
@@ -2670,6 +2693,7 @@ int check_l2_table(
{
l2_pgentry_t *gpl2e = (l2_pgentry_t *)map_domain_mem(gmfn << PAGE_SHIFT);
l2_pgentry_t *spl2e = (l2_pgentry_t *)map_domain_mem(smfn << PAGE_SHIFT);
+ l2_pgentry_t match;
int i;
int errors = 0;
int limit;
@@ -2701,25 +2725,26 @@ int check_l2_table(
FAILPT("hypervisor linear map inconsistent");
#endif
+ l2e_init_pfn(match, smfn, __PAGE_HYPERVISOR);
if ( !shadow_mode_external(d) &&
- (l2_pgentry_val(spl2e[SH_LINEAR_PT_VIRT_START >>
- L2_PAGETABLE_SHIFT]) !=
- ((smfn << PAGE_SHIFT) | __PAGE_HYPERVISOR)) )
+ l2e_has_changed(spl2e[SH_LINEAR_PT_VIRT_START >> L2_PAGETABLE_SHIFT],
+ match, PAGE_FLAG_MASK))
{
FAILPT("hypervisor shadow linear map inconsistent %p %p",
- l2_pgentry_val(spl2e[SH_LINEAR_PT_VIRT_START >>
- L2_PAGETABLE_SHIFT]),
- (smfn << PAGE_SHIFT) | __PAGE_HYPERVISOR);
+ l2e_get_value(spl2e[SH_LINEAR_PT_VIRT_START >>
+ L2_PAGETABLE_SHIFT]),
+ l2e_get_value(match));
}
+ l2e_init_phys(match, __pa(d->arch.mm_perdomain_pt), __PAGE_HYPERVISOR);
if ( !shadow_mode_external(d) &&
- (l2_pgentry_val(spl2e[PERDOMAIN_VIRT_START >> L2_PAGETABLE_SHIFT]) !=
- ((__pa(d->arch.mm_perdomain_pt) | __PAGE_HYPERVISOR))) )
+ l2e_has_changed(spl2e[PERDOMAIN_VIRT_START >> L2_PAGETABLE_SHIFT],
+ match, PAGE_FLAG_MASK))
{
FAILPT("hypervisor per-domain map inconsistent saw %p, expected (va=%p) %p",
- l2_pgentry_val(spl2e[PERDOMAIN_VIRT_START >> L2_PAGETABLE_SHIFT]),
+ l2e_get_value(spl2e[PERDOMAIN_VIRT_START >> L2_PAGETABLE_SHIFT]),
d->arch.mm_perdomain_pt,
- (__pa(d->arch.mm_perdomain_pt) | __PAGE_HYPERVISOR));
+ l2e_get_value(match));
}
#ifdef __i386__
@@ -2733,7 +2758,10 @@ int check_l2_table(
/* Check the whole L2. */
for ( i = 0; i < limit; i++ )
- errors += check_pte(d, &l2_pgentry_val(gpl2e[i]), &l2_pgentry_val(spl2e[i]), 2, i, 0, 0);
+ errors += check_pte(d,
+ (l1_pgentry_t*)(&gpl2e[i]), /* Hmm, dirty ... */
+ (l1_pgentry_t*)(&spl2e[i]),
+ 2, i, 0, 0);
unmap_domain_mem(spl2e);
unmap_domain_mem(gpl2e);
@@ -2798,11 +2826,11 @@ int _check_pagetable(struct exec_domain
for ( i = 0; i < limit; i++ )
{
- unsigned long gl1pfn = l2_pgentry_val(gpl2e[i]) >> PAGE_SHIFT;
+ unsigned long gl1pfn = l2e_get_pfn(gpl2e[i]);
unsigned long gl1mfn = __gpfn_to_mfn(d, gl1pfn);
- unsigned long sl1mfn = l2_pgentry_val(spl2e[i]) >> PAGE_SHIFT;
+ unsigned long sl1mfn = l2e_get_pfn(spl2e[i]);
- if ( l2_pgentry_val(spl2e[i]) != 0 )
+ if ( l2e_get_value(spl2e[i]) != 0 ) /* FIXME: check flags? */
{
errors += check_l1_table(d, gl1pfn, gl1mfn, sl1mfn, i);
}
@@ -2897,3 +2925,4 @@ int _check_all_pagetables(struct exec_do
* indent-tabs-mode: nil
* End:
*/
+
Index: xen/include/asm-x86/shadow.h
===================================================================
--- xen.orig/include/asm-x86/shadow.h 2005-04-12 17:01:36.000000000 +0200
+++ xen/include/asm-x86/shadow.h 2005-04-12 17:33:01.000000000 +0200
@@ -48,7 +48,7 @@
#define shadow_linear_l2_table(_ed) ((_ed)->arch.shadow_vtable)
// easy access to the hl2 table (for translated but not external modes only)
-#define __linear_hl2_table ((l1_pgentry_t *)(LINEAR_PT_VIRT_START + \
+#define __linear_hl2_table ((l2_pgentry_t *)(LINEAR_PT_VIRT_START + \
(PERDOMAIN_VIRT_START >> (L2_PAGETABLE_SHIFT - L1_PAGETABLE_SHIFT))))
#define shadow_lock_init(_d) spin_lock_init(&(_d)->arch.shadow_lock)
@@ -274,13 +274,13 @@ shadow_get_page_from_l1e(l1_pgentry_t l1
unsigned long mfn;
struct domain *owner;
- ASSERT( l1_pgentry_val(l1e) & _PAGE_PRESENT );
+ ASSERT(l1e_get_flags(l1e) & _PAGE_PRESENT);
- if ( unlikely(!res) && IS_PRIV(d) && !shadow_mode_translate(d) &&
- !(l1_pgentry_val(l1e) & L1_DISALLOW_MASK) &&
- (mfn = l1_pgentry_to_pfn(l1e)) &&
+ if (unlikely(!res) && IS_PRIV(d) && !shadow_mode_translate(d) &&
+ !(l1e_get_flags(l1e) & L1_DISALLOW_MASK) &&
+ (mfn = l1e_get_pfn(l1e)) &&
pfn_is_ram(mfn) &&
- (owner = page_get_owner(pfn_to_page(l1_pgentry_to_pfn(l1e)))) &&
+ (owner = page_get_owner(pfn_to_page(l1e_get_pfn(l1e)))) &&
(d != owner) )
{
res = get_page_from_l1e(l1e, owner);
@@ -292,7 +292,7 @@ shadow_get_page_from_l1e(l1_pgentry_t l1
if ( unlikely(!res) )
{
perfc_incrc(shadow_get_page_fail);
- FSH_LOG("%s failed to get ref l1e=%p\n", __func__, l1_pgentry_val(l1e));
+ FSH_LOG("%s failed to get ref l1e=%p\n", __func__, l1e_get_value(l1e));
}
return res;
@@ -302,34 +302,34 @@ shadow_get_page_from_l1e(l1_pgentry_t l1
static inline void
__shadow_get_l2e(
- struct exec_domain *ed, unsigned long va, unsigned long *psl2e)
+ struct exec_domain *ed, unsigned long va, l2_pgentry_t *psl2e)
{
ASSERT(shadow_mode_enabled(ed->domain));
- *psl2e = l2_pgentry_val( ed->arch.shadow_vtable[l2_table_offset(va)]);
+ *psl2e = ed->arch.shadow_vtable[l2_table_offset(va)];
}
static inline void
__shadow_set_l2e(
- struct exec_domain *ed, unsigned long va, unsigned long value)
+ struct exec_domain *ed, unsigned long va, l2_pgentry_t value)
{
ASSERT(shadow_mode_enabled(ed->domain));
- ed->arch.shadow_vtable[l2_table_offset(va)] = mk_l2_pgentry(value);
+ ed->arch.shadow_vtable[l2_table_offset(va)] = value;
}
static inline void
__guest_get_l2e(
- struct exec_domain *ed, unsigned long va, unsigned long *pl2e)
+ struct exec_domain *ed, unsigned long va, l2_pgentry_t *pl2e)
{
- *pl2e = l2_pgentry_val(ed->arch.guest_vtable[l2_table_offset(va)]);
+ *pl2e = ed->arch.guest_vtable[l2_table_offset(va)];
}
static inline void
__guest_set_l2e(
- struct exec_domain *ed, unsigned long va, unsigned long value)
+ struct exec_domain *ed, unsigned long va, l2_pgentry_t value)
{
- ed->arch.guest_vtable[l2_table_offset(va)] = mk_l2_pgentry(value);
+ ed->arch.guest_vtable[l2_table_offset(va)] = value;
if ( unlikely(shadow_mode_translate(ed->domain)) )
update_hl2e(ed, va);
@@ -339,36 +339,36 @@ static inline void
update_hl2e(struct exec_domain *ed, unsigned long va)
{
int index = l2_table_offset(va);
- unsigned long gl2e = l2_pgentry_val(ed->arch.guest_vtable[index]);
unsigned long mfn;
- unsigned long old_hl2e, new_hl2e;
+ l2_pgentry_t gl2e = ed->arch.guest_vtable[index];
+ l2_pgentry_t old_hl2e, new_hl2e;
int need_flush = 0;
ASSERT(shadow_mode_translate(ed->domain));
- old_hl2e = l1_pgentry_val(ed->arch.hl2_vtable[index]);
+ old_hl2e = ed->arch.hl2_vtable[index];
- if ( (gl2e & _PAGE_PRESENT) &&
- VALID_MFN(mfn = phys_to_machine_mapping(gl2e >> PAGE_SHIFT)) )
- new_hl2e = (mfn << PAGE_SHIFT) | __PAGE_HYPERVISOR;
+ if ( (l2e_get_flags(gl2e) & _PAGE_PRESENT) &&
+ VALID_MFN(mfn = phys_to_machine_mapping(l2e_get_pfn(gl2e)) ))
+ l2e_init_pfn(new_hl2e, mfn, __PAGE_HYPERVISOR);
else
- new_hl2e = 0;
+ l2e_clear(new_hl2e);
// only do the ref counting if something important changed.
//
- if ( (old_hl2e ^ new_hl2e) & (PAGE_MASK | _PAGE_PRESENT) )
+ if ( (l2e_has_changed(old_hl2e, new_hl2e, _PAGE_PRESENT)) )
{
- if ( (new_hl2e & _PAGE_PRESENT) &&
- !get_page(pfn_to_page(new_hl2e >> PAGE_SHIFT), ed->domain) )
- new_hl2e = 0;
- if ( old_hl2e & _PAGE_PRESENT )
+ if ( (l2e_get_flags(new_hl2e) & _PAGE_PRESENT) &&
+ !get_page(pfn_to_page(l2e_get_pfn(new_hl2e)), ed->domain) )
+ l2e_clear(new_hl2e);
+ if ( l2e_get_flags(old_hl2e) & _PAGE_PRESENT )
{
- put_page(pfn_to_page(old_hl2e >> PAGE_SHIFT));
+ put_page(pfn_to_page(l2e_get_pfn(old_hl2e)));
need_flush = 1;
}
}
- ed->arch.hl2_vtable[l2_table_offset(va)] = mk_l1_pgentry(new_hl2e);
+ ed->arch.hl2_vtable[l2_table_offset(va)] = new_hl2e;
if ( need_flush )
{
@@ -594,13 +594,13 @@ extern void shadow_mark_va_out_of_sync(
unsigned long va);
static inline int l1pte_write_fault(
- struct exec_domain *ed, unsigned long *gpte_p, unsigned long *spte_p,
+ struct exec_domain *ed, l1_pgentry_t *gpte_p, l1_pgentry_t *spte_p,
unsigned long va)
{
struct domain *d = ed->domain;
- unsigned long gpte = *gpte_p;
- unsigned long spte;
- unsigned long gpfn = gpte >> PAGE_SHIFT;
+ l1_pgentry_t gpte = *gpte_p;
+ l1_pgentry_t spte;
+ unsigned long gpfn = l1e_get_pfn(gpte);
unsigned long gmfn = __gpfn_to_mfn(d, gpfn);
//printk("l1pte_write_fault gmfn=%p\n", gmfn);
@@ -608,15 +608,16 @@ static inline int l1pte_write_fault(
if ( unlikely(!VALID_MFN(gmfn)) )
{
SH_LOG("l1pte_write_fault: invalid gpfn=%p", gpfn);
- *spte_p = 0;
+ l1e_clear(*spte_p);
return 0;
}
- ASSERT(gpte & _PAGE_RW);
- gpte |= _PAGE_DIRTY | _PAGE_ACCESSED;
- spte = (gmfn << PAGE_SHIFT) | (gpte & ~PAGE_MASK);
+ ASSERT(l1e_get_flags(gpte) & _PAGE_RW);
+ l1e_add_flags(gpte, _PAGE_DIRTY | _PAGE_ACCESSED);
+ l1e_init_pfn(spte, gmfn, l1e_get_flags(gpte));
- SH_VVLOG("l1pte_write_fault: updating spte=0x%p gpte=0x%p", spte, gpte);
+ SH_VVLOG("l1pte_write_fault: updating spte=0x%p gpte=0x%p",
+ l1e_get_value(spte), l1e_get_value(gpte));
if ( shadow_mode_log_dirty(d) )
__mark_dirty(d, gmfn);
@@ -631,30 +632,31 @@ static inline int l1pte_write_fault(
}
static inline int l1pte_read_fault(
- struct domain *d, unsigned long *gpte_p, unsigned long *spte_p)
+ struct domain *d, l1_pgentry_t *gpte_p, l1_pgentry_t *spte_p)
{
- unsigned long gpte = *gpte_p;
- unsigned long spte = *spte_p;
- unsigned long pfn = gpte >> PAGE_SHIFT;
+ l1_pgentry_t gpte = *gpte_p;
+ l1_pgentry_t spte = *spte_p;
+ unsigned long pfn = l1e_get_pfn(gpte);
unsigned long mfn = __gpfn_to_mfn(d, pfn);
if ( unlikely(!VALID_MFN(mfn)) )
{
SH_LOG("l1pte_read_fault: invalid gpfn=%p", pfn);
- *spte_p = 0;
+ l1e_clear(*spte_p);
return 0;
}
- gpte |= _PAGE_ACCESSED;
- spte = (mfn << PAGE_SHIFT) | (gpte & ~PAGE_MASK);
+ l1e_add_flags(gpte, _PAGE_ACCESSED);
+ l1e_init_pfn(spte, mfn, l1e_get_flags(gpte));
- if ( shadow_mode_log_dirty(d) || !(gpte & _PAGE_DIRTY) ||
+ if ( shadow_mode_log_dirty(d) || !(l1e_get_flags(gpte) & _PAGE_DIRTY) ||
mfn_is_page_table(mfn) )
{
- spte &= ~_PAGE_RW;
+ l1e_remove_flags(spte, _PAGE_RW);
}
- SH_VVLOG("l1pte_read_fault: updating spte=0x%p gpte=0x%p", spte, gpte);
+ SH_VVLOG("l1pte_read_fault: updating spte=0x%p gpte=0x%p",
+ l1e_get_value(spte), l1e_get_value(gpte));
*gpte_p = gpte;
*spte_p = spte;
@@ -662,23 +664,24 @@ static inline int l1pte_read_fault(
}
static inline void l1pte_propagate_from_guest(
- struct domain *d, unsigned long gpte, unsigned long *spte_p)
+ struct domain *d, l1_pgentry_t gpte, l1_pgentry_t *spte_p)
{
- unsigned long mfn, spte;
+ unsigned long mfn;
+ l1_pgentry_t spte;
- spte = 0;
+ l1e_clear(spte);
- if ( ((gpte & (_PAGE_PRESENT|_PAGE_ACCESSED) ) ==
+ if ( ((l1e_get_flags(gpte) & (_PAGE_PRESENT|_PAGE_ACCESSED) ) ==
(_PAGE_PRESENT|_PAGE_ACCESSED)) &&
- VALID_MFN(mfn = __gpfn_to_mfn(d, gpte >> PAGE_SHIFT)) )
+ VALID_MFN(mfn = __gpfn_to_mfn(d, l1e_get_pfn(gpte))) )
{
- spte = (mfn << PAGE_SHIFT) | (gpte & ~PAGE_MASK);
+ l1e_init_pfn(spte, mfn, l1e_get_flags(gpte));
if ( shadow_mode_log_dirty(d) ||
- !(gpte & _PAGE_DIRTY) ||
+ !(l1e_get_flags(gpte) & _PAGE_DIRTY) ||
mfn_is_page_table(mfn) )
{
- spte &= ~_PAGE_RW;
+ l1e_remove_flags(spte, _PAGE_RW);
}
}
@@ -691,14 +694,15 @@ static inline void l1pte_propagate_from_
}
static inline void hl2e_propagate_from_guest(
- struct domain *d, unsigned long gpde, unsigned long *hl2e_p)
+ struct domain *d, l2_pgentry_t gpde, l2_pgentry_t *hl2e_p)
{
- unsigned long pfn = gpde >> PAGE_SHIFT;
- unsigned long mfn, hl2e;
-
- hl2e = 0;
+ unsigned long pfn = l2e_get_pfn(gpde);
+ unsigned long mfn;
+ l2_pgentry_t hl2e;
+
+ l2e_clear(hl2e);
- if ( gpde & _PAGE_PRESENT )
+ if ( l2e_get_flags(gpde) & _PAGE_PRESENT )
{
if ( unlikely((current->domain != d) && !shadow_mode_external(d)) )
{
@@ -713,30 +717,31 @@ static inline void hl2e_propagate_from_g
mfn = __gpfn_to_mfn(d, pfn);
if ( VALID_MFN(mfn) && (mfn < max_page) )
- hl2e = (mfn << PAGE_SHIFT) | __PAGE_HYPERVISOR;
+ l2e_init_pfn(hl2e, mfn, __PAGE_HYPERVISOR);
}
- if ( hl2e || gpde )
- SH_VVLOG("%s: gpde=%p hl2e=%p", __func__, gpde, hl2e);
+ if ( l2e_get_value(hl2e) || l2e_get_value(gpde) )
+ SH_VVLOG("%s: gpde=%p hl2e=%p", __func__,
+ l2e_get_value(gpde), l2e_get_value(hl2e));
*hl2e_p = hl2e;
}
static inline void l2pde_general(
struct domain *d,
- unsigned long *gpde_p,
- unsigned long *spde_p,
+ l2_pgentry_t *gpde_p,
+ l2_pgentry_t *spde_p,
unsigned long sl1mfn)
{
- unsigned long gpde = *gpde_p;
- unsigned long spde;
+ l2_pgentry_t gpde = *gpde_p;
+ l2_pgentry_t spde;
- spde = 0;
- if ( (gpde & _PAGE_PRESENT) && (sl1mfn != 0) )
+ l2e_clear(spde);
+ if ( (l2e_get_flags(gpde) & _PAGE_PRESENT) && (sl1mfn != 0) )
{
- spde = (gpde & ~PAGE_MASK) | (sl1mfn << PAGE_SHIFT) |
- _PAGE_RW | _PAGE_ACCESSED;
- gpde |= _PAGE_ACCESSED; /* N.B. PDEs do not have a dirty bit. */
+ l2e_init_pfn(spde, sl1mfn,
+ l2e_get_flags(gpde) | _PAGE_RW | _PAGE_ACCESSED);
+ l2e_add_flags(gpde, _PAGE_ACCESSED); /* N.B. PDEs do not have a dirty bit. */
// XXX mafetter: Hmm...
// Shouldn't the dirty log be checked/updated here?
@@ -745,19 +750,21 @@ static inline void l2pde_general(
*gpde_p = gpde;
}
- if ( spde || gpde )
- SH_VVLOG("%s: gpde=%p, new spde=%p", __func__, gpde, spde);
+ if ( l2e_get_value(spde) || l2e_get_value(gpde) )
+ SH_VVLOG("%s: gpde=%p, new spde=%p", __func__,
+ l2e_get_value(gpde), l2e_get_value(spde));
*spde_p = spde;
}
static inline void l2pde_propagate_from_guest(
- struct domain *d, unsigned long *gpde_p, unsigned long *spde_p)
+ struct domain *d, l2_pgentry_t *gpde_p, l2_pgentry_t *spde_p)
{
- unsigned long gpde = *gpde_p, sl1mfn = 0;
+ l2_pgentry_t gpde = *gpde_p;
+ unsigned long sl1mfn = 0;
- if ( gpde & _PAGE_PRESENT )
- sl1mfn = __shadow_status(d, gpde >> PAGE_SHIFT, PGT_l1_shadow);
+ if ( l2e_get_flags(gpde) & _PAGE_PRESENT )
+ sl1mfn = __shadow_status(d, l2e_get_pfn(gpde), PGT_l1_shadow);
l2pde_general(d, gpde_p, spde_p, sl1mfn);
}
@@ -768,10 +775,10 @@ static inline void l2pde_propagate_from_
static int inline
validate_pte_change(
struct domain *d,
- unsigned long new_pte,
- unsigned long *shadow_pte_p)
+ l1_pgentry_t new_pte,
+ l1_pgentry_t *shadow_pte_p)
{
- unsigned long old_spte, new_spte;
+ l1_pgentry_t old_spte, new_spte;
perfc_incrc(validate_pte_calls);
@@ -784,16 +791,16 @@ validate_pte_change(
// only do the ref counting if something important changed.
//
- if ( ((old_spte | new_spte) & _PAGE_PRESENT ) &&
- ((old_spte ^ new_spte) & (PAGE_MASK | _PAGE_RW | _PAGE_PRESENT)) )
+ if ( ((l1e_get_flags(old_spte) | l1e_get_flags(new_spte)) & _PAGE_PRESENT ) &&
+ l1e_has_changed(old_spte, new_spte, _PAGE_RW | _PAGE_PRESENT) )
{
perfc_incrc(validate_pte_changes);
- if ( (new_spte & _PAGE_PRESENT) &&
- !shadow_get_page_from_l1e(mk_l1_pgentry(new_spte), d) )
- new_spte = 0;
- if ( old_spte & _PAGE_PRESENT )
- put_page_from_l1e(mk_l1_pgentry(old_spte), d);
+ if ( (l1e_get_flags(new_spte) & _PAGE_PRESENT) &&
+ !shadow_get_page_from_l1e(new_spte, d) )
+ l1e_clear(new_spte);
+ if ( l1e_get_flags(old_spte) & _PAGE_PRESENT )
+ put_page_from_l1e(old_spte, d);
}
*shadow_pte_p = new_spte;
@@ -807,10 +814,10 @@ validate_pte_change(
static int inline
validate_hl2e_change(
struct domain *d,
- unsigned long new_gpde,
- unsigned long *shadow_hl2e_p)
+ l2_pgentry_t new_gpde,
+ l2_pgentry_t *shadow_hl2e_p)
{
- unsigned long old_hl2e, new_hl2e;
+ l2_pgentry_t old_hl2e, new_hl2e;
perfc_incrc(validate_hl2e_calls);
@@ -819,16 +826,16 @@ validate_hl2e_change(
// Only do the ref counting if something important changed.
//
- if ( ((old_hl2e | new_hl2e) & _PAGE_PRESENT) &&
- ((old_hl2e ^ new_hl2e) & (PAGE_MASK | _PAGE_PRESENT)) )
+ if ( ((l2e_get_flags(old_hl2e) | l2e_get_flags(new_hl2e)) & _PAGE_PRESENT) &&
+ l2e_has_changed(old_hl2e, new_hl2e, _PAGE_PRESENT) )
{
perfc_incrc(validate_hl2e_changes);
- if ( (new_hl2e & _PAGE_PRESENT) &&
- !get_page(pfn_to_page(new_hl2e >> PAGE_SHIFT), d) )
- new_hl2e = 0;
- if ( old_hl2e & _PAGE_PRESENT )
- put_page(pfn_to_page(old_hl2e >> PAGE_SHIFT));
+ if ( (l2e_get_flags(new_hl2e) & _PAGE_PRESENT) &&
+ !get_page(pfn_to_page(l2e_get_pfn(new_hl2e)), d) )
+ l2e_clear(new_hl2e);
+ if ( l2e_get_flags(old_hl2e) & _PAGE_PRESENT )
+ put_page(pfn_to_page(l2e_get_pfn(old_hl2e)));
}
*shadow_hl2e_p = new_hl2e;
@@ -843,10 +850,10 @@ validate_hl2e_change(
static int inline
validate_pde_change(
struct domain *d,
- unsigned long new_gpde,
- unsigned long *shadow_pde_p)
+ l2_pgentry_t new_gpde,
+ l2_pgentry_t *shadow_pde_p)
{
- unsigned long old_spde, new_spde;
+ l2_pgentry_t old_spde, new_spde;
perfc_incrc(validate_pde_calls);
@@ -858,16 +865,16 @@ validate_pde_change(
// Only do the ref counting if something important changed.
//
- if ( ((old_spde | new_spde) & _PAGE_PRESENT) &&
- ((old_spde ^ new_spde) & (PAGE_MASK | _PAGE_PRESENT)) )
+ if ( ((l2e_get_flags(old_spde) | l2e_get_flags(new_spde)) & _PAGE_PRESENT) &&
+ l2e_has_changed(old_spde, new_spde, _PAGE_PRESENT) )
{
perfc_incrc(validate_pde_changes);
- if ( (new_spde & _PAGE_PRESENT) &&
- !get_shadow_ref(new_spde >> PAGE_SHIFT) )
+ if ( (l2e_get_flags(new_spde) & _PAGE_PRESENT) &&
+ !get_shadow_ref(l2e_get_pfn(new_spde)) )
BUG();
- if ( old_spde & _PAGE_PRESENT )
- put_shadow_ref(old_spde >> PAGE_SHIFT);
+ if ( l2e_get_flags(old_spde) & _PAGE_PRESENT )
+ put_shadow_ref(l2e_get_pfn(old_spde));
}
*shadow_pde_p = new_spde;
@@ -1377,19 +1384,20 @@ shadow_update_min_max(unsigned long smfn
extern void shadow_map_l1_into_current_l2(unsigned long va);
void static inline
-shadow_set_l1e(unsigned long va, unsigned long new_spte, int create_l1_shadow)
+shadow_set_l1e(unsigned long va, l1_pgentry_t new_spte, int create_l1_shadow)
{
struct exec_domain *ed = current;
struct domain *d = ed->domain;
- unsigned long sl2e, old_spte;
+ l2_pgentry_t sl2e;
+ l1_pgentry_t old_spte;
#if 0
printk("shadow_set_l1e(va=%p, new_spte=%p, create=%d)\n",
- va, new_spte, create_l1_shadow);
+ va, l1e_get_value(new_spte), create_l1_shadow);
#endif
__shadow_get_l2e(ed, va, &sl2e);
- if ( !(sl2e & _PAGE_PRESENT) )
+ if ( !(l2e_get_flags(sl2e) & _PAGE_PRESENT) )
{
/*
* Either the L1 is not shadowed, or the shadow isn't linked into
@@ -1402,12 +1410,11 @@ shadow_set_l1e(unsigned long va, unsigne
}
else /* check to see if it exists; if so, link it in */
{
- unsigned long gpde =
- l2_pgentry_val(linear_l2_table(ed)[l2_table_offset(va)]);
- unsigned long gl1pfn = gpde >> PAGE_SHIFT;
+ l2_pgentry_t gpde = linear_l2_table(ed)[l2_table_offset(va)];
+ unsigned long gl1pfn = l2e_get_pfn(gpde);
unsigned long sl1mfn = __shadow_status(d, gl1pfn, PGT_l1_shadow);
- ASSERT( gpde & _PAGE_PRESENT );
+ ASSERT( l2e_get_flags(gpde) & _PAGE_PRESENT );
if ( sl1mfn )
{
@@ -1427,47 +1434,52 @@ shadow_set_l1e(unsigned long va, unsigne
}
}
- old_spte = l1_pgentry_val(shadow_linear_pg_table[l1_linear_offset(va)]);
+ old_spte = shadow_linear_pg_table[l1_linear_offset(va)];
// only do the ref counting if something important changed.
//
- if ( (old_spte ^ new_spte) & (PAGE_MASK | _PAGE_RW | _PAGE_PRESENT) )
+ if ( l1e_has_changed(old_spte, new_spte, _PAGE_RW | _PAGE_PRESENT) )
{
- if ( (new_spte & _PAGE_PRESENT) &&
- !shadow_get_page_from_l1e(mk_l1_pgentry(new_spte), d) )
- new_spte = 0;
- if ( old_spte & _PAGE_PRESENT )
- put_page_from_l1e(mk_l1_pgentry(old_spte), d);
+ if ( (l1e_get_flags(new_spte) & _PAGE_PRESENT) &&
+ !shadow_get_page_from_l1e(new_spte, d) )
+ l1e_clear(new_spte);
+ if ( l1e_get_flags(old_spte) & _PAGE_PRESENT )
+ put_page_from_l1e(old_spte, d);
}
- shadow_linear_pg_table[l1_linear_offset(va)] = mk_l1_pgentry(new_spte);
+ shadow_linear_pg_table[l1_linear_offset(va)] = new_spte;
- shadow_update_min_max(sl2e >> PAGE_SHIFT, l1_table_offset(va));
+ shadow_update_min_max(l2e_get_pfn(sl2e), l1_table_offset(va));
}
/************************************************************************/
-static inline unsigned long gva_to_gpte(unsigned long gva)
+static inline l1_pgentry_t gva_to_gpte(unsigned long gva)
{
- unsigned long gpde, gpte;
+ l2_pgentry_t gpde;
+ l1_pgentry_t gpte;
struct exec_domain *ed = current;
ASSERT( shadow_mode_translate(current->domain) );
__guest_get_l2e(ed, gva, &gpde);
- if ( unlikely(!(gpde & _PAGE_PRESENT)) )
- return 0;
+ if ( unlikely(!(l2e_get_flags(gpde) & _PAGE_PRESENT)) ) {
+ l1e_clear(gpte);
+ return gpte;
+ }
// This is actually overkill - we only need to make sure the hl2
// is in-sync.
//
shadow_sync_va(ed, gva);
- if ( unlikely(__get_user(gpte, (unsigned long *)
- &linear_pg_table[gva >> PAGE_SHIFT])) )
+ if ( unlikely(__copy_from_user(&gpte,
+ &linear_pg_table[gva >> PAGE_SHIFT],
+ sizeof(gpte))) )
{
FSH_LOG("gva_to_gpte got a fault on gva=%p\n", gva);
- return 0;
+ l1e_clear(gpte);
+ return gpte;
}
return gpte;
@@ -1475,13 +1487,13 @@ static inline unsigned long gva_to_gpte(
static inline unsigned long gva_to_gpa(unsigned long gva)
{
- unsigned long gpte;
+ l1_pgentry_t gpte;
gpte = gva_to_gpte(gva);
- if ( !(gpte & _PAGE_PRESENT) )
+ if ( !(l1e_get_flags(gpte) & _PAGE_PRESENT) )
return 0;
- return (gpte & PAGE_MASK) + (gva & ~PAGE_MASK);
+ return l1e_get_phys(gpte) + (gva & ~PAGE_MASK);
}
/************************************************************************/
Index: xen/arch/x86/vmx.c
===================================================================
--- xen.orig/arch/x86/vmx.c 2005-04-12 17:01:36.000000000 +0200
+++ xen/arch/x86/vmx.c 2005-04-12 17:33:01.000000000 +0200
@@ -109,7 +109,8 @@ static int vmx_do_page_fault(unsigned lo
{
struct exec_domain *ed = current;
unsigned long eip;
- unsigned long gpte, gpa;
+ l1_pgentry_t gpte;
+ unsigned long gpa; /* FIXME: PAE */
int result;
#if VMX_DEBUG
@@ -132,9 +133,9 @@ static int vmx_do_page_fault(unsigned lo
}
gpte = gva_to_gpte(va);
- if (!(gpte & _PAGE_PRESENT) )
+ if (!(l1e_get_flags(gpte) & _PAGE_PRESENT) )
return 0;
- gpa = (gpte & PAGE_MASK) + (va & ~PAGE_MASK);
+ gpa = l1e_get_phys(gpte) + (va & ~PAGE_MASK);
/* Use 1:1 page table to identify MMIO address space */
if (mmio_space(gpa))
Index: xen/arch/x86/mm.c
===================================================================
--- xen.orig/arch/x86/mm.c 2005-04-12 17:01:38.000000000 +0200
+++ xen/arch/x86/mm.c 2005-04-12 19:38:13.000000000 +0200
@@ -212,9 +212,9 @@ void invalidate_shadow_ldt(struct exec_d
for ( i = 16; i < 32; i++ )
{
- pfn = l1_pgentry_to_pfn(d->arch.perdomain_ptes[i]);
+ pfn = l1e_get_pfn(d->arch.perdomain_ptes[i]);
if ( pfn == 0 ) continue;
- d->arch.perdomain_ptes[i] = mk_l1_pgentry(0);
+ l1e_clear(d->arch.perdomain_ptes[i]);
page = &frame_table[pfn];
ASSERT_PAGE_IS_TYPE(page, PGT_ldt_page);
ASSERT_PAGE_IS_DOMAIN(page, d->domain);
@@ -251,7 +251,8 @@ int map_ldt_shadow_page(unsigned int off
{
struct exec_domain *ed = current;
struct domain *d = ed->domain;
- unsigned long l1e, nl1e, gpfn, gmfn;
+ unsigned long gpfn, gmfn;
+ l1_pgentry_t l1e, nl1e;
unsigned gva = ed->arch.ldt_base + (off << PAGE_SHIFT);
int res;
@@ -269,13 +270,14 @@ int map_ldt_shadow_page(unsigned int off
shadow_sync_va(ed, gva);
TOGGLE_MODE();
- __get_user(l1e, (unsigned long *)&linear_pg_table[l1_linear_offset(gva)]);
+ __copy_from_user(&l1e, &linear_pg_table[l1_linear_offset(gva)],
+ sizeof(l1e));
TOGGLE_MODE();
- if ( unlikely(!(l1e & _PAGE_PRESENT)) )
+ if ( unlikely(!(l1e_get_flags(l1e) & _PAGE_PRESENT)) )
return 0;
- gpfn = l1_pgentry_to_pfn(mk_l1_pgentry(l1e));
+ gpfn = l1e_get_pfn(l1e);
gmfn = __gpfn_to_mfn(d, gpfn);
if ( unlikely(!VALID_MFN(gmfn)) )
return 0;
@@ -293,9 +295,9 @@ int map_ldt_shadow_page(unsigned int off
if ( unlikely(!res) )
return 0;
- nl1e = (l1e & ~PAGE_MASK) | (gmfn << PAGE_SHIFT) | _PAGE_RW;
+ l1e_init_pfn(nl1e, gmfn, l1e_get_flags(l1e) | _PAGE_RW);
- ed->arch.perdomain_ptes[off + 16] = mk_l1_pgentry(nl1e);
+ ed->arch.perdomain_ptes[off + 16] = nl1e;
ed->arch.shadow_ldt_mapcnt++;
return 1;
@@ -366,13 +368,13 @@ get_linear_pagetable(
ASSERT( !shadow_mode_enabled(d) );
- if ( (root_pgentry_val(re) & _PAGE_RW) )
+ if ( (root_get_flags(re) & _PAGE_RW) )
{
MEM_LOG("Attempt to create linear p.t. with write perms");
return 0;
}
- if ( (pfn = root_pgentry_to_pfn(re)) != re_pfn )
+ if ( (pfn = root_get_pfn(re)) != re_pfn )
{
/* Make sure the mapped frame belongs to the correct domain. */
if ( unlikely(!get_page_from_pagenr(pfn, d)) )
@@ -405,17 +407,17 @@ int
get_page_from_l1e(
l1_pgentry_t l1e, struct domain *d)
{
- unsigned long l1v = l1_pgentry_val(l1e);
- unsigned long mfn = l1_pgentry_to_pfn(l1e);
+ unsigned long mfn = l1e_get_pfn(l1e);
struct pfn_info *page = &frame_table[mfn];
extern int domain_iomem_in_pfn(struct domain *d, unsigned long pfn);
- if ( !(l1v & _PAGE_PRESENT) )
+ if ( !(l1e_get_flags(l1e) & _PAGE_PRESENT) )
return 1;
- if ( unlikely(l1v & L1_DISALLOW_MASK) )
+ if ( unlikely(l1e_get_flags(l1e) & L1_DISALLOW_MASK) )
{
- MEM_LOG("Bad L1 type settings %p %p", l1v, l1v & L1_DISALLOW_MASK);
+ MEM_LOG("Bad L1 type settings %p %p", l1e_get_value(l1e),
+ l1e_get_value(l1e) & L1_DISALLOW_MASK);
return 0;
}
@@ -435,7 +437,7 @@ get_page_from_l1e(
return 0;
}
- return ((l1v & _PAGE_RW) ?
+ return ((l1e_get_flags(l1e) & _PAGE_RW) ?
get_page_and_type(page, d, PGT_writable_page) :
get_page(page, d));
}
@@ -451,18 +453,18 @@ get_page_from_l2e(
ASSERT( !shadow_mode_enabled(d) );
- if ( !(l2_pgentry_val(l2e) & _PAGE_PRESENT) )
+ if ( !(l2e_get_flags(l2e) & _PAGE_PRESENT) )
return 1;
- if ( unlikely((l2_pgentry_val(l2e) & L2_DISALLOW_MASK)) )
+ if ( unlikely((l2e_get_flags(l2e) & L2_DISALLOW_MASK)) )
{
MEM_LOG("Bad L2 page type settings %p",
- l2_pgentry_val(l2e) & L2_DISALLOW_MASK);
+ l2e_get_value(l2e) & L2_DISALLOW_MASK);
return 0;
}
rc = get_page_and_type_from_pagenr(
- l2_pgentry_to_pfn(l2e),
+ l2e_get_pfn(l2e),
PGT_l1_page_table | (va_idx<<PGT_va_shift), d);
#if defined(__i386__)
@@ -524,12 +526,11 @@ get_page_from_l4e(
void put_page_from_l1e(l1_pgentry_t l1e, struct domain *d)
{
- unsigned long l1v = l1_pgentry_val(l1e);
- unsigned long pfn = l1_pgentry_to_pfn(l1e);
+ unsigned long pfn = l1e_get_pfn(l1e);
struct pfn_info *page = &frame_table[pfn];
struct domain *e;
- if ( !(l1v & _PAGE_PRESENT) || !pfn_is_ram(pfn) )
+ if ( !(l1e_get_flags(l1e) & _PAGE_PRESENT) || !pfn_is_ram(pfn) )
return;
e = page_get_owner(page);
@@ -546,12 +547,13 @@ void put_page_from_l1e(l1_pgentry_t l1e,
* mappings and which unmappings are counted via the grant entry, but
* really it doesn't matter as privileged domains have carte blanche.
*/
- if ( likely(gnttab_check_unmap(e, d, pfn, !(l1v & _PAGE_RW))) )
+ if (likely(gnttab_check_unmap(e, d, pfn,
+ !(l1e_get_flags(l1e) & _PAGE_RW))))
return;
/* Assume this mapping was made via MMUEXT_SET_FOREIGNDOM... */
}
- if ( l1v & _PAGE_RW )
+ if ( l1e_get_flags(l1e) & _PAGE_RW )
{
put_page_and_type(page);
}
@@ -575,9 +577,9 @@ void put_page_from_l1e(l1_pgentry_t l1e,
*/
static void put_page_from_l2e(l2_pgentry_t l2e, unsigned long pfn)
{
- if ( (l2_pgentry_val(l2e) & _PAGE_PRESENT) &&
- (l2_pgentry_to_pfn(l2e) != pfn) )
- put_page_and_type(&frame_table[l2_pgentry_to_pfn(l2e)]);
+ if ( (l2e_get_flags(l2e) & _PAGE_PRESENT) &&
+ (l2e_get_pfn(l2e) != pfn) )
+ put_page_and_type(&frame_table[l2e_get_pfn(l2e)]);
}
@@ -654,11 +656,11 @@ static int alloc_l2_table(struct pfn_inf
memcpy(&pl2e[ROOT_PAGETABLE_FIRST_XEN_SLOT],
&idle_pg_table[ROOT_PAGETABLE_FIRST_XEN_SLOT],
ROOT_PAGETABLE_XEN_SLOTS * sizeof(l2_pgentry_t));
- pl2e[l2_table_offset(LINEAR_PT_VIRT_START)] =
- mk_l2_pgentry((pfn << PAGE_SHIFT) | __PAGE_HYPERVISOR);
- pl2e[l2_table_offset(PERDOMAIN_VIRT_START)] =
- mk_l2_pgentry(__pa(page_get_owner(page)->arch.mm_perdomain_pt) |
- __PAGE_HYPERVISOR);
+ l2e_init_pfn(pl2e[l2_table_offset(LINEAR_PT_VIRT_START)],
+ pfn, __PAGE_HYPERVISOR);
+ l2e_init_phys(pl2e[l2_table_offset(PERDOMAIN_VIRT_START)],
+ __pa(page_get_owner(page)->arch.mm_perdomain_pt),
+ __PAGE_HYPERVISOR);
#endif
unmap_domain_mem(pl2e);
@@ -806,14 +808,15 @@ static inline int update_l1e(l1_pgentry_
l1_pgentry_t ol1e,
l1_pgentry_t nl1e)
{
- unsigned long o = l1_pgentry_val(ol1e);
- unsigned long n = l1_pgentry_val(nl1e);
+ /* FIXME: breaks with PAE */
+ unsigned long o = l1e_get_value(ol1e);
+ unsigned long n = l1e_get_value(nl1e);
if ( unlikely(cmpxchg_user(pl1e, o, n) != 0) ||
- unlikely(o != l1_pgentry_val(ol1e)) )
+ unlikely(o != l1e_get_value(ol1e)) )
{
MEM_LOG("Failed to update %p -> %p: saw %p",
- l1_pgentry_val(ol1e), l1_pgentry_val(nl1e), o);
+ l1e_get_value(ol1e), l1e_get_value(nl1e), o);
return 0;
}
@@ -825,27 +828,24 @@ static inline int update_l1e(l1_pgentry_
static int mod_l1_entry(l1_pgentry_t *pl1e, l1_pgentry_t nl1e)
{
l1_pgentry_t ol1e;
- unsigned long _ol1e;
struct domain *d = current->domain;
ASSERT( !shadow_mode_enabled(d) );
- if ( unlikely(__get_user(_ol1e, (unsigned long *)pl1e) != 0) )
+ if ( unlikely(__copy_from_user(&ol1e, pl1e, sizeof(ol1e)) != 0) )
return 0;
- ol1e = mk_l1_pgentry(_ol1e);
- if ( l1_pgentry_val(nl1e) & _PAGE_PRESENT )
+ if ( l1e_get_flags(nl1e) & _PAGE_PRESENT )
{
- if ( unlikely(l1_pgentry_val(nl1e) & L1_DISALLOW_MASK) )
+ if ( unlikely(l1e_get_flags(nl1e) & L1_DISALLOW_MASK) )
{
MEM_LOG("Bad L1 type settings %p",
- l1_pgentry_val(nl1e) & L1_DISALLOW_MASK);
+ l1e_get_value(nl1e) & L1_DISALLOW_MASK);
return 0;
}
/* Fast path for identical mapping, r/w and presence. */
- if ( ((l1_pgentry_val(ol1e) ^ l1_pgentry_val(nl1e)) &
- ((PADDR_MASK & PAGE_MASK) | _PAGE_RW | _PAGE_PRESENT)) == 0 )
+ if ( !l1e_has_changed(ol1e, nl1e, _PAGE_RW | _PAGE_PRESENT))
return update_l1e(pl1e, ol1e, nl1e);
if ( unlikely(!get_page_from_l1e(nl1e, FOREIGNDOM)) )
@@ -870,12 +870,12 @@ static int mod_l1_entry(l1_pgentry_t *pl
#define UPDATE_ENTRY(_t,_p,_o,_n) ({ \
unsigned long __o = cmpxchg((unsigned long *)(_p), \
- _t ## _pgentry_val(_o), \
- _t ## _pgentry_val(_n)); \
- if ( __o != _t ## _pgentry_val(_o) ) \
+ _t ## e_get_value(_o), \
+ _t ## e_get_value(_n)); \
+ if ( __o != _t ## e_get_value(_o) ) \
MEM_LOG("Failed to update %p -> %p: saw %p", \
- _t ## _pgentry_val(_o), _t ## _pgentry_val(_n), __o); \
- (__o == _t ## _pgentry_val(_o)); })
+ _t ## e_get_value(_o), _t ## e_get_value(_n), __o); \
+ (__o == _t ## e_get_value(_o)); })
/* Update the L2 entry at pl2e to new value nl2e. pl2e is within frame pfn. */
@@ -884,7 +884,6 @@ static int mod_l2_entry(l2_pgentry_t *pl
unsigned long pfn)
{
l2_pgentry_t ol2e;
- unsigned long _ol2e;
if ( unlikely(!is_guest_l2_slot(pgentry_ptr_to_slot(pl2e))) )
{
@@ -892,22 +891,20 @@ static int mod_l2_entry(l2_pgentry_t *pl
return 0;
}
- if ( unlikely(__get_user(_ol2e, (unsigned long *)pl2e) != 0) )
+ if ( unlikely(__copy_from_user(&ol2e, pl2e, sizeof(ol2e)) != 0) )
return 0;
- ol2e = mk_l2_pgentry(_ol2e);
- if ( l2_pgentry_val(nl2e) & _PAGE_PRESENT )
+ if ( l2e_get_flags(nl2e) & _PAGE_PRESENT )
{
- if ( unlikely(l2_pgentry_val(nl2e) & L2_DISALLOW_MASK) )
+ if ( unlikely(l2e_get_flags(nl2e) & L2_DISALLOW_MASK) )
{
MEM_LOG("Bad L2 type settings %p",
- l2_pgentry_val(nl2e) & L2_DISALLOW_MASK);
+ l2e_get_value(nl2e) & L2_DISALLOW_MASK);
return 0;
}
/* Fast path for identical mapping and presence. */
- if ( ((l2_pgentry_val(ol2e) ^ l2_pgentry_val(nl2e)) &
- ((PADDR_MASK & PAGE_MASK) | _PAGE_PRESENT)) == 0 )
+ if ( !l2e_has_changed(ol2e, nl2e, _PAGE_PRESENT))
return UPDATE_ENTRY(l2, pl2e, ol2e, nl2e);
if ( unlikely(!get_page_from_l2e(nl2e, pfn, current->domain,
@@ -1847,8 +1844,11 @@ int do_mmu_update(
if ( likely(get_page_type(
page, type_info & (PGT_type_mask|PGT_va_mask))) )
{
- okay = mod_l1_entry((l1_pgentry_t *)va,
- mk_l1_pgentry(req.val));
+ l1_pgentry_t pte;
+
+ /* FIXME: doesn't work with PAE */
+ l1e_init_phys(pte, req.val, req.val);
+ okay = mod_l1_entry((l1_pgentry_t *)va, pte);
put_page_type(page);
}
break;
@@ -1856,9 +1856,11 @@ int do_mmu_update(
ASSERT(!shadow_mode_enabled(d));
if ( likely(get_page_type(page, PGT_l2_page_table)) )
{
- okay = mod_l2_entry((l2_pgentry_t *)va,
- mk_l2_pgentry(req.val),
- mfn);
+ l2_pgentry_t l2e;
+
+ /* FIXME: doesn't work with PAE */
+ l2e_init_phys(l2e, req.val, req.val);
+ okay = mod_l2_entry((l2_pgentry_t *)va, l2e, mfn);
put_page_type(page);
}
break;
@@ -1991,12 +1993,12 @@ int do_mmu_update(
* and is running in a shadow mode
*/
int update_shadow_va_mapping(unsigned long va,
- unsigned long val,
+ l1_pgentry_t val,
struct exec_domain *ed,
struct domain *d)
{
unsigned long l1mfn;
- unsigned long spte;
+ l1_pgentry_t spte;
int rc = 0;
check_pagetable(ed, "pre-va"); /* debug */
@@ -2022,8 +2024,7 @@ int update_shadow_va_mapping(unsigned lo
* to teach it about this boundary case.
* So we flush this L1 page, if it's out of sync.
*/
- l1mfn = (l2_pgentry_val(linear_l2_table(ed)[l2_table_offset(va)]) >>
- PAGE_SHIFT);
+ l1mfn = l2e_get_pfn(linear_l2_table(ed)[l2_table_offset(va)]);
if ( mfn_out_of_sync(l1mfn) )
{
perfc_incrc(extra_va_update_sync);
@@ -2031,8 +2032,8 @@ int update_shadow_va_mapping(unsigned lo
}
#endif /* keep check_pagetables() happy */
- if ( unlikely(__put_user(val, &l1_pgentry_val(
- linear_pg_table[l1_linear_offset(va)]))) )
+ if ( unlikely(__copy_to_user(&linear_pg_table[l1_linear_offset(va)],
+ &val, sizeof(val))))
{
rc = -EINVAL;
goto out;
@@ -2059,7 +2060,7 @@ int update_shadow_va_mapping(unsigned lo
}
int update_grant_va_mapping(unsigned long va,
- unsigned long _nl1e,
+ l1_pgentry_t _nl1e,
struct domain *d,
struct exec_domain *ed)
{
@@ -2072,22 +2073,20 @@ int update_grant_va_mapping(unsigned lon
int rc = 0;
l1_pgentry_t *pl1e;
- unsigned long _ol1e;
-
+ l1_pgentry_t ol1e;
+
cleanup_writable_pagetable(d);
pl1e = &linear_pg_table[l1_linear_offset(va)];
- if ( unlikely(__get_user(_ol1e, (unsigned long *)pl1e) != 0) )
+ if ( unlikely(__copy_from_user(&ol1e, pl1e, sizeof(ol1e)) != 0) )
rc = -EINVAL;
else
{
- l1_pgentry_t ol1e = mk_l1_pgentry(_ol1e);
-
- if ( update_l1e(pl1e, ol1e, mk_l1_pgentry(_nl1e)) )
+ if ( update_l1e(pl1e, ol1e, _nl1e) )
{
put_page_from_l1e(ol1e, d);
- if ( _ol1e & _PAGE_PRESENT )
+ if ( l1e_get_flags(ol1e) & _PAGE_PRESENT )
rc = 0; /* Caller needs to invalidate TLB entry */
else
rc = 1; /* Caller need not invalidate TLB entry */
@@ -2104,7 +2103,7 @@ int update_grant_va_mapping(unsigned lon
int do_update_va_mapping(unsigned long va,
- unsigned long val,
+ l1_pgentry_t val,
unsigned long flags)
{
struct exec_domain *ed = current;
@@ -2138,7 +2137,7 @@ int do_update_va_mapping(unsigned long v
rc = update_shadow_va_mapping(va, val, ed, d);
}
else if ( unlikely(!mod_l1_entry(&linear_pg_table[l1_linear_offset(va)],
- mk_l1_pgentry(val))) )
+ val)) )
rc = -EINVAL;
switch ( flags & UVMF_FLUSHTYPE_MASK )
@@ -2195,7 +2194,7 @@ int do_update_va_mapping(unsigned long v
}
int do_update_va_mapping_otherdomain(unsigned long va,
- unsigned long val,
+ l1_pgentry_t val,
unsigned long flags,
domid_t domid)
{
@@ -2231,9 +2230,9 @@ void destroy_gdt(struct exec_domain *ed)
for ( i = 0; i < 16; i++ )
{
- if ( (pfn = l1_pgentry_to_pfn(ed->arch.perdomain_ptes[i])) != 0 )
+ if ( (pfn = l1e_get_pfn(ed->arch.perdomain_ptes[i])) != 0 )
put_page_and_type(&frame_table[pfn]);
- ed->arch.perdomain_ptes[i] = mk_l1_pgentry(0);
+ l1e_clear(ed->arch.perdomain_ptes[i]);
}
}
@@ -2289,8 +2288,8 @@ long set_gdt(struct exec_domain *ed,
/* Install the new GDT. */
for ( i = 0; i < nr_pages; i++ )
- ed->arch.perdomain_ptes[i] =
- mk_l1_pgentry((frames[i] << PAGE_SHIFT) | __PAGE_HYPERVISOR);
+ l1e_init_pfn(ed->arch.perdomain_ptes[i], frames[i],
+ __PAGE_HYPERVISOR);
SET_GDT_ADDRESS(ed, GDT_VIRT_START(ed));
SET_GDT_ENTRIES(ed, entries);
@@ -2366,7 +2365,7 @@ long do_update_descriptor(unsigned long
case PGT_gdt_page:
/* Disallow updates of Xen-reserved descriptors in the current GDT. */
for_each_exec_domain(dom, ed) {
- if ( (l1_pgentry_to_pfn(ed->arch.perdomain_ptes[0]) == mfn) &&
+ if ( (l1e_get_pfn(ed->arch.perdomain_ptes[0]) == mfn) &&
(((pa&(PAGE_SIZE-1))>>3) >= FIRST_RESERVED_GDT_ENTRY) &&
(((pa&(PAGE_SIZE-1))>>3) <= LAST_RESERVED_GDT_ENTRY) )
goto out;
@@ -2489,7 +2488,7 @@ void ptwr_flush(struct domain *d, const
ol1e = d->arch.ptwr[which].page[i];
nl1e = pl1e[i];
- if ( likely(l1_pgentry_val(ol1e) == l1_pgentry_val(nl1e)) )
+ if ( likely(l1e_get_value(ol1e) == l1e_get_value(nl1e)) )
continue;
/* Update number of entries modified. */
@@ -2499,10 +2498,10 @@ void ptwr_flush(struct domain *d, const
* Fast path for PTEs that have merely been write-protected
* (e.g., during a Unix fork()). A strict reduction in privilege.
*/
- if ( likely(l1_pgentry_val(ol1e) == (l1_pgentry_val(nl1e)|_PAGE_RW)) )
+ if ( likely(l1e_get_value(ol1e) == (l1e_get_value(nl1e)|_PAGE_RW)) )
{
- if ( likely(l1_pgentry_val(nl1e) & _PAGE_PRESENT) )
- put_page_type(&frame_table[l1_pgentry_to_pfn(nl1e)]);
+ if ( likely(l1e_get_flags(nl1e) & _PAGE_PRESENT) )
+ put_page_type(&frame_table[l1e_get_pfn(nl1e)]);
continue;
}
@@ -2533,7 +2532,7 @@ void ptwr_flush(struct domain *d, const
if ( which == PTWR_PT_ACTIVE )
{
pl2e = &__linear_l2_table[d->arch.ptwr[which].l2_idx];
- *pl2e = mk_l2_pgentry(l2_pgentry_val(*pl2e) | _PAGE_PRESENT);
+ l2e_add_flags(*pl2e, _PAGE_PRESENT);
}
/*
@@ -2550,9 +2549,9 @@ static int ptwr_emulated_update(
unsigned int bytes,
unsigned int do_cmpxchg)
{
- unsigned long pte, pfn;
+ unsigned long pfn;
struct pfn_info *page;
- l1_pgentry_t ol1e, nl1e, *pl1e;
+ l1_pgentry_t pte, ol1e, nl1e, *pl1e;
struct domain *d = current->domain;
/* Aligned access only, thank you. */
@@ -2564,6 +2563,7 @@ static int ptwr_emulated_update(
}
/* Turn a sub-word access into a full-word access. */
+ /* FIXME: needs tweaks for PAE */
if ( (addr & ((BITS_PER_LONG/8)-1)) != 0 )
{
int rc;
@@ -2582,18 +2582,18 @@ static int ptwr_emulated_update(
}
/* Read the PTE that maps the page being updated. */
- if ( __get_user(pte, (unsigned long *)
- &linear_pg_table[l1_linear_offset(addr)]) )
+ if (__copy_from_user(&pte, &linear_pg_table[l1_linear_offset(addr)],
+ sizeof(pte)))
{
MEM_LOG("ptwr_emulate: Cannot read thru linear_pg_table\n");
return X86EMUL_UNHANDLEABLE;
}
- pfn = pte >> PAGE_SHIFT;
+ pfn = l1e_get_pfn(pte);
page = &frame_table[pfn];
/* We are looking only for read-only mappings of p.t. pages. */
- if ( ((pte & (_PAGE_RW | _PAGE_PRESENT)) != _PAGE_PRESENT) ||
+ if ( ((l1e_get_flags(pte) & (_PAGE_RW | _PAGE_PRESENT)) != _PAGE_PRESENT) ||
((page->u.inuse.type_info & PGT_type_mask) != PGT_l1_page_table) ||
(page_get_owner(page) != d) )
{
@@ -2603,7 +2603,7 @@ static int ptwr_emulated_update(
}
/* Check the new PTE. */
- nl1e = mk_l1_pgentry(val);
+ l1e_init_phys(nl1e, val, val & ~PAGE_MASK);
if ( unlikely(!get_page_from_l1e(nl1e, d)) )
return X86EMUL_UNHANDLEABLE;
@@ -2611,7 +2611,7 @@ static int ptwr_emulated_update(
pl1e = map_domain_mem(page_to_phys(page) + (addr & ~PAGE_MASK));
if ( do_cmpxchg )
{
- ol1e = mk_l1_pgentry(old);
+ l1e_init_phys(ol1e, old, old & ~PAGE_MASK);
if ( cmpxchg((unsigned long *)pl1e, old, val) != old )
{
unmap_domain_mem(pl1e);
@@ -2636,8 +2636,7 @@ static int ptwr_emulated_update(
{
sl1e = map_domain_mem(
((sstat & PSH_pfn_mask) << PAGE_SHIFT) + (addr & ~PAGE_MASK));
- l1pte_propagate_from_guest(
- d, &l1_pgentry_val(nl1e), &l1_pgentry_val(*sl1e));
+ l1pte_propagate_from_guest(d, nl1e, sl1e);
unmap_domain_mem(sl1e);
}
#endif
@@ -2677,8 +2676,9 @@ static struct x86_mem_emulator ptwr_mem_
/* Write page fault handler: check if guest is trying to modify a PTE. */
int ptwr_do_page_fault(struct domain *d, unsigned long addr)
{
- unsigned long pte, pfn, l2e;
+ unsigned long pfn;
struct pfn_info *page;
+ l1_pgentry_t pte;
l2_pgentry_t *pl2e;
int which;
u32 l2_idx;
@@ -2690,19 +2690,19 @@ int ptwr_do_page_fault(struct domain *d,
* Attempt to read the PTE that maps the VA being accessed. By checking for
* PDE validity in the L2 we avoid many expensive fixups in __get_user().
*/
- if ( !(l2_pgentry_val(__linear_l2_table[addr>>L2_PAGETABLE_SHIFT]) &
+ if ( !(l2e_get_flags(__linear_l2_table[addr>>L2_PAGETABLE_SHIFT]) &
_PAGE_PRESENT) ||
- __get_user(pte, (unsigned long *)
- &linear_pg_table[l1_linear_offset(addr)]) )
+ __copy_from_user(&pte,&linear_pg_table[l1_linear_offset(addr)],
+ sizeof(pte)) )
{
return 0;
}
- pfn = pte >> PAGE_SHIFT;
+ pfn = l1e_get_pfn(pte);
page = &frame_table[pfn];
/* We are looking only for read-only mappings of p.t. pages. */
- if ( ((pte & (_PAGE_RW | _PAGE_PRESENT)) != _PAGE_PRESENT) ||
+ if ( ((l1e_get_flags(pte) & (_PAGE_RW | _PAGE_PRESENT)) != _PAGE_PRESENT) ||
((page->u.inuse.type_info & PGT_type_mask) != PGT_l1_page_table) ||
(page_get_owner(page) != d) )
{
@@ -2732,9 +2732,8 @@ int ptwr_do_page_fault(struct domain *d,
* an ACTIVE p.t., otherwise it is INACTIVE.
*/
pl2e = &__linear_l2_table[l2_idx];
- l2e = l2_pgentry_val(*pl2e);
which = PTWR_PT_INACTIVE;
- if ( (l2e >> PAGE_SHIFT) == pfn )
+ if ( (l2e_get_pfn(*pl2e)) == pfn )
{
/*
* Check the PRESENT bit to set ACTIVE mode.
@@ -2742,7 +2741,7 @@ int ptwr_do_page_fault(struct domain *d,
* ACTIVE p.t. (it may be the same p.t. mapped at another virt addr).
* The ptwr_flush call below will restore the PRESENT bit.
*/
- if ( likely(l2e & _PAGE_PRESENT) ||
+ if ( likely(l2e_get_flags(*pl2e) & _PAGE_PRESENT) ||
(d->arch.ptwr[PTWR_PT_ACTIVE].l1va &&
(l2_idx == d->arch.ptwr[PTWR_PT_ACTIVE].l2_idx)) )
which = PTWR_PT_ACTIVE;
@@ -2772,7 +2771,7 @@ int ptwr_do_page_fault(struct domain *d,
/* For safety, disconnect the L1 p.t. page from current space. */
if ( which == PTWR_PT_ACTIVE )
{
- *pl2e = mk_l2_pgentry(l2e & ~_PAGE_PRESENT);
+ l2e_remove_flags(*pl2e, _PAGE_PRESENT);
local_flush_tlb(); /* XXX Multi-CPU guests? */
}
@@ -2783,11 +2782,11 @@ int ptwr_do_page_fault(struct domain *d,
L1_PAGETABLE_ENTRIES * sizeof(l1_pgentry_t));
/* Finally, make the p.t. page writable by the guest OS. */
- pte |= _PAGE_RW;
+ l1e_add_flags(pte, _PAGE_RW);
PTWR_PRINTK("[%c] update %p pte to %p\n", PTWR_PRINT_WHICH,
&linear_pg_table[addr>>PAGE_SHIFT], pte);
- if ( unlikely(__put_user(pte, (unsigned long *)
- &linear_pg_table[addr>>PAGE_SHIFT])) )
+ if ( unlikely(__copy_to_user(&linear_pg_table[addr>>PAGE_SHIFT],
+ &pte, sizeof(pte))) )
{
MEM_LOG("ptwr: Could not update pte at %p", (unsigned long *)
&linear_pg_table[addr>>PAGE_SHIFT]);
Index: xen/arch/x86/vmx_platform.c
===================================================================
--- xen.orig/arch/x86/vmx_platform.c 2005-04-12 17:01:40.000000000 +0200
+++ xen/arch/x86/vmx_platform.c 2005-04-12 17:33:01.000000000 +0200
@@ -408,7 +408,7 @@ static int vmx_decode(const unsigned cha
static int inst_copy_from_guest(unsigned char *buf, unsigned long guest_eip, int inst_len)
{
- unsigned long gpte;
+ l1_pgentry_t gpte;
unsigned long mfn;
unsigned long ma;
unsigned char * inst_start;
@@ -419,7 +419,7 @@ static int inst_copy_from_guest(unsigned
if ((guest_eip & PAGE_MASK) == ((guest_eip + inst_len) & PAGE_MASK)) {
gpte = gva_to_gpte(guest_eip);
- mfn = phys_to_machine_mapping(gpte >> PAGE_SHIFT);
+ mfn = phys_to_machine_mapping(l1e_get_pfn(gpte));
ma = (mfn << PAGE_SHIFT) | (guest_eip & (PAGE_SIZE - 1));
inst_start = (unsigned char *)map_domain_mem(ma);
Index: xen/include/asm-x86/x86_32/page.h
===================================================================
--- xen.orig/include/asm-x86/x86_32/page.h 2005-04-12 17:01:37.000000000 +0200
+++ xen/include/asm-x86/x86_32/page.h 2005-04-12 19:37:40.000000000 +0200
@@ -21,34 +21,48 @@
#ifndef __ASSEMBLY__
#include <xen/config.h>
-typedef struct { unsigned long l1_lo; } l1_pgentry_t;
-typedef struct { unsigned long l2_lo; } l2_pgentry_t;
+#include <asm/types.h>
+typedef struct { u32 l1_lo; } l1_pgentry_t;
+typedef struct { u32 l2_lo; } l2_pgentry_t;
typedef l2_pgentry_t root_pgentry_t;
#endif /* !__ASSEMBLY__ */
-/* Strip type from a table entry. */
-#define l1_pgentry_val(_x) ((_x).l1_lo)
-#define l2_pgentry_val(_x) ((_x).l2_lo)
-#define root_pgentry_val(_x) (l2_pgentry_val(_x))
+/* read access (deprecated) */
+#define l1e_get_value(_x) ((_x).l1_lo)
+#define l2e_get_value(_x) ((_x).l2_lo)
-/* Add type to a table entry. */
-#define mk_l1_pgentry(_x) ( (l1_pgentry_t) { (_x) } )
-#define mk_l2_pgentry(_x) ( (l2_pgentry_t) { (_x) } )
-#define mk_root_pgentry(_x) (mk_l2_pgentry(_x))
+/* read access */
+#define l1e_get_pfn(_x) ((_x).l1_lo >> PAGE_SHIFT)
+#define l1e_get_phys(_x) ((_x).l1_lo & PAGE_MASK)
+#define l1e_get_flags(_x) ((_x).l1_lo & ~PAGE_MASK)
-/* Turn a typed table entry into a physical address. */
-#define l1_pgentry_to_phys(_x) (l1_pgentry_val(_x) & PAGE_MASK)
-#define l2_pgentry_to_phys(_x) (l2_pgentry_val(_x) & PAGE_MASK)
-#define root_pgentry_to_phys(_x) (l2_pgentry_to_phys(_x))
+#define l2e_get_pfn(_x) ((_x).l2_lo >> PAGE_SHIFT)
+#define l2e_get_phys(_x) ((_x).l2_lo & PAGE_MASK)
+#define l2e_get_flags(_x) ((_x).l2_lo & ~PAGE_MASK)
-/* Turn a typed table entry into a page index. */
-#define l1_pgentry_to_pfn(_x) (l1_pgentry_val(_x) >> PAGE_SHIFT)
-#define l2_pgentry_to_pfn(_x) (l2_pgentry_val(_x) >> PAGE_SHIFT)
-#define root_pgentry_to_pfn(_x) (l2_pgentry_to_pfn(_x))
+#define root_get_pfn l2e_get_pfn
+#define root_get_flags l2e_get_flags
+
+/* write access: each macro assigns to _x in place; _p and _f are parenthesized so expression arguments expand safely */
+#define l1e_clear(_x) do { (_x).l1_lo = 0; } while(0)
+#define l1e_init_pfn(_x,_p,_f) do { (_x).l1_lo = ((_p) << PAGE_SHIFT) | ((u32)(_f)); } while(0)
+#define l1e_init_phys(_x,_p,_f) do { (_x).l1_lo = ((_p) & PAGE_MASK) | ((u32)(_f)); } while(0)
+#define l1e_add_flags(_x, _f) do { (_x).l1_lo |= ((u32)(_f)); } while(0)
+#define l1e_remove_flags(_x, _f) do { (_x).l1_lo &= ~((u32)(_f)); } while(0)
+
+#define l2e_clear(_x) do { (_x).l2_lo = 0; } while(0)
+#define l2e_init_pfn(_x,_p,_f) do { (_x).l2_lo = ((_p) << PAGE_SHIFT) | ((u32)(_f)); } while(0)
+#define l2e_init_phys(_x,_p,_f) do { (_x).l2_lo = ((_p) & PAGE_MASK) | ((u32)(_f)); } while(0)
+#define l2e_add_flags(_x, _f) do { (_x).l2_lo |= ((u32)(_f)); } while(0)
+#define l2e_remove_flags(_x, _f) do { (_x).l2_lo &= ~((u32)(_f)); } while(0)
+
+/* check entries */
+#define l1e_has_changed(_x1,_x2,_f) ((((_x1).l1_lo ^ (_x2).l1_lo) & (PAGE_MASK | (_f))) != 0)
+#define l2e_has_changed(_x1,_x2,_f) ((((_x1).l2_lo ^ (_x2).l2_lo) & (PAGE_MASK | (_f))) != 0)
/* Pagetable walking. */
-#define l2_pgentry_to_l1(_x) \
- ((l1_pgentry_t *)__va(l2_pgentry_to_phys(_x)))
+#define l2e_to_l1e(_x) \
+ ((l1_pgentry_t *)__va(l2e_get_phys(_x)))
/* Given a virtual address, get an entry offset into a page table. */
#define l1_table_offset(_a) \
@@ -65,6 +79,7 @@ typedef l2_pgentry_t root_pgentry_t;
#define PGT_root_page_table PGT_l2_page_table
#define _PAGE_NX 0UL
+#define PAGE_FLAG_MASK 0xfff
#define L1_DISALLOW_MASK (3UL << 7)
#define L2_DISALLOW_MASK (7UL << 7)
Index: xen/include/asm-x86/mm.h
===================================================================
--- xen.orig/include/asm-x86/mm.h 2005-04-12 17:01:37.000000000 +0200
+++ xen/include/asm-x86/mm.h 2005-04-12 17:33:01.000000000 +0200
@@ -263,13 +263,14 @@ static inline unsigned long phys_to_mach
unsigned long mfn;
l1_pgentry_t pte;
- if ( !__get_user(l1_pgentry_val(pte), (__phys_to_machine_mapping + pfn)) &&
- (l1_pgentry_val(pte) & _PAGE_PRESENT) )
- mfn = l1_pgentry_to_phys(pte) >> PAGE_SHIFT;
- else
- mfn = INVALID_MFN;
-
- return mfn;
+ if (!__copy_from_user(&pte, (__phys_to_machine_mapping + pfn),
+ sizeof(pte))
+ && (l1e_get_flags(pte) & _PAGE_PRESENT) )
+ mfn = l1e_get_pfn(pte);
+ else
+ mfn = INVALID_MFN;
+
+ return mfn;
}
#define set_machinetophys(_mfn, _pfn) machine_to_phys_mapping[(_mfn)] = (_pfn)
@@ -352,7 +353,7 @@ void propagate_page_fault(unsigned long
* hold a reference to the page.
*/
int update_grant_va_mapping(unsigned long va,
- unsigned long val,
+ l1_pgentry_t _nl1e,
struct domain *d,
struct exec_domain *ed);
#endif /* __ASM_X86_MM_H__ */
Index: xen/arch/x86/domain_build.c
===================================================================
--- xen.orig/arch/x86/domain_build.c 2005-04-12 17:01:37.000000000 +0200
+++ xen/arch/x86/domain_build.c 2005-04-12 17:33:01.000000000 +0200
@@ -243,10 +243,10 @@ int construct_dom0(struct domain *d,
/* WARNING: The new domain must have its 'processor' field filled in! */
l2start = l2tab = (l2_pgentry_t *)mpt_alloc; mpt_alloc += PAGE_SIZE;
memcpy(l2tab, &idle_pg_table[0], PAGE_SIZE);
- l2tab[LINEAR_PT_VIRT_START >> L2_PAGETABLE_SHIFT] =
- mk_l2_pgentry((unsigned long)l2start | __PAGE_HYPERVISOR);
- l2tab[PERDOMAIN_VIRT_START >> L2_PAGETABLE_SHIFT] =
- mk_l2_pgentry(__pa(d->arch.mm_perdomain_pt) | __PAGE_HYPERVISOR);
+ l2e_init_phys(l2tab[LINEAR_PT_VIRT_START >> L2_PAGETABLE_SHIFT],
+ (unsigned long)l2start, __PAGE_HYPERVISOR);
+ l2e_init_phys(l2tab[PERDOMAIN_VIRT_START >> L2_PAGETABLE_SHIFT],
+ __pa(d->arch.mm_perdomain_pt), __PAGE_HYPERVISOR);
ed->arch.guest_table = mk_pagetable((unsigned long)l2start);
l2tab += l2_table_offset(dsi.v_start);
@@ -257,12 +257,14 @@ int construct_dom0(struct domain *d,
{
l1start = l1tab = (l1_pgentry_t *)mpt_alloc;
mpt_alloc += PAGE_SIZE;
- *l2tab++ = mk_l2_pgentry((unsigned long)l1start | L2_PROT);
+ l2e_init_phys(*l2tab, (unsigned long)l1start, L2_PROT);
+ l2tab++;
clear_page(l1tab);
if ( count == 0 )
l1tab += l1_table_offset(dsi.v_start);
}
- *l1tab++ = mk_l1_pgentry((mfn << PAGE_SHIFT) | L1_PROT);
+ l1e_init_pfn(*l1tab, mfn, L1_PROT);
+ l1tab++;
page = &frame_table[mfn];
if ( !get_page_and_type(page, d, PGT_writable_page) )
@@ -273,13 +275,13 @@ int construct_dom0(struct domain *d,
/* Pages that are part of page tables must be read only. */
l2tab = l2start + l2_table_offset(vpt_start);
- l1start = l1tab = (l1_pgentry_t *)l2_pgentry_to_phys(*l2tab);
+ l1start = l1tab = (l1_pgentry_t *)l2e_get_phys(*l2tab);
l1tab += l1_table_offset(vpt_start);
for ( count = 0; count < nr_pt_pages; count++ )
{
- page = &frame_table[l1_pgentry_to_pfn(*l1tab)];
+ page = &frame_table[l1e_get_pfn(*l1tab)];
if ( !opt_dom0_shadow )
- *l1tab = mk_l1_pgentry(l1_pgentry_val(*l1tab) & ~_PAGE_RW);
+ l1e_remove_flags(*l1tab, _PAGE_RW);
else
if ( !get_page_type(page, PGT_writable_page) )
BUG();
@@ -317,7 +319,7 @@ int construct_dom0(struct domain *d,
get_page(page, d); /* an extra ref because of readable mapping */
}
if ( !((unsigned long)++l1tab & (PAGE_SIZE - 1)) )
- l1start = l1tab = (l1_pgentry_t *)l2_pgentry_to_phys(*++l2tab);
+ l1start = l1tab = (l1_pgentry_t *)l2e_get_phys(*++l2tab);
}
#elif defined(__x86_64__)
@@ -525,8 +527,8 @@ int construct_dom0(struct domain *d,
#if defined(__i386__)
/* Destroy low mappings - they were only for our convenience. */
for ( i = 0; i < DOMAIN_ENTRIES_PER_L2_PAGETABLE; i++ )
- if ( l2_pgentry_val(l2start[i]) & _PAGE_PSE )
- l2start[i] = mk_l2_pgentry(0);
+ if ( l2e_get_flags(l2start[i]) & _PAGE_PSE )
+ l2e_clear(l2start[i]);
zap_low_mappings(); /* Do the same for the idle page tables. */
#endif
@@ -544,10 +546,18 @@ int construct_dom0(struct domain *d,
: SHM_enable));
if ( opt_dom0_translate )
{
+#if 1
+ printk("FIXME: %s:%d\n",__FUNCTION__,__LINE__);
+ for ( ; ; )
+ __asm__ __volatile__ ( "hlt" );
+#else
+ /* Hmm, what does this?
+ Looks like isn't portable across 32/64 bit and pae/non-pae ...
+ -- kraxel */
// map this domain's p2m table into current page table,
// so that we can easily access it.
//
- ASSERT( root_pgentry_val(idle_pg_table[1]) == 0 );
+ ASSERT( root_get_value(idle_pg_table[1]) == 0 );
ASSERT( pagetable_val(d->arch.phys_table) );
idle_pg_table[1] = mk_root_pgentry(
pagetable_val(d->arch.phys_table) | __PAGE_HYPERVISOR);
@@ -556,6 +566,7 @@ int construct_dom0(struct domain *d,
>> PAGE_SHIFT);
idle_pg_table[1] = mk_root_pgentry(0);
local_flush_tlb();
+#endif
}
update_pagetables(ed); /* XXX SMP */
Index: xen/arch/x86/dom0_ops.c
===================================================================
--- xen.orig/arch/x86/dom0_ops.c 2005-04-12 17:01:37.000000000 +0200
+++ xen/arch/x86/dom0_ops.c 2005-04-12 17:33:01.000000000 +0200
@@ -425,7 +425,7 @@ void arch_getdomaininfo_ctxt(
{
for ( i = 0; i < 16; i++ )
c->gdt_frames[i] =
- l1_pgentry_to_pfn(ed->arch.perdomain_ptes[i]);
+ l1e_get_pfn(ed->arch.perdomain_ptes[i]);
c->gdt_ents = GET_GDT_ENTRIES(ed);
}
c->kernel_ss = ed->arch.kernel_ss;
Index: xen/include/asm-x86/page.h
===================================================================
--- xen.orig/include/asm-x86/page.h 2005-04-12 17:01:37.000000000 +0200
+++ xen/include/asm-x86/page.h 2005-04-12 17:33:01.000000000 +0200
@@ -77,7 +77,7 @@ typedef struct { unsigned long pt_lo; }
#define linear_l4_table(_ed) ((_ed)->arch.guest_vl4table)
#define va_to_l1mfn(_ed, _va) \
- (l2_pgentry_val(linear_l2_table(_ed)[_va>>L2_PAGETABLE_SHIFT]) >> PAGE_SHIFT)
+ (l2e_get_pfn(linear_l2_table(_ed)[_va>>L2_PAGETABLE_SHIFT]))
extern root_pgentry_t idle_pg_table[ROOT_PAGETABLE_ENTRIES];
Index: xen/arch/x86/x86_32/traps.c
===================================================================
--- xen.orig/arch/x86/x86_32/traps.c 2005-04-12 17:01:36.000000000 +0200
+++ xen/arch/x86/x86_32/traps.c 2005-04-12 17:33:02.000000000 +0200
@@ -163,7 +163,7 @@ void show_page_walk(unsigned long addr)
printk("Pagetable walk from %p:\n", addr);
- page = l2_pgentry_val(idle_pg_table[l2_table_offset(addr)]);
+ page = l2e_get_value(idle_pg_table[l2_table_offset(addr)]);
printk(" L2 = %p %s\n", page, (page & _PAGE_PSE) ? "(4MB)" : "");
if ( !(page & _PAGE_PRESENT) || (page & _PAGE_PSE) )
return;
Index: xen/arch/x86/x86_32/mm.c
===================================================================
--- xen.orig/arch/x86/x86_32/mm.c 2005-04-12 17:01:38.000000000 +0200
+++ xen/arch/x86/x86_32/mm.c 2005-04-12 17:33:02.000000000 +0200
@@ -47,9 +47,9 @@ int map_pages(
if ( ((s|v|p) & ((1<<L2_PAGETABLE_SHIFT)-1)) == 0 )
{
/* Super-page mapping. */
- if ( (l2_pgentry_val(*pl2e) & _PAGE_PRESENT) )
+ if ( (l2e_get_flags(*pl2e) & _PAGE_PRESENT) )
local_flush_tlb_pge();
- *pl2e = mk_l2_pgentry(p|flags|_PAGE_PSE);
+ l2e_init_phys(*pl2e, p, flags|_PAGE_PSE);
v += 1 << L2_PAGETABLE_SHIFT;
p += 1 << L2_PAGETABLE_SHIFT;
@@ -58,16 +58,16 @@ int map_pages(
else
{
/* Normal page mapping. */
- if ( !(l2_pgentry_val(*pl2e) & _PAGE_PRESENT) )
+ if ( !(l2e_get_flags(*pl2e) & _PAGE_PRESENT) )
{
newpg = (void *)alloc_xenheap_page();
clear_page(newpg);
- *pl2e = mk_l2_pgentry(__pa(newpg) | (flags & __PTE_MASK));
+ l2e_init_phys(*pl2e, __pa(newpg), flags & __PTE_MASK);
}
- pl1e = l2_pgentry_to_l1(*pl2e) + l1_table_offset(v);
- if ( (l1_pgentry_val(*pl1e) & _PAGE_PRESENT) )
+ pl1e = l2e_to_l1e(*pl2e) + l1_table_offset(v);
+ if ( (l1e_get_flags(*pl1e) & _PAGE_PRESENT) )
local_flush_tlb_one(v);
- *pl1e = mk_l1_pgentry(p|flags);
+ l1e_init_phys(*pl1e, p, flags);
v += 1 << L1_PAGETABLE_SHIFT;
p += 1 << L1_PAGETABLE_SHIFT;
@@ -90,14 +90,14 @@ void __set_fixmap(
void __init paging_init(void)
{
void *ioremap_pt;
- unsigned long v, l2e;
+ unsigned long v;
struct pfn_info *pg;
/* Allocate and map the machine-to-phys table. */
if ( (pg = alloc_domheap_pages(NULL, 10)) == NULL )
panic("Not enough memory to bootstrap Xen.\n");
- idle_pg_table[l2_table_offset(RDWR_MPT_VIRT_START)] =
- mk_l2_pgentry(page_to_phys(pg) | __PAGE_HYPERVISOR | _PAGE_PSE);
+ l2e_init_phys(idle_pg_table[l2_table_offset(RDWR_MPT_VIRT_START)],
+ page_to_phys(pg), __PAGE_HYPERVISOR | _PAGE_PSE);
memset((void *)RDWR_MPT_VIRT_START, 0x55, 4UL << 20);
/* Xen 4MB mappings can all be GLOBAL. */
@@ -105,44 +105,43 @@ void __init paging_init(void)
{
for ( v = HYPERVISOR_VIRT_START; v; v += (1 << L2_PAGETABLE_SHIFT) )
{
- l2e = l2_pgentry_val(idle_pg_table[l2_table_offset(v)]);
- if ( l2e & _PAGE_PSE )
- l2e |= _PAGE_GLOBAL;
- idle_pg_table[v >> L2_PAGETABLE_SHIFT] = mk_l2_pgentry(l2e);
+ if (l2e_get_flags(idle_pg_table[l2_table_offset(v)]) & _PAGE_PSE)
+ l2e_add_flags(idle_pg_table[v >> L2_PAGETABLE_SHIFT],
+ _PAGE_GLOBAL);
}
}
/* Create page table for ioremap(). */
ioremap_pt = (void *)alloc_xenheap_page();
clear_page(ioremap_pt);
- idle_pg_table[l2_table_offset(IOREMAP_VIRT_START)] =
- mk_l2_pgentry(__pa(ioremap_pt) | __PAGE_HYPERVISOR);
+ l2e_init_phys(idle_pg_table[l2_table_offset(IOREMAP_VIRT_START)],
+ __pa(ioremap_pt), __PAGE_HYPERVISOR);
/* Create read-only mapping of MPT for guest-OS use.
* NB. Remove the global bit so that shadow_mode_translate()==true domains
* can reused this address space for their phys-to-machine mapping.
*/
- idle_pg_table[l2_table_offset(RO_MPT_VIRT_START)] =
- mk_l2_pgentry(l2_pgentry_val(
- idle_pg_table[l2_table_offset(RDWR_MPT_VIRT_START)]) &
- ~(_PAGE_RW | _PAGE_GLOBAL));
+ l2e_init_pfn(idle_pg_table[l2_table_offset(RO_MPT_VIRT_START)],
+ l2e_get_pfn(idle_pg_table[l2_table_offset(RDWR_MPT_VIRT_START)]),
+ l2e_get_flags(idle_pg_table[l2_table_offset(RDWR_MPT_VIRT_START)])
+ & ~(_PAGE_RW | _PAGE_GLOBAL));
/* Set up mapping cache for domain pages. */
mapcache = (unsigned long *)alloc_xenheap_page();
clear_page(mapcache);
- idle_pg_table[l2_table_offset(MAPCACHE_VIRT_START)] =
- mk_l2_pgentry(__pa(mapcache) | __PAGE_HYPERVISOR);
+ l2e_init_phys(idle_pg_table[l2_table_offset(MAPCACHE_VIRT_START)],
+ __pa(mapcache), __PAGE_HYPERVISOR);
/* Set up linear page table mapping. */
- idle_pg_table[l2_table_offset(LINEAR_PT_VIRT_START)] =
- mk_l2_pgentry(__pa(idle_pg_table) | __PAGE_HYPERVISOR);
+ l2e_init_phys(idle_pg_table[l2_table_offset(LINEAR_PT_VIRT_START)],
+ __pa(idle_pg_table), __PAGE_HYPERVISOR);
}
void __init zap_low_mappings(void)
{
int i;
for ( i = 0; i < DOMAIN_ENTRIES_PER_L2_PAGETABLE; i++ )
- idle_pg_table[i] = mk_l2_pgentry(0);
+ l2e_clear(idle_pg_table[i]);
flush_tlb_all_pge();
}
@@ -168,7 +167,7 @@ void subarch_init_memory(struct domain *
}
/* M2P table is mappable read-only by privileged domains. */
- m2p_start_mfn = l2_pgentry_to_pfn(
+ m2p_start_mfn = l2e_get_pfn(
idle_pg_table[l2_table_offset(RDWR_MPT_VIRT_START)]);
for ( i = 0; i < 1024; i++ )
{
@@ -318,11 +317,9 @@ void *memguard_init(void *heap_start)
l1 = (l1_pgentry_t *)heap_start;
heap_start = (void *)((unsigned long)heap_start + PAGE_SIZE);
for ( j = 0; j < L1_PAGETABLE_ENTRIES; j++ )
- l1[j] = mk_l1_pgentry((i << L2_PAGETABLE_SHIFT) |
- (j << L1_PAGETABLE_SHIFT) |
- __PAGE_HYPERVISOR);
- idle_pg_table[i + l2_table_offset(PAGE_OFFSET)] =
- mk_l2_pgentry(virt_to_phys(l1) | __PAGE_HYPERVISOR);
+ l1e_init_phys(l1[j], ((i << L2_PAGETABLE_SHIFT) | (j << L1_PAGETABLE_SHIFT)), __PAGE_HYPERVISOR); /* L2 slot i, page j */
+ l2e_init_phys(idle_pg_table[i + l2_table_offset(PAGE_OFFSET)],
+ virt_to_phys(l1), __PAGE_HYPERVISOR);
}
return heap_start;
@@ -344,11 +341,12 @@ static void __memguard_change_range(void
while ( _l != 0 )
{
l2 = &idle_pg_table[l2_table_offset(_p)];
- l1 = l2_pgentry_to_l1(*l2) + l1_table_offset(_p);
- if ( guard )
- *l1 = mk_l1_pgentry(l1_pgentry_val(*l1) & ~_PAGE_PRESENT);
- else
- *l1 = mk_l1_pgentry(l1_pgentry_val(*l1) | _PAGE_PRESENT);
+ l1 = l2e_to_l1e(*l2) + l1_table_offset(_p);
+ if ( guard ) {
+ l1e_remove_flags(*l1, _PAGE_PRESENT);
+ } else {
+ l1e_add_flags(*l1, _PAGE_PRESENT);
+ }
_p += PAGE_SIZE;
_l -= PAGE_SIZE;
}
Index: xen/common/grant_table.c
===================================================================
--- xen.orig/common/grant_table.c 2005-04-12 17:01:41.000000000 +0200
+++ xen/common/grant_table.c 2005-04-12 17:33:02.000000000 +0200
@@ -253,12 +253,12 @@ __gnttab_activate_grant_ref(
{
/* Write update into the pagetable
*/
+ l1_pgentry_t pte;
- rc = update_grant_va_mapping( host_virt_addr,
- (frame << PAGE_SHIFT) | _PAGE_PRESENT |
- _PAGE_ACCESSED |
- _PAGE_DIRTY |
- ((dev_hst_ro_flags & GNTMAP_readonly) ? 0 : _PAGE_RW),
+ l1e_init_pfn(pte, frame, _PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_DIRTY);
+ if (!(dev_hst_ro_flags & GNTMAP_readonly))
+ l1e_add_flags(pte,_PAGE_RW);
+ rc = update_grant_va_mapping( host_virt_addr, pte,
mapping_d, mapping_ed );
/* IMPORTANT: (rc == 0) => must flush / invalidate entry in TLB.
Index: xen/include/asm-x86/domain.h
===================================================================
--- xen.orig/include/asm-x86/domain.h 2005-04-12 17:01:35.000000000 +0200
+++ xen/include/asm-x86/domain.h 2005-04-12 17:33:02.000000000 +0200
@@ -130,7 +130,7 @@ struct arch_exec_domain
l2_pgentry_t *guest_vtable; /* virtual address of pagetable */
l2_pgentry_t *shadow_vtable; /* virtual address of shadow_table */
l2_pgentry_t *monitor_vtable; /* virtual address of monitor_table */
- l1_pgentry_t *hl2_vtable; /* virtual address of hl2_table */
+ l2_pgentry_t *hl2_vtable; /* virtual address of hl2_table */
#ifdef __x86_64__
l3_pgentry_t *guest_vl3table;
^ permalink raw reply [flat|nested] 13+ messages in thread
* RE: [patch] pagetable cleanups
2005-04-12 18:58 [patch] pagetable cleanups Gerd Knorr
@ 2005-04-14 12:25 ` Michael A Fetterman
2005-04-14 15:01 ` Gerd Knorr
2005-04-14 16:47 ` Christian Limpach
1 sibling, 1 reply; 13+ messages in thread
From: Michael A Fetterman @ 2005-04-14 12:25 UTC (permalink / raw)
To: 'Gerd Knorr'; +Cc: xen-devel
Overall, I think the patch looks pretty good...
A couple of comments:
1) There's no Signed-Off-By comment attached to this. Could you please
provide one?
2) About your question at the bottom of construct_dom0:
The current code there is intended to allow booting of dom0's with
translate mode enabled. As such, it probably won't stay in the code base
forever, but it was and is a useful hack. The bringup process for this
new shadow code was to get dom0's working first, and we're now working on
(cleaner) domU support. I'm not too worried about x86_64 or pae support
for this dom0 translate mode support. I'd like to just remove the halt
you added there, OK?
3) HL2 tables are not tables of L2 entries. They contain L1 entries.
They are essentially shadows of guest L2 pages, which will be used by Xen
to get a linear-pagetable-like mapping of the guest's L1 pages.
A normal guest L2 has guest-physical pages in it.
A normal shadow L2 points at shadows of the guest's L1s.
An HL2 has machine addresses of the guest's L1 pages in it, and is *used*
as an L1 table by Xen. So the things like "l1_pgentry_t *hl2_vtable" in
domain.h were not typos, and should stay the way they were...
The HL2 is a concept that works well for the simple 2 level page tables,
and is a clever solution to avoid doing lots of extra map_domain_mem()
calls to access the guest's L1 tables, but it both falls apart and is
fortunately unnecessary for 64-bit mode. I haven't thought much about PAE
yet, but HL2s are probably still useful there because of the cost of
map_domain_mem()...
4) There was probably a bunch of debate about this somewhere before, but
I missed it.
The macros which set/clear page table types don't obey C's pass-by-value
calling semantics. That means that they can't be replaced with simple
functions, if desired -- there would always have to be a macro layer.
There are also no macros for creating L1E or L2E as expressions -- only
statements which assign them. Perhaps this was intentional? It means that
you end up declaring extra variables to hold essentially temporary values
in a few places...
Comments?
5) I found a couple of compilation problems when compiling with debug=y...
I've dealt with issues #2, 3, and 5 in a slightly modified version of your
patch, available at http://www.cl.cam.ac.uk/~maf46/kraxel.patch.v2
Take a look, and let me know what you think.
Michael
-----Original Message-----
From: xen-devel-bounces@lists.xensource.com
[mailto:xen-devel-bounces@lists.xensource.com] On Behalf Of Gerd Knorr
Sent: Tuesday, April 12, 2005 7:59 PM
To: xen-devel@lists.xensource.com
Subject: [Xen-devel] [patch] pagetable cleanups
Hi,
Next version of the pagetable cleanup patch. Builds and boots
domain 0 on x86_32. Changes from the last version:
* macro names are changed.
* adapted to the new shadow code checked in last week.
* new macro: l1e_has_changed() to compare page table
entries.
Open issues:
* I'm not sure how to handle the debug printk's best. These use
the l1e_get_value() macro at the moment to get the raw bits and
print them as unsigned long hex value. I'd like to get rid of
the l1e_get_value() macro altogether though ...
* x86_64 build needs fixing, will look into this tomorrow.
Enjoy,
Gerd
^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [patch] pagetable cleanups
2005-04-14 12:25 ` Michael A Fetterman
@ 2005-04-14 15:01 ` Gerd Knorr
2005-04-14 14:48 ` Mark Williamson
0 siblings, 1 reply; 13+ messages in thread
From: Gerd Knorr @ 2005-04-14 15:01 UTC (permalink / raw)
To: Michael A Fetterman; +Cc: xen-devel
On Thu, Apr 14, 2005 at 01:25:19PM +0100, Michael A Fetterman wrote:
> Overall, I think the patch looks pretty good...
>
> A couple of comments:
>
> 1) There's no Signed-Off-By comment attached to this. Could you please
> provide one?
Yes, that's easy ;)
> 2) About your question at the bottom of construct_dom0: The current
> code there is intended to allow booting of dom0's with translate mode
> enabled. As such, it probably won't stay in the code base forever,
> but it was and is a useful hack.
Ah, ok.
> I'd like to just remove the halt you added there, OK?
Fine with me, I've added your changes instead.
> 3) HL2 tables are not tables of L2 entries. They contain L1 entries.
> They are essentially shadows of guest L2 pages, which will be used by
> Xen to get a linear-pagetable-like mapping of the guest's L1 pages.
Ah, *that's* the point of these beasts. The page manipulations done on
them look like l2 operations (well, they actually are as they really
point to l1 pages), that confused me ;)
> 4) There was probably a bunch of debate about this somewhere before,
> but I missed it. The macros which set/clear page table types don't
> obey C's pass-by-value calling semantics. That means that they can't
> be replaced with simple functions, if desired -- there would always
> have to be a macro layer.
Yep, I noticed that as well, as the PAE versions became a bit more
complex and I tried to make them inline functions instead, which didn't
work ...
I can change them to pass-by-reference instead, it's probably a good
idea. Hope gcc is clever enough to see that it's the same after all
and doesn't generate extra code then.
> There's also no macros for creating L1E or L2E as expressions -- only
> statements which assign them. Perhaps this was intentional? It means
> that you end up declaring extra variables to hold essentially
> temporary values in a few places...
Yes, that was intentional. I think that isn't bad, it makes the code more
readable. And I think it actually is impossible to return structs in C,
you can only return a pointer to a struct, which wouldn't help for the
"building entries as expressions" case.
> 5) I found a couple compilation problems when by compiling with debug=y...
Merged, thanks.
Current patch set is at http://dl.bytesex.org/patches/xen-2/ now
(issue #4 isn't addressed yet).
Gerd
^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [patch] pagetable cleanups
2005-04-14 15:01 ` Gerd Knorr
@ 2005-04-14 14:48 ` Mark Williamson
2005-04-14 18:27 ` Hollis Blanchard
0 siblings, 1 reply; 13+ messages in thread
From: Mark Williamson @ 2005-04-14 14:48 UTC (permalink / raw)
To: xen-devel; +Cc: Michael A Fetterman, Gerd Knorr
> Yes, was intentionally. I think that isn't bad, it makes the code more
> readable. And I think it actually is impossible to return structs in C,
> you can only return a pointer to a struct, which would't help for the
> "building entries as expressions" case.
Returning structs by values works with GCC. That said, I've never checked
whether that's standard or a GCC extension. For big structs I think it
transparently generates a memcpy, don't know what it generates for little
ones.
Cheers,
Mark
>
> > 5) I found a couple compilation problems when by compiling with
> > debug=y...
>
> Merged, thanks.
>
> Current patch set is at http://dl.bytesex.org/patches/xen-2/ now
> (issue #4 isn't adressed yet).
>
> Gerd
>
>
> _______________________________________________
> Xen-devel mailing list
> Xen-devel@lists.xensource.com
> http://lists.xensource.com/xen-devel
^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [patch] pagetable cleanups
2005-04-14 14:48 ` Mark Williamson
@ 2005-04-14 18:27 ` Hollis Blanchard
2005-04-14 19:07 ` Anthony Liguori
0 siblings, 1 reply; 13+ messages in thread
From: Hollis Blanchard @ 2005-04-14 18:27 UTC (permalink / raw)
To: Mark Williamson; +Cc: Michael A Fetterman, xen-devel, Gerd Knorr
On Thu, 2005-04-14 at 15:48 +0100, Mark Williamson wrote:
> > Yes, was intentionally. I think that isn't bad, it makes the code more
> > readable. And I think it actually is impossible to return structs in C,
> > you can only return a pointer to a struct, which would't help for the
> > "building entries as expressions" case.
>
> Returning structs by values works with GCC. That said, I've never checked
> whether that's standard or a GCC extension. For big structs I think it
> transparently generates a memcpy, don't know what it generates for little
> ones.
Depends on the ABI.
For ppc64 Linux:
Aggregates or unions of any length, and character strings of
length longer than 8 bytes, will be returned in a storage buffer
allocated by the caller. The caller will pass the address of
this buffer as a hidden first argument in r3, causing the first
explicit argument to be passed in r4.
For ppc32 (System V R4 ABI) it's a little different for small
structures:
A structure or union whose size is less than or equal to 8 bytes
shall be returned in r3 and r4, as if it were first stored in an
8-byte aligned memory area and then the low-addressed word were
loaded into r3 and the high-addressed word into r4.
Values of type long double and structures or unions that do not
meet the requirements for being returned in registers are
returned in a storage buffer allocated by the caller. The
address of this buffer is passed as a hidden argument in r3 as
if it were the first argument, causing gr in the argument
passing algorithm above to be initialized to 4 instead of 3.
x86 (again from SVR4):
If a function returns a structure or union, then the caller
provides space for the return value and places its address on
the stack as argument word zero. In effect, this address becomes
a ``hidden'' first argument.
So no memcpys at all; the callee fills in the values directly into the
caller's struct.
--
Hollis Blanchard
IBM Linux Technology Center
^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [patch] pagetable cleanups
2005-04-14 18:27 ` Hollis Blanchard
@ 2005-04-14 19:07 ` Anthony Liguori
2005-04-14 19:20 ` Hollis Blanchard
0 siblings, 1 reply; 13+ messages in thread
From: Anthony Liguori @ 2005-04-14 19:07 UTC (permalink / raw)
To: Hollis Blanchard
Cc: Michael A Fetterman, Mark Williamson, xen-devel, Gerd Knorr
On Thu, 2005-04-14 at 13:27, Hollis Blanchard wrote:
> So no memcpys at all; the caller fills in the values directly into the
> callee's struct.
Unless you're dealing with struct assignments. You basically get a
memcpy() when you assign structs. The standard makes it pretty clear
that a returned struct is not an lvalue (it's a copy). Unoptimized
C will have an inlined memcpy() because of the assignment. Otherwise,
the following:
foo(b, c).a = 3;
Would be valid and this is expressly forbidden by the standard (see
section 6.5.2.3).
A smart compiler will propagate the copy but it has its limitations.
Once you have sufficiently complex pointer operations in a function
there's no guarantee copies will be propagated correctly.
Because you cannot rely on the optimization, structs are not typically
returned by value. Since this is so rarely done, I'd argue it's more
readable to pass a struct as an argument (the fact that some people
don't even realize it's possible should support that).
Regards,
--
Anthony Liguori
Linux Technology Center (LTC) - IBM Austin
E-mail: aliguori@us.ibm.com
Phone: (512) 838-1208
^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [patch] pagetable cleanups
2005-04-14 19:07 ` Anthony Liguori
@ 2005-04-14 19:20 ` Hollis Blanchard
2005-04-14 19:30 ` Gerd Knorr
0 siblings, 1 reply; 13+ messages in thread
From: Hollis Blanchard @ 2005-04-14 19:20 UTC (permalink / raw)
To: Anthony Liguori
Cc: Michael A Fetterman, Mark Williamson, xen-devel, Gerd Knorr
[-- Attachment #1: Type: text/plain, Size: 729 bytes --]
On Thu, 2005-04-14 at 14:07 -0500, Anthony Liguori wrote:
> On Thu, 2005-04-14 at 13:27, Hollis Blanchard wrote:
> > So no memcpys at all; the caller fills in the values directly into the
> > callee's struct.
>
> Unless you're dealing with struct assignments. You basically get a
> memcpy() when you assign structs.
I have attached my testcase. Compile with -c and disassemble the object
file.
It is certainly possible that the callee could create a temporary struct
on its stack and memcpy into the address provided by the ABI. However,
even at -O0 the address of the struct is popped off the stack and used
directly when assigning to structure fields. I'm using gcc 3.3.5.
--
Hollis Blanchard
IBM Linux Technology Center
[-- Attachment #2: ctest.c --]
[-- Type: text/x-csrc, Size: 174 bytes --]
/* Minimal single-field struct; used as the by-value return type of bar(). */
struct foo {
int i;
};
/*
 * Declared here, defined in some other translation unit; bar() reads it.
 * NOTE(review): presumably extern so the compiler cannot constant-fold the
 * value when the file is compiled on its own -- confirm.
 */
extern int e;
/*
 * Build a struct foo in a local variable and return it by value.
 * The i field is set to the external integer e plus 0x5a.
 */
struct foo bar(void)
{
struct foo ret;
ret.i = e + 0x5a;
return ret;
}
/*
 * Assign the struct returned by bar() to a local variable and return its
 * i field as main's return value.
 */
int main(void)
{
struct foo f;
/* Struct assignment from a by-value return: the case debated in the thread. */
f = bar();
return f.i;
}
[-- Attachment #3: Type: text/plain, Size: 138 bytes --]
_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xensource.com
http://lists.xensource.com/xen-devel
^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [patch] pagetable cleanups
2005-04-14 19:20 ` Hollis Blanchard
@ 2005-04-14 19:30 ` Gerd Knorr
0 siblings, 0 replies; 13+ messages in thread
From: Gerd Knorr @ 2005-04-14 19:30 UTC (permalink / raw)
To: Hollis Blanchard; +Cc: Michael A Fetterman, Mark Williamson, xen-devel
> It is certainly possible that the callee could create a temporary struct
> on its stack and memcpy into the address provided by the ABI. However,
> even at -O0 the address of the struct is popped off the stack and used
> directly when assigning to structure fields. I'm using gcc 3.3.5.
Same here. And with optimization turned on gcc simply inlines the
functions, which makes the whole discussion kinda useless ;)
So I think we don't have to worry about gcc doing stupid things and
can simply return structs in the C code.
Gerd
^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [patch] pagetable cleanups
2005-04-12 18:58 [patch] pagetable cleanups Gerd Knorr
2005-04-14 12:25 ` Michael A Fetterman
@ 2005-04-14 16:47 ` Christian Limpach
1 sibling, 0 replies; 13+ messages in thread
From: Christian Limpach @ 2005-04-14 16:47 UTC (permalink / raw)
To: Gerd Knorr; +Cc: xen-devel
On 4/12/05, Gerd Knorr <kraxel@bytesex.org> wrote:
> Index: xen/arch/x86/shadow.c
> ===================================================================
> --- xen.orig/arch/x86/shadow.c 2005-04-12 17:01:41.000000000 +0200
> +++ xen/arch/x86/shadow.c 2005-04-12 17:33:01.000000000 +0200
> @@ -333,9 +333,8 @@ free_shadow_hl2_table(struct domain *d,
>
> for ( i = 0; i < limit; i++ )
> {
> - unsigned long hl2e = l1_pgentry_val(hl2[i]);
> - if ( hl2e & _PAGE_PRESENT )
> - put_page(pfn_to_page(hl2e >> PAGE_SHIFT));
> + if ( l1e_get_flags(*hl2) & _PAGE_PRESENT )
> + put_page(pfn_to_page(l1e_get_pfn(*hl2)));
> }
>
> unmap_domain_mem(hl2);
This hunk seems incorrect, you need to use hl2[i] instead of *hl2.
We need to be careful not to introduce bugs when making cleanups like
this one... It would be good if a few more people looked through the
patch before we commit it, thanks!
christian
^ permalink raw reply [flat|nested] 13+ messages in thread
* RE: [patch] pagetable cleanups
@ 2005-04-14 14:14 Ian Pratt
0 siblings, 0 replies; 13+ messages in thread
From: Ian Pratt @ 2005-04-14 14:14 UTC (permalink / raw)
To: Michael A Fetterman, Gerd Knorr; +Cc: xen-devel
> 4) There was probably a bunch of debate about this somewhere
> before, but I missed it.
> The macros which set/clear page table types don't obey C's
> pass-by-value calling
> semantics. That means that they can't be replaced with
> simple functions, if
> desired -- there would always have to be a macro layer.
> There's also no macros
> for creating L1E or L2E as expressions -- only statements
> which assign them.
> Perhaps this was intentional? It means that you end up
> declaring extra
> variables to hold essentially temporary values in a few places...
> Comments?
I know it's a total pain in the arse, but I'm actually quite convinced
by Michael's argument. Having the 'create' macros return a struct is
actually much nicer for building expressions.
I want to get this stuff checked in soon, though.
Ian
^ permalink raw reply [flat|nested] 13+ messages in thread
* RE: [patch] pagetable cleanups
@ 2005-04-14 15:11 Ian Pratt
2005-04-14 15:31 ` Gerd Knorr
0 siblings, 1 reply; 13+ messages in thread
From: Ian Pratt @ 2005-04-14 15:11 UTC (permalink / raw)
To: Gerd Knorr, Michael A Fetterman; +Cc: xen-devel
> > There's also no macros for creating L1E or L2E as
> expressions -- only
> > statements which assign them. Perhaps this was
> intentional? It means
> > that you end up declaring extra variables to hold essentially
> > temporary values in a few places...
>
> Yes, that was intentional. I think that isn't bad, it makes the
> code more readable. And I think it actually is impossible to
> return structs in C, you can only return a pointer to a
> struct, which wouldn't help for the "building entries as
> expressions" case.
Returning structs is actually fine, at least on modern C
implementations.
Gcc seems to optimise for it OK.
Personally think having "a = foo(b,c)" is actually more readable than
"foo(&a,b,c)".
Ian
^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [patch] pagetable cleanups
2005-04-14 15:11 Ian Pratt
@ 2005-04-14 15:31 ` Gerd Knorr
0 siblings, 0 replies; 13+ messages in thread
From: Gerd Knorr @ 2005-04-14 15:31 UTC (permalink / raw)
To: Ian Pratt; +Cc: Michael A Fetterman, xen-devel
> Personally think having "a = foo(b,c)" is actually more readable than
> "foo(&a,b,c)".
In that case yes.
If you use that to avoid extra temporary variables, i.e.
bar(x, foo(b,c));
instead of
a = foo(b,c);
bar(x,a);
it quickly becomes unreadable, especially if the function and argument
names are a bit longer and the first version doesn't fit onto a single
line any more. You are not forced to do that though, even if the
macros/functions _do_ return the entries ;)
I'll go over it again ...
Gerd
--
#define printk(args...) fprintf(stderr, ## args)
^ permalink raw reply [flat|nested] 13+ messages in thread
* RE: [patch] pagetable cleanups
@ 2005-04-14 19:33 Ian Pratt
0 siblings, 0 replies; 13+ messages in thread
From: Ian Pratt @ 2005-04-14 19:33 UTC (permalink / raw)
To: Anthony Liguori, Hollis Blanchard
Cc: Michael A Fetterman, xen-devel, Mark Williamson, Gerd Knorr
> Because you cannot rely on the optimization, structs are not
> typically returned by value. Since this is so rarely done,
> I'd argue it's more readable to pass a struct as an argument
> (the fact that some people don't even realize it's possible
> should support that).
Gcc optimises the case where an 8 byte struct is returned from an inline
function just fine. It's far more likely to generate poor code when you
start passing values by reference. I don't buy your readability
argument: Being able to use the return value in expressions can make
code rather easier to grok.
Ian
^ permalink raw reply [flat|nested] 13+ messages in thread
end of thread, other threads:[~2005-04-14 19:33 UTC | newest]
Thread overview: 13+ messages (download: mbox.gz, follow: Atom feed)
-- links below jump to the message on this page --
2005-04-12 18:58 [patch] pagetable cleanups Gerd Knorr
2005-04-14 12:25 ` Michael A Fetterman
2005-04-14 15:01 ` Gerd Knorr
2005-04-14 14:48 ` Mark Williamson
2005-04-14 18:27 ` Hollis Blanchard
2005-04-14 19:07 ` Anthony Liguori
2005-04-14 19:20 ` Hollis Blanchard
2005-04-14 19:30 ` Gerd Knorr
2005-04-14 16:47 ` Christian Limpach
-- strict thread matches above, loose matches on Subject: below --
2005-04-14 14:14 Ian Pratt
2005-04-14 15:11 Ian Pratt
2005-04-14 15:31 ` Gerd Knorr
2005-04-14 19:33 Ian Pratt
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.