* [PATCH 6/7, RFC] x86_64: basic changes for supporting compatibility mode guest
@ 2006-08-23 9:16 Jan Beulich
2006-08-23 9:40 ` Keir Fraser
0 siblings, 1 reply; 8+ messages in thread
From: Jan Beulich @ 2006-08-23 9:16 UTC (permalink / raw)
To: xen-devel
[-- Attachment #1: Type: text/plain, Size: 529 bytes --]
This introduces
- a (generic) config setting to turn on compatibility mode support
- support to load a Dom0 32-bit ELF image on 64-bit
- a compatibility guest mode GDT (requires moving the ring 0
selectors to different GDT slots), and the respective selector
handling changes
- some initial code path changes to address the needs of a
compatibility mode guest
This changes the ABI by adding new fields to vcpu_guest_context - does a change
like this require bumping the ABI version?
Signed-off-by: Jan Beulich <jbeulich@novell.com>
[-- Attachment #2: 32on64-base.patch --]
[-- Type: text/plain, Size: 38326 bytes --]
Index: 2006-08-16/config/x86_64.mk
===================================================================
--- 2006-08-16.orig/config/x86_64.mk 2006-08-23 08:53:32.000000000 +0200
+++ 2006-08-16/config/x86_64.mk 2006-08-23 08:56:04.000000000 +0200
@@ -1,4 +1,5 @@
CONFIG_X86 := y
+CONFIG_COMPAT := y
CONFIG_HVM := y
CONFIG_MIGRATE := y
CONFIG_XCUTILS := y
Index: 2006-08-16/xen/arch/x86/boot/x86_64.S
===================================================================
--- 2006-08-16.orig/xen/arch/x86/boot/x86_64.S 2006-08-23 08:53:32.000000000 +0200
+++ 2006-08-16/xen/arch/x86/boot/x86_64.S 2006-08-23 10:25:59.000000000 +0200
@@ -223,15 +223,35 @@ high_start:
.align PAGE_SIZE, 0
ENTRY(gdt_table)
.quad 0x0000000000000000 /* unused */
- .quad 0x00cf9a000000ffff /* 0xe008 ring 0 code, compatibility */
- .quad 0x00af9a000000ffff /* 0xe010 ring 0 code, 64-bit mode */
- .quad 0x00cf92000000ffff /* 0xe018 ring 0 data */
+ .quad 0x00af9a000000ffff /* 0xe008 ring 0 code, 64-bit mode */
+ .quad 0x00cf92000000ffff /* 0xe010 ring 0 data */
+ .quad 0x0000000000000000 /* reserved */
.quad 0x00cffa000000ffff /* 0xe023 ring 3 code, compatibility */
.quad 0x00cff2000000ffff /* 0xe02b ring 3 data */
.quad 0x00affa000000ffff /* 0xe033 ring 3 code, 64-bit mode */
- .quad 0x0000000000000000 /* unused */
+ .quad 0x00cf9a000000ffff /* 0xe038 ring 0 code, compatibility */
+ .org gdt_table - FIRST_RESERVED_GDT_BYTE + __TSS(0) * 8
.fill 4*NR_CPUS,8,0 /* space for TSS and LDT per CPU */
+#ifdef CONFIG_COMPAT
+# define LIMIT(d) (((d) << 16) /* d = descriptor bits 16..63, limit nibble left 0 */ \
+ | (((MACH2PHYS_COMPAT_VIRT_END - 1) >> PAGE_SHIFT) & 0xffff) \
+ | (((MACH2PHYS_COMPAT_VIRT_END - 1) << (32 - PAGE_SHIFT)) & (0xf << 48)))
+ .align PAGE_SIZE, 0
+ENTRY(compat_gdt_table)
+ .quad 0x0000000000000000 /* unused */
+ .quad 0x00af9a000000ffff /* 0xe008 ring 0 code, 64-bit mode */
+ .quad 0x00cf92000000ffff /* 0xe010 ring 0 data */
+ .quad LIMIT(0x00c0ba000000) /* 0xe019 ring 1 code, compatibility */
+ .quad LIMIT(0x00c0b2000000) /* 0xe021 ring 1 data */
+ .quad LIMIT(0x00c0fa000000) /* 0xe02b ring 3 code, compatibility */
+ .quad LIMIT(0x00c0f2000000) /* 0xe033 ring 3 data */
+ .quad 0x00cf9a000000ffff /* 0xe038 ring 0 code, compatibility */
+ .org compat_gdt_table - FIRST_RESERVED_GDT_BYTE + __TSS(0) * 8
+ .fill 4*NR_CPUS,8,0 /* space for TSS and LDT per CPU */
+# undef LIMIT
+#endif
+
/* Initial PML4 -- level-4 page table. */
.align PAGE_SIZE, 0
ENTRY(idle_pg_table)
Index: 2006-08-16/xen/arch/x86/domain.c
===================================================================
--- 2006-08-16.orig/xen/arch/x86/domain.c 2006-08-23 08:53:32.000000000 +0200
+++ 2006-08-16/xen/arch/x86/domain.c 2006-08-23 08:56:04.000000000 +0200
@@ -256,17 +256,17 @@ int arch_set_info_guest(
if ( !(c->flags & VGCF_HVM_GUEST) )
{
- fixup_guest_stack_selector(c->user_regs.ss);
- fixup_guest_stack_selector(c->kernel_ss);
- fixup_guest_code_selector(c->user_regs.cs);
-
-#ifdef __i386__
- fixup_guest_code_selector(c->event_callback_cs);
- fixup_guest_code_selector(c->failsafe_callback_cs);
+ fixup_guest_stack_selector(d, c->user_regs.ss);
+ fixup_guest_stack_selector(d, c->kernel_ss);
+ fixup_guest_code_selector(d, c->user_regs.cs);
+
+#if defined(__i386__) || defined(CONFIG_COMPAT)
+ fixup_guest_code_selector(d, c->event_callback_cs);
+ fixup_guest_code_selector(d, c->failsafe_callback_cs);
#endif
for ( i = 0; i < 256; i++ )
- fixup_guest_code_selector(c->trap_ctxt[i].cs);
+ fixup_guest_code_selector(d, c->trap_ctxt[i].cs);
}
else if ( !hvm_enabled )
return -EINVAL;
@@ -314,6 +314,7 @@ int arch_set_info_guest(
if ( !(c->flags & VGCF_HVM_GUEST) )
{
+ BUG_ON(IS_COMPAT(d)); /* XXX */
cr3_pfn = gmfn_to_mfn(d, xen_cr3_to_pfn(c->ctrlreg[3]));
v->arch.guest_table = pagetable_from_pfn(cr3_pfn);
}
@@ -409,9 +410,11 @@ void new_thread(struct vcpu *d,
* ESI = start_info
* [EAX,EBX,ECX,EDX,EDI,EBP are zero]
*/
- regs->ds = regs->es = regs->fs = regs->gs = FLAT_KERNEL_DS;
- regs->ss = FLAT_KERNEL_SS;
- regs->cs = FLAT_KERNEL_CS;
+ regs->ds = regs->es = regs->fs = regs->gs = !IS_COMPAT(d->domain)
+ ? FLAT_KERNEL_DS
+ : FLAT_COMPAT_KERNEL_DS;
+ regs->ss = !IS_COMPAT(d->domain) ? FLAT_KERNEL_SS : FLAT_COMPAT_KERNEL_SS;
+ regs->cs = !IS_COMPAT(d->domain) ? FLAT_KERNEL_CS : FLAT_COMPAT_KERNEL_CS;
regs->eip = start_pc;
regs->esp = start_stack;
regs->esi = start_info;
@@ -490,27 +493,30 @@ static void load_segments(struct vcpu *n
all_segs_okay &= loadsegment(gs, nctxt->user_regs.gs);
}
- /* This can only be non-zero if selector is NULL. */
- if ( nctxt->fs_base )
- wrmsr(MSR_FS_BASE,
- nctxt->fs_base,
- nctxt->fs_base>>32);
-
- /* Most kernels have non-zero GS base, so don't bother testing. */
- /* (This is also a serialising instruction, avoiding AMD erratum #88.) */
- wrmsr(MSR_SHADOW_GS_BASE,
- nctxt->gs_base_kernel,
- nctxt->gs_base_kernel>>32);
-
- /* This can only be non-zero if selector is NULL. */
- if ( nctxt->gs_base_user )
- wrmsr(MSR_GS_BASE,
- nctxt->gs_base_user,
- nctxt->gs_base_user>>32);
-
- /* If in kernel mode then switch the GS bases around. */
- if ( n->arch.flags & TF_kernel_mode )
- __asm__ __volatile__ ( "swapgs" );
+ if ( !IS_COMPAT(n->domain) )
+ {
+ /* This can only be non-zero if selector is NULL. */
+ if ( nctxt->fs_base )
+ wrmsr(MSR_FS_BASE,
+ nctxt->fs_base,
+ nctxt->fs_base>>32);
+
+ /* Most kernels have non-zero GS base, so don't bother testing. */
+ /* (This is also a serialising instruction, avoiding AMD erratum #88.) */
+ wrmsr(MSR_SHADOW_GS_BASE,
+ nctxt->gs_base_kernel,
+ nctxt->gs_base_kernel>>32);
+
+ /* This can only be non-zero if selector is NULL. */
+ if ( nctxt->gs_base_user )
+ wrmsr(MSR_GS_BASE,
+ nctxt->gs_base_user,
+ nctxt->gs_base_user>>32);
+
+ /* If in kernel mode then switch the GS bases around. */
+ if ( (n->arch.flags & TF_kernel_mode) )
+ __asm__ __volatile__ ( "swapgs" );
+ }
if ( unlikely(!all_segs_okay) )
{
@@ -521,6 +527,54 @@ static void load_segments(struct vcpu *n
(unsigned long *)nctxt->kernel_sp;
unsigned long cs_and_mask, rflags;
+ if ( IS_COMPAT(n->domain) )
+ {
+ unsigned int *esp = ring_1(regs) ?
+ (unsigned int *)regs->rsp :
+ (unsigned int *)nctxt->kernel_sp;
+ unsigned int cs_and_mask, eflags;
+ int ret = 0;
+
+ /* CS longword also contains full evtchn_upcall_mask. */
+ cs_and_mask = (unsigned short)regs->cs |
+ ((unsigned int)n->vcpu_info->evtchn_upcall_mask << 16);
+ /* Fold upcall mask into RFLAGS.IF. */
+ eflags = regs->_eflags & ~X86_EFLAGS_IF;
+ eflags |= !n->vcpu_info->evtchn_upcall_mask << 9;
+
+ if ( !ring_1(regs) )
+ {
+ ret = put_user(regs->ss, esp-1);
+ ret |= put_user(regs->_esp, esp-2);
+ esp -= 2;
+ }
+
+ if ( ret |
+ put_user(eflags, esp-1) |
+ put_user(cs_and_mask, esp-2) |
+ put_user(regs->_eip, esp-3) |
+ put_user(nctxt->user_regs.gs, esp-4) |
+ put_user(nctxt->user_regs.fs, esp-5) |
+ put_user(nctxt->user_regs.es, esp-6) |
+ put_user(nctxt->user_regs.ds, esp-7) )
+ {
+ DPRINTK("Error while creating failsafe callback frame.\n");
+ domain_crash(n->domain);
+ }
+
+ if ( test_bit(_VGCF_failsafe_disables_events,
+ &n->arch.guest_context.flags) )
+ n->vcpu_info->evtchn_upcall_mask = 1;
+
+ regs->entry_vector = TRAP_syscall;
+ regs->_eflags &= 0xFFFCBEFFUL;
+ regs->ss = FLAT_COMPAT_KERNEL_SS;
+ regs->_esp = (unsigned long)(esp-7);
+ regs->cs = FLAT_COMPAT_KERNEL_CS;
+ regs->_eip = nctxt->failsafe_callback_eip;
+ return;
+ }
+
if ( !(n->arch.flags & TF_kernel_mode) )
toggle_guest_mode(n);
else
@@ -580,7 +634,7 @@ static void save_segments(struct vcpu *v
if ( regs->es )
dirty_segment_mask |= DIRTY_ES;
- if ( regs->fs )
+ if ( regs->fs || IS_COMPAT(v->domain) )
{
dirty_segment_mask |= DIRTY_FS;
ctxt->fs_base = 0; /* != 0 selector kills fs_base */
@@ -590,7 +644,7 @@ static void save_segments(struct vcpu *v
dirty_segment_mask |= DIRTY_FS_BASE;
}
- if ( regs->gs )
+ if ( regs->gs || IS_COMPAT(v->domain) )
{
dirty_segment_mask |= DIRTY_GS;
ctxt->gs_base_user = 0; /* != 0 selector kills gs_base_user */
@@ -728,6 +782,9 @@ void context_switch(struct vcpu *prev, s
if ( !hvm_guest(next) )
{
+ if (IS_COMPAT(prev->domain) != IS_COMPAT(next->domain))
+ local_flush_tlb_one(GDT_VIRT_START(next)
+ + FIRST_RESERVED_GDT_BYTE);
load_LDT(next);
load_segments(next);
}
@@ -933,6 +990,8 @@ void domain_relinquish_resources(struct
put_page_type(mfn_to_page(pfn));
put_page(mfn_to_page(pfn));
+ if ( pfn == pagetable_get_pfn(v->arch.guest_table_user) )
+ v->arch.guest_table_user = pagetable_null();
v->arch.guest_table = pagetable_null();
}
Index: 2006-08-16/xen/arch/x86/domain_build.c
===================================================================
--- 2006-08-16.orig/xen/arch/x86/domain_build.c 2006-08-23 08:53:32.000000000 +0200
+++ 2006-08-16/xen/arch/x86/domain_build.c 2006-08-23 08:56:04.000000000 +0200
@@ -285,8 +285,39 @@ int construct_dom0(struct domain *d,
else
nr_pages = dom0_nrpages;
- if ( (rc = parseelfimage(&dsi)) != 0 )
+ rc = parseelfimage(&dsi);
+#ifdef CONFIG_COMPAT
+ if ( rc == -ENOSYS
+ && (rc = parseelf32image(&dsi)) == 0 )
+ {
+ l1_pgentry_t gdt_l1e;
+
+ set_bit(_DOMF_compat, &d->domain_flags);
+ if ( nr_pages != (unsigned int)nr_pages )
+ nr_pages = UINT_MAX;
+ /*
+ * Map compatibility Xen segments into every VCPU's GDT. See
+ * arch_domain_create() for further comments.
+ */
+ gdt_l1e = l1e_from_page(virt_to_page(compat_gdt_table),
+ PAGE_HYPERVISOR);
+ for ( i = 0; i < MAX_VIRT_CPUS; i++ )
+ {
+ d->arch.mm_perdomain_pt[((i << GDT_LDT_VCPU_SHIFT) +
+ FIRST_RESERVED_GDT_PAGE)] = gdt_l1e;
+ flush_tlb_one_mask(d->domain_dirty_cpumask,
+ GDT_LDT_VIRT_START
+ + (i << GDT_LDT_VCPU_VA_SHIFT)
+ + FIRST_RESERVED_GDT_BYTE);
+ }
+ }
+#endif
+ if ( rc != 0)
+ {
+ if (rc == -ENOSYS)
+ printk("DOM0 image is not a Xen-compatible Elf image.\n");
return rc;
+ }
if ( dsi.xen_section_string == NULL )
{
@@ -295,7 +326,7 @@ int construct_dom0(struct domain *d,
}
dom0_pae = !!strstr(dsi.xen_section_string, "PAE=yes");
- xen_pae = (CONFIG_PAGING_LEVELS == 3);
+ xen_pae = (CONFIG_PAGING_LEVELS == 3) || IS_COMPAT(d);
if ( dom0_pae != xen_pae )
{
printk("PAE mode mismatch between Xen and DOM0 (xen=%s, dom0=%s)\n",
@@ -420,9 +451,9 @@ int construct_dom0(struct domain *d,
* We're basically forcing default RPLs to 1, so that our "what privilege
* level are we returning to?" logic works.
*/
- v->arch.guest_context.kernel_ss = FLAT_KERNEL_SS;
+ v->arch.guest_context.kernel_ss = !IS_COMPAT(d) ? FLAT_KERNEL_SS : FLAT_COMPAT_KERNEL_SS;
for ( i = 0; i < 256; i++ )
- v->arch.guest_context.trap_ctxt[i].cs = FLAT_KERNEL_CS;
+ v->arch.guest_context.trap_ctxt[i].cs = !IS_COMPAT(d) ? FLAT_KERNEL_CS : FLAT_COMPAT_KERNEL_CS;
#if defined(__i386__)
@@ -575,6 +606,11 @@ int construct_dom0(struct domain *d,
return -EINVAL;
}
+#ifdef CONFIG_COMPAT
+ v->arch.guest_context.failsafe_callback_cs = FLAT_COMPAT_KERNEL_CS;
+ v->arch.guest_context.event_callback_cs = FLAT_COMPAT_KERNEL_CS;
+#endif
+
/* WARNING: The new domain must have its 'processor' field filled in! */
maddr_to_page(mpt_alloc)->u.inuse.type_info = PGT_l4_page_table;
l4start = l4tab = __va(mpt_alloc); mpt_alloc += PAGE_SIZE;
@@ -584,6 +620,8 @@ int construct_dom0(struct domain *d,
l4tab[l4_table_offset(PERDOMAIN_VIRT_START)] =
l4e_from_paddr(__pa(d->arch.mm_perdomain_l3), __PAGE_HYPERVISOR);
v->arch.guest_table = pagetable_from_paddr(__pa(l4start));
+ if ( IS_COMPAT(d) )
+ v->arch.guest_table_user = v->arch.guest_table;
l4tab += l4_table_offset(dsi.v_start);
mfn = alloc_spfn;
@@ -691,6 +729,11 @@ int construct_dom0(struct domain *d,
write_ptbase(v);
/* Copy the OS image and free temporary buffer. */
+#ifdef CONFIG_COMPAT
+ if ( IS_COMPAT(d) )
+ (void)loadelf32image(&dsi);
+ else
+#endif
(void)loadelfimage(&dsi);
p = strstr(dsi.xen_section_string, "HYPERCALL_PAGE=");
@@ -730,7 +773,7 @@ int construct_dom0(struct domain *d,
si->mfn_list = vphysmap_start;
sprintf(si->magic, "xen-%i.%i-x86_%d%s",
xen_major_version(), xen_minor_version(),
- BITS_PER_LONG, xen_pae ? "p" : "");
+ !IS_COMPAT(d) ? BITS_PER_LONG : 32, xen_pae ? "p" : "");
/* Write the phys->machine and machine->phys table entries. */
for ( pfn = 0; pfn < d->tot_pages; pfn++ )
@@ -876,13 +919,28 @@ int elf_sanity_check(Elf_Ehdr *ehdr)
(ehdr->e_ident[EI_DATA] != ELFDATA2LSB) ||
(ehdr->e_type != ET_EXEC) )
{
- printk("DOM0 image is not a Xen-compatible Elf image.\n");
return 0;
}
return 1;
}
+#ifdef CONFIG_COMPAT
+int elf32_sanity_check(Elf32_Ehdr *ehdr)
+{
+    /* Accept only little-endian, 32-bit, i386 ELF executables. */
+    if ( IS_ELF(*ehdr) &&
+         (ehdr->e_ident[EI_CLASS] == ELFCLASS32) &&
+         (ehdr->e_machine == EM_386) &&
+         (ehdr->e_ident[EI_DATA] == ELFDATA2LSB) &&
+         (ehdr->e_type == ET_EXEC) )
+        return 1;
+
+    /* Any failed check above rejects the image. */
+    return 0;
+}
+#endif
+
/*
* Local variables:
* mode: C
Index: 2006-08-16/xen/arch/x86/mm.c
===================================================================
--- 2006-08-16.orig/xen/arch/x86/mm.c 2006-08-23 08:53:32.000000000 +0200
+++ 2006-08-16/xen/arch/x86/mm.c 2006-08-23 08:56:05.000000000 +0200
@@ -391,7 +391,7 @@ static int alloc_segdesc_page(struct pag
descs = map_domain_page(page_to_mfn(page));
for ( i = 0; i < 512; i++ )
- if ( unlikely(!check_descriptor(&descs[i])) )
+ if ( unlikely(!check_descriptor(page_get_owner(page), &descs[i])) )
goto fail;
unmap_domain_page(descs);
@@ -1746,6 +1746,8 @@ int new_guest_cr3(unsigned long mfn)
/* Switch to idle pagetable: this VCPU has no active p.t. now. */
MEM_LOG("New baseptr %lx: slow path via idle pagetables", mfn);
old_base_mfn = pagetable_get_pfn(v->arch.guest_table);
+ if ( old_base_mfn == pagetable_get_pfn(v->arch.guest_table_user) )
+ v->arch.guest_table_user = pagetable_null();
v->arch.guest_table = pagetable_null();
update_pagetables(v);
write_cr3(__pa(idle_pg_table));
@@ -1770,6 +1772,8 @@ int new_guest_cr3(unsigned long mfn)
old_base_mfn = pagetable_get_pfn(v->arch.guest_table);
v->arch.guest_table = pagetable_from_pfn(mfn);
+ if ( IS_COMPAT(d) )
+ v->arch.guest_table_user = v->arch.guest_table;
update_pagetables(v); /* update shadow_table and monitor_table */
write_ptbase(v);
@@ -2882,7 +2886,7 @@ long do_update_descriptor(u64 pa, u64 de
if ( !VALID_MFN(mfn = gmfn_to_mfn(dom, gmfn)) ||
(((unsigned int)pa % sizeof(struct desc_struct)) != 0) ||
!mfn_valid(mfn) ||
- !check_descriptor(&d) )
+ !check_descriptor(dom, &d) )
{
UNLOCK_BIGLOCK(dom);
return -EINVAL;
Index: 2006-08-16/xen/arch/x86/traps.c
===================================================================
--- 2006-08-16.orig/xen/arch/x86/traps.c 2006-08-23 08:53:32.000000000 +0200
+++ 2006-08-16/xen/arch/x86/traps.c 2006-08-23 09:52:35.000000000 +0200
@@ -1720,6 +1720,13 @@ void set_tss_desc(unsigned int n, void *
(unsigned long)addr,
offsetof(struct tss_struct, __cacheline_filler) - 1,
9);
+#ifdef CONFIG_COMPAT
+ _set_tssldt_desc(
+ compat_gdt_table + __TSS(n) - FIRST_RESERVED_GDT_ENTRY,
+ (unsigned long)addr,
+ offsetof(struct tss_struct, __cacheline_filler) - 1,
+ 11);
+#endif
}
void __init trap_init(void)
@@ -1794,7 +1801,7 @@ long do_set_trap_table(XEN_GUEST_HANDLE(
if ( cur.address == 0 )
break;
- fixup_guest_code_selector(cur.cs);
+ fixup_guest_code_selector(current->domain, cur.cs);
memcpy(&dst[cur.vector], &cur, sizeof(cur));
Index: 2006-08-16/xen/arch/x86/x86_32/mm.c
===================================================================
--- 2006-08-16.orig/xen/arch/x86/x86_32/mm.c 2006-08-23 08:53:32.000000000 +0200
+++ 2006-08-16/xen/arch/x86/x86_32/mm.c 2006-08-23 08:56:05.000000000 +0200
@@ -226,7 +226,7 @@ long do_stack_switch(unsigned long ss, u
int nr = smp_processor_id();
struct tss_struct *t = &init_tss[nr];
- fixup_guest_stack_selector(ss);
+ fixup_guest_stack_selector(current->domain, ss);
current->arch.guest_context.kernel_ss = ss;
current->arch.guest_context.kernel_sp = esp;
@@ -237,7 +237,7 @@ long do_stack_switch(unsigned long ss, u
}
/* Returns TRUE if given descriptor is valid for GDT or LDT. */
-int check_descriptor(struct desc_struct *d)
+int check_descriptor(const struct domain *dom, struct desc_struct *d)
{
unsigned long base, limit;
u32 a = d->a, b = d->b;
@@ -257,8 +257,8 @@ int check_descriptor(struct desc_struct
* gates (consider a call gate pointing at another kernel descriptor with
* DPL 0 -- this would get the OS ring-0 privileges).
*/
- if ( (b & _SEGMENT_DPL) < (GUEST_KERNEL_RPL << 13) )
- d->b = b = (b & ~_SEGMENT_DPL) | (GUEST_KERNEL_RPL << 13);
+ if ( (b & _SEGMENT_DPL) < (GUEST_KERNEL_RPL(dom) << 13) )
+ d->b = b = (b & ~_SEGMENT_DPL) | (GUEST_KERNEL_RPL(dom) << 13);
if ( !(b & _SEGMENT_S) )
{
@@ -280,8 +280,8 @@ int check_descriptor(struct desc_struct
/* Validate and fix up the target code selector. */
cs = a >> 16;
- fixup_guest_code_selector(cs);
- if ( !guest_gate_selector_okay(cs) )
+ fixup_guest_code_selector(dom, cs);
+ if ( !guest_gate_selector_okay(dom, cs) )
goto bad;
a = d->a = (d->a & 0xffffU) | (cs << 16);
Index: 2006-08-16/xen/arch/x86/x86_32/traps.c
===================================================================
--- 2006-08-16.orig/xen/arch/x86/x86_32/traps.c 2006-08-23 08:53:32.000000000 +0200
+++ 2006-08-16/xen/arch/x86/x86_32/traps.c 2006-08-23 08:56:05.000000000 +0200
@@ -276,7 +276,7 @@ void init_int80_direct_trap(struct vcpu
* switch to the Xen stack and we need to swap back to the guest
* kernel stack before passing control to the system call entry point.
*/
- if ( TI_GET_IF(ti) || !guest_gate_selector_okay(ti->cs) ||
+ if ( TI_GET_IF(ti) || !guest_gate_selector_okay(v->domain, ti->cs) ||
supervisor_mode_kernel )
{
v->arch.int80_desc.a = v->arch.int80_desc.b = 0;
@@ -306,7 +306,7 @@ static long register_guest_callback(stru
long ret = 0;
struct vcpu *v = current;
- fixup_guest_code_selector(reg->address.cs);
+ fixup_guest_code_selector(v->domain, reg->address.cs);
switch ( reg->type )
{
Index: 2006-08-16/xen/arch/x86/x86_64/asm-offsets.c
===================================================================
--- 2006-08-16.orig/xen/arch/x86/x86_64/asm-offsets.c 2006-08-23 08:53:32.000000000 +0200
+++ 2006-08-16/xen/arch/x86/x86_64/asm-offsets.c 2006-08-23 09:52:35.000000000 +0200
@@ -62,8 +62,8 @@ void __dummy__(void)
arch.guest_context.failsafe_callback_eip);
OFFSET(VCPU_syscall_addr, struct vcpu,
arch.guest_context.syscall_callback_eip);
- OFFSET(VCPU_kernel_sp, struct vcpu,
- arch.guest_context.kernel_sp);
+ OFFSET(VCPU_kernel_sp, struct vcpu, arch.guest_context.kernel_sp);
+ OFFSET(VCPU_kernel_ss, struct vcpu, arch.guest_context.kernel_ss);
OFFSET(VCPU_guest_context_flags, struct vcpu, arch.guest_context.flags);
OFFSET(VCPU_arch_guest_fpu_ctxt, struct vcpu, arch.guest_context.fpu_ctxt);
OFFSET(VCPU_flags, struct vcpu, vcpu_flags);
Index: 2006-08-16/xen/arch/x86/x86_64/mm.c
===================================================================
--- 2006-08-16.orig/xen/arch/x86/x86_64/mm.c 2006-08-23 08:53:32.000000000 +0200
+++ 2006-08-16/xen/arch/x86/x86_64/mm.c 2006-08-23 08:56:05.000000000 +0200
@@ -222,7 +222,7 @@ long subarch_memory_op(int op, XEN_GUEST
long do_stack_switch(unsigned long ss, unsigned long esp)
{
- fixup_guest_stack_selector(ss);
+ fixup_guest_stack_selector(current->domain, ss);
current->arch.guest_context.kernel_ss = ss;
current->arch.guest_context.kernel_sp = esp;
return 0;
@@ -282,7 +282,7 @@ long do_set_segment_base(unsigned int wh
/* Returns TRUE if given descriptor is valid for GDT or LDT. */
-int check_descriptor(struct desc_struct *d)
+int check_descriptor(const struct domain *dom, struct desc_struct *d)
{
u32 a = d->a, b = d->b;
u16 cs;
@@ -292,8 +292,8 @@ int check_descriptor(struct desc_struct
goto good;
/* Check and fix up the DPL. */
- if ( (b & _SEGMENT_DPL) < (GUEST_KERNEL_RPL << 13) )
- d->b = b = (b & ~_SEGMENT_DPL) | (GUEST_KERNEL_RPL << 13);
+ if ( (b & _SEGMENT_DPL) < (GUEST_KERNEL_RPL(dom) << 13) )
+ d->b = b = (b & ~_SEGMENT_DPL) | (GUEST_KERNEL_RPL(dom) << 13);
/* All code and data segments are okay. No base/limit checking. */
if ( (b & _SEGMENT_S) )
@@ -309,8 +309,8 @@ int check_descriptor(struct desc_struct
/* Validate and fix up the target code selector. */
cs = a >> 16;
- fixup_guest_code_selector(cs);
- if ( !guest_gate_selector_okay(cs) )
+ fixup_guest_code_selector(dom, cs);
+ if ( !guest_gate_selector_okay(dom, cs) )
goto bad;
a = d->a = (d->a & 0xffffU) | (cs << 16);
Index: 2006-08-16/xen/arch/x86/x86_64/traps.c
===================================================================
--- 2006-08-16.orig/xen/arch/x86/x86_64/traps.c 2006-08-23 08:53:32.000000000 +0200
+++ 2006-08-16/xen/arch/x86/x86_64/traps.c 2006-08-23 09:52:35.000000000 +0200
@@ -160,6 +160,8 @@ asmlinkage void do_double_fault(struct c
void toggle_guest_mode(struct vcpu *v)
{
+ if ( IS_COMPAT(v->domain) )
+ return;
v->arch.flags ^= TF_kernel_mode;
__asm__ __volatile__ ( "swapgs" );
update_pagetables(v);
Index: 2006-08-16/xen/common/Makefile
===================================================================
--- 2006-08-16.orig/xen/common/Makefile 2006-08-23 08:53:32.000000000 +0200
+++ 2006-08-16/xen/common/Makefile 2006-08-23 08:56:05.000000000 +0200
@@ -3,6 +3,7 @@ obj-y += bitmap.o
obj-y += dom0_ops.o
obj-y += domain.o
obj-y += elf.o
+obj-$(CONFIG_COMPAT) += elf32.o
obj-y += event_channel.o
obj-y += grant_table.o
obj-y += kernel.o
Index: 2006-08-16/xen/common/elf.c
===================================================================
--- 2006-08-16.orig/xen/common/elf.c 2006-08-23 08:53:32.000000000 +0200
+++ 2006-08-16/xen/common/elf.c 2006-08-23 08:56:05.000000000 +0200
@@ -29,7 +29,7 @@ int parseelfimage(struct domain_setup_in
int h, virt_base_defined, elf_pa_off_defined;
if ( !elf_sanity_check(ehdr) )
- return -EINVAL;
+ return -ENOSYS;
if ( (ehdr->e_phoff + (ehdr->e_phnum*ehdr->e_phentsize)) > dsi->image_len )
{
Index: 2006-08-16/xen/include/asm-x86/config.h
===================================================================
--- 2006-08-16.orig/xen/include/asm-x86/config.h 2006-08-23 08:53:32.000000000 +0200
+++ 2006-08-16/xen/include/asm-x86/config.h 2006-08-23 10:25:12.000000000 +0200
@@ -86,6 +86,7 @@ extern unsigned long _end; /* standard E
#if defined(__x86_64__)
#define CONFIG_X86_64 1
+#define CONFIG_COMPAT 1
#define asmlinkage
@@ -175,13 +176,22 @@ extern unsigned long _end; /* standard E
#define DIRECTMAP_VIRT_START (PML4_ADDR(262))
#define DIRECTMAP_VIRT_END (DIRECTMAP_VIRT_START + PML4_ENTRY_BYTES*2)
+#define __HYPERVISOR_COMPAT_VIRT_START 0xF5800000
+#define __MACH2PHYS_COMPAT_VIRT_START 0xF5800000
+#define __MACH2PHYS_COMPAT_VIRT_END 0xF6800000
+#define HYPERVISOR_COMPAT_VIRT_START mk_unsigned_long(__HYPERVISOR_COMPAT_VIRT_START)
+#define MACH2PHYS_COMPAT_VIRT_START mk_unsigned_long(__MACH2PHYS_COMPAT_VIRT_START)
+#define MACH2PHYS_COMPAT_VIRT_END mk_unsigned_long(__MACH2PHYS_COMPAT_VIRT_END)
+#define MACH2PHYS_COMPAT_NR_ENTRIES ((MACH2PHYS_COMPAT_VIRT_END-MACH2PHYS_COMPAT_VIRT_START)>>2) /* 4-byte entries */
+#define compat_machine_to_phys_mapping ((unsigned int *)MACH2PHYS_COMPAT_VIRT_START) /* 32-bit entries, matching the >>2 above */
+
#define PGT_base_page_table PGT_l4_page_table
-#define __HYPERVISOR_CS64 0xe010
-#define __HYPERVISOR_CS32 0xe008
+#define __HYPERVISOR_CS64 0xe008
+#define __HYPERVISOR_CS32 0xe038
#define __HYPERVISOR_CS __HYPERVISOR_CS64
#define __HYPERVISOR_DS64 0x0000
-#define __HYPERVISOR_DS32 0xe018
+#define __HYPERVISOR_DS32 0xe010
#define __HYPERVISOR_DS __HYPERVISOR_DS64
/* For generic assembly code: use macros to define operation/operand sizes. */
Index: 2006-08-16/xen/include/asm-x86/desc.h
===================================================================
--- 2006-08-16.orig/xen/include/asm-x86/desc.h 2006-08-23 08:53:32.000000000 +0200
+++ 2006-08-16/xen/include/asm-x86/desc.h 2006-08-23 10:27:28.000000000 +0200
@@ -18,31 +18,76 @@
#define LDT_ENTRY_SIZE 8
+#if defined(__x86_64__)
+
+#define FLAT_COMPAT_RING1_CS 0xe019 /* GDT index 259 */
+#define FLAT_COMPAT_RING1_DS 0xe021 /* GDT index 260 */
+#define FLAT_COMPAT_RING1_SS 0xe021 /* GDT index 260 */
+#define FLAT_COMPAT_RING3_CS 0xe02b /* GDT index 261 */
+#define FLAT_COMPAT_RING3_DS 0xe033 /* GDT index 262 */
+#define FLAT_COMPAT_RING3_SS 0xe033 /* GDT index 262 */
+
+#define FLAT_COMPAT_KERNEL_DS FLAT_COMPAT_RING1_DS
+#define FLAT_COMPAT_KERNEL_CS FLAT_COMPAT_RING1_CS
+#define FLAT_COMPAT_KERNEL_SS FLAT_COMPAT_RING1_SS
+#define FLAT_COMPAT_USER_DS FLAT_COMPAT_RING3_DS
+#define FLAT_COMPAT_USER_CS FLAT_COMPAT_RING3_CS
+#define FLAT_COMPAT_USER_SS FLAT_COMPAT_RING3_SS
+
+#define __FIRST_TSS_ENTRY (FIRST_RESERVED_GDT_ENTRY + 8)
+#define __FIRST_LDT_ENTRY (__FIRST_TSS_ENTRY + 2)
+
+#define __TSS(n) (((n)<<2) + __FIRST_TSS_ENTRY)
+#define __LDT(n) (((n)<<2) + __FIRST_LDT_ENTRY)
+
+#elif defined(__i386__)
+
+#define FLAT_COMPAT_KERNEL_CS FLAT_KERNEL_CS
+#define FLAT_COMPAT_KERNEL_DS FLAT_KERNEL_DS
+#define FLAT_COMPAT_KERNEL_SS FLAT_KERNEL_SS
+#define FLAT_COMPAT_USER_CS FLAT_USER_CS
+#define FLAT_COMPAT_USER_DS FLAT_USER_DS
+#define FLAT_COMPAT_USER_SS FLAT_USER_SS
+
+#define __DOUBLEFAULT_TSS_ENTRY FIRST_RESERVED_GDT_ENTRY
+
+#define __FIRST_TSS_ENTRY (FIRST_RESERVED_GDT_ENTRY + 8)
+#define __FIRST_LDT_ENTRY (__FIRST_TSS_ENTRY + 1)
+
+#define __TSS(n) (((n)<<1) + __FIRST_TSS_ENTRY)
+#define __LDT(n) (((n)<<1) + __FIRST_LDT_ENTRY)
+
+#endif
+
+#ifndef __ASSEMBLY__
+
#define load_TR(n) __asm__ __volatile__ ("ltr %%ax" : : "a" (__TSS(n)<<3) )
#if defined(__x86_64__)
-#define GUEST_KERNEL_RPL 3
+#define GUEST_KERNEL_RPL(d) (!IS_COMPAT(d) ? 3 : 1)
#elif defined(__i386__)
-#define GUEST_KERNEL_RPL 1
+#define GUEST_KERNEL_RPL(d) ((void)(d), 1)
#endif
/* Fix up the RPL of a guest segment selector. */
-#define __fixup_guest_selector(sel) \
- ((sel) = (((sel) & 3) >= GUEST_KERNEL_RPL) ? (sel) : \
- (((sel) & ~3) | GUEST_KERNEL_RPL))
+#define __fixup_guest_selector(d, sel) \
+({ \
+ uint16_t _rpl = GUEST_KERNEL_RPL(d); \
+ (sel) = (((sel) & 3) >= _rpl) ? (sel) : (((sel) & ~3) | _rpl); \
+})
/* Stack selectors don't need fixing up if the kernel runs in ring 0. */
#ifdef CONFIG_X86_SUPERVISOR_MODE_KERNEL
-#define fixup_guest_stack_selector(ss) ((void)0)
+#define fixup_guest_stack_selector(d, ss) ((void)0)
#else
-#define fixup_guest_stack_selector(ss) __fixup_guest_selector(ss)
+#define fixup_guest_stack_selector(d, ss) __fixup_guest_selector(d, ss)
#endif
/*
* Code selectors are always fixed up. It allows the Xen exit stub to detect
* return to guest context, even when the guest kernel runs in ring 0.
*/
-#define fixup_guest_code_selector(cs) __fixup_guest_selector(cs)
+#define fixup_guest_code_selector(d, cs) __fixup_guest_selector(d, cs)
/*
* We need this function because enforcing the correct guest kernel RPL is
@@ -57,11 +102,15 @@
* DPL < CPL then they'll be cleared automatically. If SS RPL or DPL differs
* from CS RPL then we'll #GP.
*/
-#define guest_gate_selector_okay(sel) \
+#define guest_gate_selector_okay(d, sel) \
((((sel)>>3) < FIRST_RESERVED_GDT_ENTRY) || /* Guest seg? */ \
- ((sel) == FLAT_KERNEL_CS) || /* Xen default seg? */ \
+ ((sel) == (!IS_COMPAT(d) ? \
+ FLAT_KERNEL_CS : /* Xen default seg? */ \
+ FLAT_COMPAT_KERNEL_CS)) || /* Xen default compat seg? */ \
((sel) & 4)) /* LDT seg? */
+#endif /* __ASSEMBLY__ */
+
/* These are bitmasks for the high 32 bits of a descriptor table entry. */
#define _SEGMENT_TYPE (15<< 8)
#define _SEGMENT_EC ( 1<<10) /* Expand-down or Conforming segment */
@@ -81,12 +130,6 @@ struct desc_struct {
#if defined(__x86_64__)
-#define __FIRST_TSS_ENTRY (FIRST_RESERVED_GDT_ENTRY + 8)
-#define __FIRST_LDT_ENTRY (__FIRST_TSS_ENTRY + 2)
-
-#define __TSS(n) (((n)<<2) + __FIRST_TSS_ENTRY)
-#define __LDT(n) (((n)<<2) + __FIRST_LDT_ENTRY)
-
typedef struct {
u64 a, b;
} idt_entry_t;
@@ -118,14 +161,6 @@ do {
#elif defined(__i386__)
-#define __DOUBLEFAULT_TSS_ENTRY FIRST_RESERVED_GDT_ENTRY
-
-#define __FIRST_TSS_ENTRY (FIRST_RESERVED_GDT_ENTRY + 8)
-#define __FIRST_LDT_ENTRY (__FIRST_TSS_ENTRY + 1)
-
-#define __TSS(n) (((n)<<1) + __FIRST_TSS_ENTRY)
-#define __LDT(n) (((n)<<1) + __FIRST_LDT_ENTRY)
-
typedef struct desc_struct idt_entry_t;
#define _set_gate(gate_addr,type,dpl,addr) \
@@ -155,6 +190,11 @@ __asm__ __volatile__ ("movw %w3,0(%2)\n\
#endif
extern struct desc_struct gdt_table[];
+#ifdef CONFIG_COMPAT
+extern struct desc_struct compat_gdt_table[];
+#else
+# define compat_gdt_table gdt_table
+#endif
struct Xgt_desc_struct {
unsigned short size;
Index: 2006-08-16/xen/include/asm-x86/ldt.h
===================================================================
--- 2006-08-16.orig/xen/include/asm-x86/ldt.h 2006-08-23 08:53:32.000000000 +0200
+++ 2006-08-16/xen/include/asm-x86/ldt.h 2006-08-23 08:56:05.000000000 +0200
@@ -17,7 +17,8 @@ static inline void load_LDT(struct vcpu
else
{
cpu = smp_processor_id();
- desc = gdt_table + __LDT(cpu) - FIRST_RESERVED_GDT_ENTRY;
+ desc = (!IS_COMPAT(v->domain) ? gdt_table : compat_gdt_table)
+ + __LDT(cpu) - FIRST_RESERVED_GDT_ENTRY;
_set_tssldt_desc(desc, LDT_VIRT_START(v), ents*8-1, 2);
__asm__ __volatile__ ( "lldt %%ax" : : "a" (__LDT(cpu)<<3) );
}
Index: 2006-08-16/xen/include/asm-x86/mm.h
===================================================================
--- 2006-08-16.orig/xen/include/asm-x86/mm.h 2006-08-23 08:53:32.000000000 +0200
+++ 2006-08-16/xen/include/asm-x86/mm.h 2006-08-23 08:56:05.000000000 +0200
@@ -254,7 +254,7 @@ static inline int page_is_removable(stru
ASSERT(((_p)->count_info & PGC_count_mask) != 0); \
ASSERT(page_get_owner(_p) == (_d))
-int check_descriptor(struct desc_struct *d);
+int check_descriptor(const struct domain *, struct desc_struct *d);
/*
* The MPT (machine->physical mapping table) is an array of word-sized
Index: 2006-08-16/xen/include/asm-x86/regs.h
===================================================================
--- 2006-08-16.orig/xen/include/asm-x86/regs.h 2006-08-23 08:53:32.000000000 +0200
+++ 2006-08-16/xen/include/asm-x86/regs.h 2006-08-23 08:56:05.000000000 +0200
@@ -38,7 +38,8 @@ enum EFLAGS {
ASSERT(diff < STACK_SIZE); \
/* If a guest frame, it must be have guest privs (unless HVM guest). */ \
/* We permit CS==0 which can come from an uninitialised trap entry. */ \
- ASSERT((diff != 0) || vm86_mode(r) || ((r->cs&3) >= GUEST_KERNEL_RPL) || \
+ ASSERT((diff != 0) || vm86_mode(r) || \
+ ((r->cs&3) >= GUEST_KERNEL_RPL(current->domain)) || \
(r->cs == 0) || hvm_guest(current)); \
/* If not a guest frame, it must be a hypervisor frame. */ \
ASSERT((diff == 0) || (!vm86_mode(r) && (r->cs == __HYPERVISOR_CS))); \
Index: 2006-08-16/xen/include/asm-x86/x86_64/regs.h
===================================================================
--- 2006-08-16.orig/xen/include/asm-x86/x86_64/regs.h 2006-08-23 08:53:32.000000000 +0200
+++ 2006-08-16/xen/include/asm-x86/x86_64/regs.h 2006-08-23 08:56:05.000000000 +0200
@@ -11,7 +11,9 @@
#define ring_3(r) (((r)->cs & 3) == 3)
#define guest_kernel_mode(v, r) \
- (ring_3(r) && ((v)->arch.flags & TF_kernel_mode))
+ (!IS_COMPAT((v)->domain) ? /* compat guest kernels run in ring 1 */ \
+ ring_3(r) && ((v)->arch.flags & TF_kernel_mode) : \
+ ring_1(r))
#define permit_softint(dpl, v, r) \
((dpl) >= (guest_kernel_mode(v, r) ? 1 : 3))
Index: 2006-08-16/xen/include/public/arch-x86_64.h
===================================================================
--- 2006-08-16.orig/xen/include/public/arch-x86_64.h 2006-08-23 08:53:32.000000000 +0200
+++ 2006-08-16/xen/include/public/arch-x86_64.h 2006-08-23 10:19:11.000000000 +0200
@@ -176,7 +176,10 @@ DEFINE_XEN_GUEST_HANDLE(trap_info_t);
#ifdef __GNUC__
/* Anonymous union includes both 32- and 64-bit names (e.g., eax/rax). */
-#define __DECL_REG(name) union { uint64_t r ## name, e ## name; }
+#define __DECL_REG(name) union { \
+ uint64_t r ## name, e ## name; \
+ uint32_t _e ## name; \
+}
#else
/* Non-gcc sources must always use the proper 64-bit name (e.g., rax). */
#define __DECL_REG(name) uint64_t r ## name
@@ -255,6 +258,8 @@ struct vcpu_guest_context {
uint64_t fs_base;
uint64_t gs_base_kernel;
uint64_t gs_base_user;
+ unsigned int event_callback_cs; /* compat CS of event callback */
+ unsigned int failsafe_callback_cs; /* compat CS of failsafe cb */
};
typedef struct vcpu_guest_context vcpu_guest_context_t;
DEFINE_XEN_GUEST_HANDLE(vcpu_guest_context_t);
Index: 2006-08-16/xen/include/xen/elf.h
===================================================================
--- 2006-08-16.orig/xen/include/xen/elf.h 2006-08-23 08:53:32.000000000 +0200
+++ 2006-08-16/xen/include/xen/elf.h 2006-08-23 08:56:05.000000000 +0200
@@ -529,6 +529,12 @@ struct domain_setup_info;
extern int loadelfimage(struct domain_setup_info *);
extern int parseelfimage(struct domain_setup_info *);
+#ifdef CONFIG_COMPAT
+extern int loadelf32image(struct domain_setup_info *);
+extern int parseelf32image(struct domain_setup_info *);
+extern int elf32_sanity_check(Elf32_Ehdr *ehdr);
+#endif
+
#ifdef Elf_Ehdr
extern int elf_sanity_check(Elf_Ehdr *ehdr);
#endif
Index: 2006-08-16/xen/include/xen/sched.h
===================================================================
--- 2006-08-16.orig/xen/include/xen/sched.h 2006-08-23 08:53:32.000000000 +0200
+++ 2006-08-16/xen/include/xen/sched.h 2006-08-23 08:56:05.000000000 +0200
@@ -404,6 +404,9 @@ extern struct domain *domain_list;
/* Domain is paused by the hypervisor? */
#define _DOMF_paused 6
#define DOMF_paused (1UL<<_DOMF_paused)
+ /* Domain is a compatibility one? */
+#define _DOMF_compat 7
+#define DOMF_compat (1UL<<_DOMF_compat)
static inline int vcpu_runnable(struct vcpu *v)
{
@@ -434,6 +437,13 @@ static inline void vcpu_unblock(struct v
#define IS_PRIV(_d) \
(test_bit(_DOMF_privileged, &(_d)->domain_flags))
+#ifdef CONFIG_COMPAT
+#define IS_COMPAT(_d) \
+ (test_bit(_DOMF_compat, &(_d)->domain_flags))
+#else
+#define IS_COMPAT(_d) 0
+#endif
+
#define VM_ASSIST(_d,_t) (test_bit((_t), &(_d)->vm_assist))
#endif /* __SCHED_H__ */
[-- Attachment #3: Type: text/plain, Size: 138 bytes --]
_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xensource.com
http://lists.xensource.com/xen-devel
^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [PATCH 6/7, RFC] x86_64: basic changes for supporting compatibility mode guest
2006-08-23 9:16 [PATCH 6/7, RFC] x86_64: basic changes for supporting compatibility mode guest Jan Beulich
@ 2006-08-23 9:40 ` Keir Fraser
2006-08-23 10:17 ` Jan Beulich
2006-08-23 10:32 ` Jan Beulich
0 siblings, 2 replies; 8+ messages in thread
From: Keir Fraser @ 2006-08-23 9:40 UTC (permalink / raw)
To: Jan Beulich, xen-devel
On 23/8/06 10:16 am, "Jan Beulich" <jbeulich@novell.com> wrote:
> This changes the ABI in adding to vcpu_guest_context - does a change like
> this require bumping the ABI version?
We can't do this as the struct is part of the domU ABI (used for
initialising secondary VCPUs). You'll have to add the extra CS values to
64-bit arch_vcpu, I think.
-- Keir
^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [PATCH 6/7, RFC] x86_64: basic changes for supporting compatibility mode guest
2006-08-23 9:40 ` Keir Fraser
@ 2006-08-23 10:17 ` Jan Beulich
2006-08-23 10:36 ` Keir Fraser
2006-08-23 10:32 ` Jan Beulich
1 sibling, 1 reply; 8+ messages in thread
From: Jan Beulich @ 2006-08-23 10:17 UTC (permalink / raw)
To: Keir Fraser, xen-devel
>>> Keir Fraser <Keir.Fraser@cl.cam.ac.uk> 23.08.06 11:40 >>>
>On 23/8/06 10:16 am, "Jan Beulich" <jbeulich@novell.com> wrote:
>
>> This changes the ABI in adding to vcpu_guest_context - does a change like
>> this require bumping the ABI version?
>
>We can't do this as the struct is part of the domU ABI (used for
>initialising secondary VCPUs). You'll have to add the extra CS values to
>64-bit arch_vcpu, I think.
Then libxc/xc_linux_build.c (after appropriate adjustment) wouldn't have
a way to communicate these for a new domain. If extending the structure
isn't possible at all, then we'll either have to make event_callback_eip and
failsafe_callback_eip unions (permitting a selector:offset pair) or make
syscall_callback_eip a union (permitting storing the selectors). I'd favor
the second option as that field is entirely useless as long as x86_32
doesn't support syscall (which doesn't make sense as it would make
things slower rather than speeding them up) - that way one doesn't have
to be careful to not access the other two full 64bit *_callback_eip
fields.
Jan
^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [PATCH 6/7, RFC] x86_64: basic changes for supporting compatibility mode guest
2006-08-23 9:40 ` Keir Fraser
2006-08-23 10:17 ` Jan Beulich
@ 2006-08-23 10:32 ` Jan Beulich
2006-08-23 10:33 ` Keir Fraser
1 sibling, 1 reply; 8+ messages in thread
From: Jan Beulich @ 2006-08-23 10:32 UTC (permalink / raw)
To: Keir Fraser, xen-devel
There was one more issue I forgot to mention: We must disable execution of
'syscall' in some way for compatibility domains. The question is whether to
add the respective detection code early in the syscall path (which would affect
the performance of all 64-bit domains) or whether to {dis,en}able the use of
the instruction in the context switch code by updating EFER as needed
(which would have a performance effect only when switching between
a native and a compatibility domain, but the performance effect would likely
be significantly greater). I'd prefer the second option, not least because
its implementation is significantly easier.
Jan
^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [PATCH 6/7, RFC] x86_64: basic changes for supporting compatibility mode guest
2006-08-23 10:32 ` Jan Beulich
@ 2006-08-23 10:33 ` Keir Fraser
0 siblings, 0 replies; 8+ messages in thread
From: Keir Fraser @ 2006-08-23 10:33 UTC (permalink / raw)
To: Jan Beulich, xen-devel
On 23/8/06 11:32 am, "Jan Beulich" <jbeulich@novell.com> wrote:
> There was one more issue I forgot to mention: We must disable execution of
> 'syscall' in some way for compatibility domains. The question is whether to
> add respective detection code early into the syscall path (which would affect
> performance-wise all 64-bit domains) or whether to {dis,en}able the use of
> the instruction in the context switch code by updating EFER as needed
> (which would have a performance effect only when switching between
> a native and a compatibility domain, but the performance effect would likely
> be significantly greater). I'd prefer the second option, not the least because
> its implementation is significantly easier.
I'm not fussed either way really. It depends how slow it is to update EFER.
That'd be easy to check.
-- Keir
^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [PATCH 6/7, RFC] x86_64: basic changes for supporting compatibility mode guest
2006-08-23 10:17 ` Jan Beulich
@ 2006-08-23 10:36 ` Keir Fraser
2006-08-23 11:10 ` Jan Beulich
2006-08-23 11:44 ` Gerd Hoffmann
0 siblings, 2 replies; 8+ messages in thread
From: Keir Fraser @ 2006-08-23 10:36 UTC (permalink / raw)
To: Jan Beulich, xen-devel
On 23/8/06 11:17 am, "Jan Beulich" <jbeulich@novell.com> wrote:
> Then libxc/xc_linux_build.c (after appropriate adjustment) wouldn't have
> a way to communicate these for a new domain. If extending the structure
> isn't possible at all, then we'll either have to make event_callback_eip and
> failsafe_callback_eip unions (permitting a selector:offset pair) or make
> syscall_callback_eip a union (permitting storing the selectors). I'd favor
> the second option as that field is entirely useless as long as x86_32
> doesn't support syscall (which doesn't make sense as it would make
> things slower rather than speeding them up) - that way one doesn't have
> to be careful to not access the other two full 64bit *_callback_eip
> fields.
If we do 32-bit dom0 kernel then the tools will pick up the 32-bit version
of that structure. So this is only an issue for userspace if we want 64-bit
dom0 to be able to build 32-bit domU's. I suppose this would be nice to
have.
The obvious thing to do is suffix all the structs and definitions in arch-x86_foo.h
with _32 or _64 as appropriate. Then, at the end of the header, we define
the non-suffixed versions only if defined(__i386__) or defined(__x86_64__)
(as appropriate).
This avoids breaking the domU API unnecessarily but allows those who want to
make the distinction to use the suffixed versions.
-- Keir
^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [PATCH 6/7, RFC] x86_64: basic changes for supporting compatibility mode guest
2006-08-23 10:36 ` Keir Fraser
@ 2006-08-23 11:10 ` Jan Beulich
2006-08-23 11:44 ` Gerd Hoffmann
1 sibling, 0 replies; 8+ messages in thread
From: Jan Beulich @ 2006-08-23 11:10 UTC (permalink / raw)
To: Keir Fraser, xen-devel
>>> Keir Fraser <Keir.Fraser@cl.cam.ac.uk> 23.08.06 12:36 >>>
>On 23/8/06 11:17 am, "Jan Beulich" <jbeulich@novell.com> wrote:
>> Then libxc/xc_linux_build.c (after appropriate adjustment) wouldn't have
>> a way to communicate these for a new domain. If extending the structure
>> isn't possible at all, then we'll either have to make event_callback_eip and
>> failsafe_callback_eip unions (permitting a selector:offset pair) or make
>> syscall_callback_eip a union (permitting storing the selectors). I'd favor
>> the second option as that field is entirely useless as long as x86_32
>> doesn't support syscall (which doesn't make sense as it would make
>> things slower rather than speeding them up) - that way one doesn't have
>> to be careful to not access the other two full 64bit *_callback_eip
>> fields.
>
>If we do 32-bit dom0 kernel then the tools will pick up the 32-bit version
>of that structure. So this is only an issue for userspace if we want 64-bit
>dom0 to be able to build 32-bit domU's. I suppose this would be nice to
>have.
Of course. From NetWare's perspective it is even more than just 'nice to
have'.
>Obvious thing to do is suffix all the structs and defns in arch-x86_foo.h
>with _32 or _64 as appropriate. Then, at the end of the header, we define
>the non-suffixed versions only if defined(__i386__) or __defined__(x86_64)
>(as appropriate).
>
>This avoids breaking the domU API unnecessarily but allows those who want to
>make the distinction to use the suffixed versions.
Implying that you would add extra hypercall sub-functions to which you could
pass non-native structures? That doesn't seem too nice to me.
Also, all the public headers will anyway need to be converted to compatibility
ones (the next thing I intend to do actually), so converting arch-x86_32.h
will come as a by-product, and handling the compatibility structure will be
purely a job of the compatibility hypercalls, except for the need to add a
compatibility flag to the domain creation request and to handle dual-purpose
fields like the ones talked about above under IS_COMPAT() in the native
hypercall.
Jan
^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [PATCH 6/7, RFC] x86_64: basic changes for supporting compatibility mode guest
2006-08-23 10:36 ` Keir Fraser
2006-08-23 11:10 ` Jan Beulich
@ 2006-08-23 11:44 ` Gerd Hoffmann
1 sibling, 0 replies; 8+ messages in thread
From: Gerd Hoffmann @ 2006-08-23 11:44 UTC (permalink / raw)
To: Keir Fraser; +Cc: xen-devel, Jan Beulich
Hi,
> If we do 32-bit dom0 kernel then the tools will pick up the 32-bit version
> of that structure. So this is only an issue for userspace if we want 64-bit
> dom0 to be able to build 32-bit domU's. I suppose this would be nice to
> have.
I think we certainly want to have that. I'd suggest to have a look at
my kexec bits:
http://www.suse.de/~kraxel/xen/kexec.html
http://www.suse.de/~kraxel/patches/
They include a largely rewritten domain builder; adding support for
loading both 32bit and 64bit kernels with the 64bit tools should be easy.
> Obvious thing to do is suffix all the structs and defns in arch-x86_foo.h
> with _32 or _64 as appropriate. Then, at the end of the header, we define
> the non-suffixed versions only if defined(__i386__) or __defined__(x86_64)
> (as appropriate).
I'd suggest to suffix them with $arch instead, so we have the option to
handle even ia64 on x86 machines.
cheers,
Gerd
--
Gerd Hoffmann <kraxel@suse.de>
http://www.suse.de/~kraxel/julika-dora.jpeg
^ permalink raw reply [flat|nested] 8+ messages in thread
end of thread, other threads:[~2006-08-23 11:44 UTC | newest]
Thread overview: 8+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2006-08-23 9:16 [PATCH 6/7, RFC] x86_64: basic changes for supporting compatibility mode guest Jan Beulich
2006-08-23 9:40 ` Keir Fraser
2006-08-23 10:17 ` Jan Beulich
2006-08-23 10:36 ` Keir Fraser
2006-08-23 11:10 ` Jan Beulich
2006-08-23 11:44 ` Gerd Hoffmann
2006-08-23 10:32 ` Jan Beulich
2006-08-23 10:33 ` Keir Fraser
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.