* Re: [PATCH][RFC] Supporting Enlightened Windows 2008 Server
@ 2008-03-05 22:15 Ky Srinivasan
2008-03-05 22:28 ` Daniel P. Berrange
2008-03-06 7:28 ` Keir Fraser
0 siblings, 2 replies; 15+ messages in thread
From: Ky Srinivasan @ 2008-03-05 22:15 UTC (permalink / raw)
To: xen-devel
[-- Attachment #1: Type: text/plain, Size: 379 bytes --]
I am attaching updated versions of the patches that I posted a couple of weeks ago. These have been merged up to the current unstable tip: changeset 17186:854b0704962b
These patches have been tested on the unstable tip.
Keir, what are your thoughts on accepting these patches?
Signed-off-by: K. Y. Srinivasan <ksrinivasan@novell.com>
Regards,
K. Y
[-- Attachment #2: ns_tools.patch --]
[-- Type: text/plain, Size: 4957 bytes --]
Index: xen-unstable.hg/tools/python/xen/lowlevel/xc/xc.c
===================================================================
--- xen-unstable.hg.orig/tools/python/xen/lowlevel/xc/xc.c
+++ xen-unstable.hg/tools/python/xen/lowlevel/xc/xc.c
@@ -622,14 +622,14 @@ static PyObject *pyxc_hvm_build(XcObject
int i;
#endif
char *image;
- int memsize, vcpus = 1, acpi = 0, apic = 1;
+ int memsize, vcpus = 1, acpi = 0, apic = 1, extid = 0;
static char *kwd_list[] = { "domid",
- "memsize", "image", "vcpus", "acpi",
+ "memsize", "image", "vcpus", "extid", "acpi",
"apic", NULL };
- if ( !PyArg_ParseTupleAndKeywords(args, kwds, "iis|iii", kwd_list,
+ if ( !PyArg_ParseTupleAndKeywords(args, kwds, "iis|iiii", kwd_list,
&dom, &memsize,
- &image, &vcpus, &acpi, &apic) )
+ &image, &vcpus, &extid, &acpi, &apic) )
return NULL;
if ( xc_hvm_build(self->xc_handle, dom, memsize, image) != 0 )
@@ -654,6 +654,7 @@ static PyObject *pyxc_hvm_build(XcObject
va_hvm->checksum = -sum;
munmap(va_map, XC_PAGE_SIZE);
#endif
+ xc_set_hvm_param(self->xc_handle, dom, HVM_PARAM_EXTEND_HYPERVISOR, extid);
return Py_BuildValue("{}");
}
Index: xen-unstable.hg/tools/python/xen/xend/XendConfig.py
===================================================================
--- xen-unstable.hg.orig/tools/python/xen/xend/XendConfig.py
+++ xen-unstable.hg/tools/python/xen/xend/XendConfig.py
@@ -126,7 +126,7 @@ LEGACY_CFG_TO_XENAPI_CFG = reverse_dict(
# Platform configuration keys.
XENAPI_PLATFORM_CFG = [ 'acpi', 'apic', 'boot', 'device_model', 'loader', 'display',
'fda', 'fdb', 'keymap', 'isa', 'localtime', 'monitor',
- 'nographic', 'pae', 'rtc_timeoffset', 'serial', 'sdl',
+ 'nographic', 'pae', 'extid', 'rtc_timeoffset', 'serial', 'sdl',
'soundhw','stdvga', 'usb', 'usbdevice', 'hpet', 'vnc',
'vncconsole', 'vncdisplay', 'vnclisten', 'timer_mode',
'vncpasswd', 'vncunused', 'xauthority', 'pci', 'vhpt',
Index: xen-unstable.hg/tools/python/xen/xend/image.py
===================================================================
--- xen-unstable.hg.orig/tools/python/xen/xend/image.py
+++ xen-unstable.hg/tools/python/xen/xend/image.py
@@ -504,6 +504,7 @@ class HVMImageHandler(ImageHandler):
self.apic = int(vmConfig['platform'].get('apic', 0))
self.acpi = int(vmConfig['platform'].get('acpi', 0))
+ self.extid = int(vmConfig['platform'].get('extid', 0))
self.guest_os_type = vmConfig['platform'].get('guest_os_type')
# Return a list of cmd line args to the device models based on the
@@ -602,6 +603,7 @@ class HVMImageHandler(ImageHandler):
log.debug("store_evtchn = %d", store_evtchn)
log.debug("memsize = %d", mem_mb)
log.debug("vcpus = %d", self.vm.getVCpuCount())
+ log.debug("extid = %d", self.extid)
log.debug("acpi = %d", self.acpi)
log.debug("apic = %d", self.apic)
@@ -609,6 +611,7 @@ class HVMImageHandler(ImageHandler):
image = self.loader,
memsize = mem_mb,
vcpus = self.vm.getVCpuCount(),
+ extid = self.extid,
acpi = self.acpi,
apic = self.apic)
rc['notes'] = { 'SUSPEND_CANCEL': 1 }
Index: xen-unstable.hg/tools/python/xen/xm/create.py
===================================================================
--- xen-unstable.hg.orig/tools/python/xen/xm/create.py
+++ xen-unstable.hg/tools/python/xen/xm/create.py
@@ -207,6 +207,10 @@ gopts.var('timer_mode', val='TIMER_MODE'
use="""Timer mode (0=delay virtual time when ticks are missed;
1=virtual time is always wallclock time.""")
+gopts.var('extid', val='EXTID',
+ fn=set_int, default=0,
+ use="Specify extention ID for a HVM domain.")
+
gopts.var('acpi', val='ACPI',
fn=set_int, default=1,
use="Disable or enable ACPI of HVM domain.")
@@ -740,7 +744,7 @@ def configure_vifs(config_devs, vals):
def configure_hvm(config_image, vals):
"""Create the config for HVM devices.
"""
- args = [ 'device_model', 'pae', 'vcpus', 'boot', 'fda', 'fdb', 'timer_mode',
+ args = [ 'device_model', 'pae', 'extid', 'vcpus', 'boot', 'fda', 'fdb', 'timer_mode',
'localtime', 'serial', 'stdvga', 'isa', 'nographic', 'soundhw',
'vnc', 'vncdisplay', 'vncunused', 'vncconsole', 'vnclisten',
'sdl', 'display', 'xauthority', 'rtc_timeoffset', 'monitor',
[-- Attachment #3: ns_xen_base.patch --]
[-- Type: text/plain, Size: 12589 bytes --]
%patch
Index: xen-unstable.hg/xen/arch/x86/hvm/svm/svm.c
===================================================================
--- xen-unstable.hg.orig/xen/arch/x86/hvm/svm/svm.c 2008-03-03 11:32:34.000000000 -0500
+++ xen-unstable.hg/xen/arch/x86/hvm/svm/svm.c 2008-03-03 14:03:00.000000000 -0500
@@ -50,6 +50,7 @@
#include <asm/hvm/vpt.h>
#include <asm/hvm/trace.h>
#include <asm/hap.h>
+#include <asm/hvm/hvm_extensions.h>
u32 svm_feature_flags;
@@ -941,12 +942,13 @@
static void svm_vmexit_do_cpuid(struct cpu_user_regs *regs)
{
- unsigned int eax, ebx, ecx, edx, inst_len;
+ unsigned int input, eax, ebx, ecx, edx, inst_len;
eax = regs->eax;
ebx = regs->ebx;
ecx = regs->ecx;
edx = regs->edx;
+ input = eax;
svm_cpuid_intercept(&eax, &ebx, &ecx, &edx);
@@ -955,6 +957,7 @@
regs->ecx = ecx;
regs->edx = edx;
+ ext_intercept_do_cpuid(input, regs);
inst_len = __get_instruction_length(current, INSTR_CPUID, NULL);
__update_guest_eip(regs, inst_len);
}
@@ -972,6 +975,10 @@
struct vcpu *v = current;
struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
+ if (ext_intercept_do_msr_read(ecx, regs))
+ {
+ goto done;
+ }
switch ( ecx )
{
case MSR_IA32_TSC:
@@ -1073,6 +1080,10 @@
msr_content = (u32)regs->eax | ((u64)regs->edx << 32);
hvmtrace_msr_write(v, ecx, msr_content);
+ if (ext_intercept_do_msr_write(ecx, regs))
+ {
+ goto done_msr_write;
+ }
switch ( ecx )
{
@@ -1129,7 +1140,7 @@
}
break;
}
-
+done_msr_write:
return X86EMUL_OKAY;
gpf:
Index: xen-unstable.hg/xen/arch/x86/hvm/vmx/vmx.c
===================================================================
--- xen-unstable.hg.orig/xen/arch/x86/hvm/vmx/vmx.c 2008-03-03 11:32:34.000000000 -0500
+++ xen-unstable.hg/xen/arch/x86/hvm/vmx/vmx.c 2008-03-04 18:37:57.000000000 -0500
@@ -49,6 +49,7 @@
#include <asm/hvm/vpt.h>
#include <public/hvm/save.h>
#include <asm/hvm/trace.h>
+#include <asm/hvm/hvm_extensions.h>
enum handler_return { HNDL_done, HNDL_unhandled, HNDL_exception_raised };
@@ -1169,12 +1170,13 @@
static void vmx_do_cpuid(struct cpu_user_regs *regs)
{
- unsigned int eax, ebx, ecx, edx;
+ unsigned int eax, ebx, ecx, edx, input;
eax = regs->eax;
ebx = regs->ebx;
ecx = regs->ecx;
edx = regs->edx;
+ input = eax;
vmx_cpuid_intercept(&eax, &ebx, &ecx, &edx);
@@ -1182,6 +1184,7 @@
regs->ebx = ebx;
regs->ecx = ecx;
regs->edx = edx;
+ ext_intercept_do_cpuid(input, regs);
}
#define CASE_GET_REG_P(REG, reg) \
@@ -1468,6 +1471,9 @@
HVM_DBG_LOG(DBG_LEVEL_1, "ecx=%x", ecx);
+ if (ext_intercept_do_msr_read(ecx, regs))
+ goto done;
+
switch ( ecx )
{
case MSR_IA32_TSC:
@@ -1659,6 +1665,9 @@
HVM_DBG_LOG(DBG_LEVEL_1, "ecx=%x, eax=%x, edx=%x",
ecx, (u32)regs->eax, (u32)regs->edx);
+ if (ext_intercept_do_msr_write(ecx, regs))
+ return X86EMUL_OKAY;
+
msr_content = (u32)regs->eax | ((u64)regs->edx << 32);
hvmtrace_msr_write(v, ecx, msr_content);
Index: xen-unstable.hg/xen/include/asm-x86/hvm/domain.h
===================================================================
--- xen-unstable.hg.orig/xen/include/asm-x86/hvm/domain.h 2008-03-03 11:32:36.000000000 -0500
+++ xen-unstable.hg/xen/include/asm-x86/hvm/domain.h 2008-03-03 14:03:00.000000000 -0500
@@ -79,6 +79,8 @@
#endif
bool_t hap_enabled;
bool_t qemu_mapcache_invalidate;
+ void *ext_handle; /* will be NULL on creation (memset)*/
+ struct extension_intercept_vector *ext_vector;
};
#endif /* __ASM_X86_HVM_DOMAIN_H__ */
Index: xen-unstable.hg/xen/include/public/hvm/params.h
===================================================================
--- xen-unstable.hg.orig/xen/include/public/hvm/params.h 2008-03-03 11:32:39.000000000 -0500
+++ xen-unstable.hg/xen/include/public/hvm/params.h 2008-03-03 14:03:00.000000000 -0500
@@ -50,10 +50,12 @@
#define HVM_PARAM_BUFIOREQ_PFN 6
+#define HVM_PARAM_EXTEND_HYPERVISOR 7
+
#ifdef __ia64__
-#define HVM_PARAM_NVRAM_FD 7
-#define HVM_PARAM_VHPT_SIZE 8
-#define HVM_PARAM_BUFPIOREQ_PFN 9
+#define HVM_PARAM_NVRAM_FD 8
+#define HVM_PARAM_VHPT_SIZE 9
+#define HVM_PARAM_BUFPIOREQ_PFN 10
#endif
/*
@@ -75,15 +77,16 @@
* Missed interrupts are collapsed together and delivered as one 'late tick'.
* Guest time always tracks wallclock (i.e., real) time.
*/
-#define HVM_PARAM_TIMER_MODE 10
+//KYS Check the modifications done to this file
+#define HVM_PARAM_TIMER_MODE 11
#define HVMPTM_delay_for_missed_ticks 0
#define HVMPTM_no_delay_for_missed_ticks 1
#define HVMPTM_no_missed_ticks_pending 2
#define HVMPTM_one_missed_tick_pending 3
/* Boolean: Enable virtual HPET (high-precision event timer)? (x86-only) */
-#define HVM_PARAM_HPET_ENABLED 11
+#define HVM_PARAM_HPET_ENABLED 12
-#define HVM_NR_PARAMS 12
+#define HVM_NR_PARAMS 13
#endif /* __XEN_PUBLIC_HVM_PARAMS_H__ */
Index: xen-unstable.hg/xen/arch/x86/hvm/Makefile
===================================================================
--- xen-unstable.hg.orig/xen/arch/x86/hvm/Makefile 2008-03-03 11:32:34.000000000 -0500
+++ xen-unstable.hg/xen/arch/x86/hvm/Makefile 2008-03-03 14:03:00.000000000 -0500
@@ -1,5 +1,6 @@
subdir-y += svm
subdir-y += vmx
+subdir-y += hvm_ext
obj-y += emulate.o
obj-y += hvm.o
Index: xen-unstable.hg/xen/arch/x86/hvm/hvm.c
===================================================================
--- xen-unstable.hg.orig/xen/arch/x86/hvm/hvm.c 2008-03-03 11:32:34.000000000 -0500
+++ xen-unstable.hg/xen/arch/x86/hvm/hvm.c 2008-03-03 14:03:00.000000000 -0500
@@ -43,6 +43,7 @@
#include <asm/mc146818rtc.h>
#include <asm/spinlock.h>
#include <asm/hvm/hvm.h>
+#include <asm/hvm/hvm_extensions.h>
#include <asm/hvm/vpt.h>
#include <asm/hvm/support.h>
#include <asm/hvm/cacheattr.h>
@@ -119,6 +120,7 @@
rtc_migrate_timers(v);
hpet_migrate_timers(v);
pt_migrate(v);
+ ext_intercept_do_migrate_timers(v);
}
void hvm_do_resume(struct vcpu *v)
@@ -279,6 +281,7 @@
void hvm_domain_destroy(struct domain *d)
{
+ ext_intercept_domain_destroy(d);
hvm_funcs.domain_destroy(d);
vioapic_deinit(d);
hvm_destroy_cacheattr_region_list(d);
@@ -562,8 +565,14 @@
{
int rc;
+ if ((rc = ext_intercept_vcpu_initialize(v)) != 0)
+ goto fail1;
+
if ( (rc = vlapic_init(v)) != 0 )
+ {
+ ext_intercept_vcpu_destroy(v);
goto fail1;
+ }
if ( (rc = hvm_funcs.vcpu_initialise(v)) != 0 )
goto fail2;
@@ -611,12 +620,14 @@
hvm_funcs.vcpu_destroy(v);
fail2:
vlapic_destroy(v);
+ ext_intercept_vcpu_destroy(v);
fail1:
return rc;
}
void hvm_vcpu_destroy(struct vcpu *v)
{
+ ext_intercept_vcpu_destroy(v);
vlapic_destroy(v);
hvm_funcs.vcpu_destroy(v);
@@ -1787,6 +1798,10 @@
case 0:
break;
}
+ if (ext_intercept_do_hypercall(regs))
+ {
+ return HVM_HCALL_completed;
+ }
if ( (eax >= NR_hypercalls) || !hvm_hypercall32_table[eax] )
{
@@ -1928,6 +1943,7 @@
vcpu_wake(v);
gdprintk(XENLOG_INFO, "AP %d bringup succeeded.\n", vcpuid);
+ ext_intercept_vcpu_up(v);
return 0;
}
@@ -2171,6 +2187,9 @@
if ( a.value > HVMPTM_one_missed_tick_pending )
goto param_fail;
break;
+ case HVM_PARAM_EXTEND_HYPERVISOR:
+ if (hvm_ext_bind(d, (int)a.value))
+ goto param_fail;
}
d->arch.hvm_domain.params[a.index] = a.value;
rc = 0;
Index: xen-unstable.hg/xen/arch/x86/x86_64/asm-offsets.c
===================================================================
--- xen-unstable.hg.orig/xen/arch/x86/x86_64/asm-offsets.c 2008-03-03 11:32:34.000000000 -0500
+++ xen-unstable.hg/xen/arch/x86/x86_64/asm-offsets.c 2008-03-03 14:03:00.000000000 -0500
@@ -146,4 +146,7 @@
BLANK();
OFFSET(CPUINFO_ext_features, struct cpuinfo_x86, x86_capability[1]);
+ BLANK();
+
+ OFFSET(DOM_ext_vector, struct domain, arch.hvm_domain.ext_vector);
}
Index: xen-unstable.hg/xen/arch/x86/hvm/vmx/x86_64/exits.S
===================================================================
--- xen-unstable.hg.orig/xen/arch/x86/hvm/vmx/x86_64/exits.S 2008-03-03 11:32:34.000000000 -0500
+++ xen-unstable.hg/xen/arch/x86/hvm/vmx/x86_64/exits.S 2008-03-03 14:03:00.000000000 -0500
@@ -112,6 +112,14 @@
ALIGN
ENTRY(vmx_asm_do_vmentry)
GET_CURRENT(%rbx)
+ mov VCPU_domain(%rbx),%rax
+ mov DOM_ext_vector(%rax),%rdx
+ test %rdx,%rdx
+ je vmx_no_ext_vector
+ sti
+ callq *(%rdx)
+vmx_no_ext_vector:
+
cli # tests must not race interrupts
movl VCPU_processor(%rbx),%eax
Index: xen-unstable.hg/xen/arch/x86/hvm/svm/x86_64/exits.S
===================================================================
--- xen-unstable.hg.orig/xen/arch/x86/hvm/svm/x86_64/exits.S 2008-03-03 11:32:34.000000000 -0500
+++ xen-unstable.hg/xen/arch/x86/hvm/svm/x86_64/exits.S 2008-03-03 14:03:00.000000000 -0500
@@ -37,6 +37,14 @@
ENTRY(svm_asm_do_resume)
GET_CURRENT(%rbx)
+ mov VCPU_domain(%rbx),%rax
+ mov DOM_ext_vector(%rax),%rdx
+ test %rdx,%rdx
+ je svm_no_ext_vector
+ sti
+ callq *(%rdx)
+svm_no_ext_vector:
+
CLGI
movl VCPU_processor(%rbx),%eax
Index: xen-unstable.hg/xen/arch/x86/hvm/save.c
===================================================================
--- xen-unstable.hg.orig/xen/arch/x86/hvm/save.c 2008-03-03 11:32:34.000000000 -0500
+++ xen-unstable.hg/xen/arch/x86/hvm/save.c 2008-03-03 14:03:00.000000000 -0500
@@ -23,6 +23,8 @@
#include <asm/hvm/support.h>
#include <public/hvm/save.h>
+#include <public/hvm/params.h>
+#include <asm/hvm/hvm_extensions.h>
void arch_hvm_save(struct domain *d, struct hvm_save_header *hdr)
{
@@ -31,8 +33,7 @@
/* Save some CPUID bits */
cpuid(1, &eax, &ebx, &ecx, &edx);
hdr->cpuid = eax;
-
- hdr->pad0 = 0;
+ hdr->ext_id = d->arch.hvm_domain.params[HVM_PARAM_EXTEND_HYPERVISOR];
}
int arch_hvm_load(struct domain *d, struct hvm_save_header *hdr)
@@ -61,6 +62,9 @@
/* VGA state is not saved/restored, so we nobble the cache. */
d->arch.hvm_domain.stdvga.cache = 0;
+ d->arch.hvm_domain.params[HVM_PARAM_EXTEND_HYPERVISOR] = hdr->ext_id;
+ if (hvm_ext_bind(d, hdr->ext_id))
+ return -1;
return 0;
}
Index: xen-unstable.hg/xen/include/public/arch-x86/hvm/save.h
===================================================================
--- xen-unstable.hg.orig/xen/include/public/arch-x86/hvm/save.h 2008-03-03 11:32:39.000000000 -0500
+++ xen-unstable.hg/xen/include/public/arch-x86/hvm/save.h 2008-03-03 14:03:00.000000000 -0500
@@ -38,7 +38,7 @@
uint32_t version; /* File format version */
uint64_t changeset; /* Version of Xen that saved this file */
uint32_t cpuid; /* CPUID[0x01][%eax] on the saving machine */
- uint32_t pad0;
+ uint32_t ext_id; /* extension ID */
};
DECLARE_HVM_SAVE_TYPE(HEADER, 1, struct hvm_save_header);
@@ -422,9 +422,30 @@
DECLARE_HVM_SAVE_TYPE(MTRR, 14, struct hvm_hw_mtrr);
+struct hvm_ns_veridian_dom {
+ uint64_t guestid_msr;
+ uint64_t hypercall_msr;
+ uint32_t long_mode;
+ uint32_t pad0;
+};
+DECLARE_HVM_SAVE_TYPE(NS_VERIDIAN_DOM, 15, struct hvm_ns_veridian_dom);
+
+struct hvm_ns_veridian_cpu {
+ uint64_t control_msr;
+ uint64_t version_msr;
+ uint64_t sief_msr;
+ uint64_t simp_msr;
+ uint64_t eom_msr;
+ uint64_t int_msr[16];
+ struct {
+ uint64_t config;
+ uint64_t count;
+ } timers[4];
+};
+DECLARE_HVM_SAVE_TYPE(NS_VERIDIAN_CPU, 16, struct hvm_ns_veridian_cpu);
/*
* Largest type-code in use
*/
-#define HVM_SAVE_CODE_MAX 14
+#define HVM_SAVE_CODE_MAX 16
#endif /* __XEN_PUBLIC_HVM_SAVE_X86_H__ */
[-- Attachment #4: ns_xen_extension.patch --]
[-- Type: text/plain, Size: 126416 bytes --]
%patch
Index: xen-unstable.hg/xen/arch/x86/hvm/hvm_ext/Makefile
===================================================================
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
+++ xen-unstable.hg/xen/arch/x86/hvm/hvm_ext/Makefile 2008-03-03 14:03:49.000000000 -0500
@@ -0,0 +1,3 @@
+subdir-y += novell
+
+obj-y += hvm_ext.o
Index: xen-unstable.hg/xen/include/asm-x86/hvm/hvm_extensions.h
===================================================================
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
+++ xen-unstable.hg/xen/include/asm-x86/hvm/hvm_extensions.h 2008-03-03 14:03:49.000000000 -0500
@@ -0,0 +1,252 @@
+/****************************************************************************
+ |
+ | Copyright (c) [2007, 2008] Novell, Inc.
+ | All Rights Reserved.
+ |
+ | This program is free software; you can redistribute it and/or
+ | modify it under the terms of version 2 of the GNU General Public License as
+ | published by the Free Software Foundation.
+ |
+ | This program is distributed in the hope that it will be useful,
+ | but WITHOUT ANY WARRANTY; without even the implied warranty of
+ | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ | GNU General Public License for more details.
+ |
+ | You should have received a copy of the GNU General Public License
+ | along with this program; if not, contact Novell, Inc.
+ |
+ | To contact Novell about this file by physical or electronic mail,
+ | you may find current contact information at www.novell.com
+ |
+ |***************************************************************************
+*/
+
+/*
+ * hvm_extensions.h
+ * This file implements a framework for extending the hypervisor
+ * functionality in a modular fashion. The framework is comprised of
+ * two components: A) A set of intercepts that will allow the extension
+ * module to implement its functionality by intercepting the corresponding
+ * code paths in Xen and B) A controlled runtime for the extension module.
+ * Initially the goal was to package the extension module as a boot-time
+ * loadable module. This may not be the way we end up packaging it.
+ *
+ * Engineering Contact: K. Y. Srinivasan
+ */
+
+#ifndef HVM_EXTENSION_H
+#define HVM_EXTENSION_H
+
+#include <xen/sched.h>
+#include <asm/domain.h>
+#include <xen/timer.h>
+#include <xen/time.h>
+#include <asm/regs.h>
+#include <asm/types.h>
+#include <asm/hvm/io.h>
+#include <asm/hvm/hvm.h>
+#include <asm/hvm/domain.h>
+
+
+
+/*
+ * Hypervisor extension hooks.
+ */
+/*
+ * Table of callbacks an extension module supplies. A domain is bound to a
+ * table through d->arch.hvm_domain.ext_vector; the ext_intercept_*() inline
+ * wrappers below dispatch through it and do nothing when it is NULL.
+ */
+typedef struct extension_intercept_vector {
+ /* Do not move the first field (do_continuation). Offset
+ * hardcoded in assembly files exits.S (VMX and SVM).
+ */
+ /* Called from the VM-entry/resume assembly path via "callq *(%rdx)". */
+ void (*do_continuation)(void);
+ /* Per-domain setup; nonzero return is treated as failure by hvm_ext_bind(). */
+ int (*domain_create)(struct domain *d);
+ /* Per-domain teardown. */
+ void (*domain_destroy)(struct domain *d);
+ /* Per-vcpu setup; nonzero return is treated as failure by the callers. */
+ int (*vcpu_initialize)(struct vcpu *v);
+ /* Per-vcpu teardown. */
+ void (*vcpu_destroy)(struct vcpu *v);
+ /* Invoked after the SVM/VMX CPUID intercept; idx is the original EAX input. */
+ int (*do_cpuid)(uint32_t idx, struct cpu_user_regs *regs);
+ /* Nonzero return makes the SVM/VMX MSR-read path skip its own handling. */
+ int (*do_msr_read)(uint32_t idx, struct cpu_user_regs *regs);
+ /* Nonzero return makes the SVM/VMX MSR-write path skip its own handling. */
+ int (*do_msr_write)(uint32_t idx, struct cpu_user_regs *regs);
+ /* Nonzero return makes the hypercall path report HVM_HCALL_completed. */
+ int (*do_hypercall)(struct cpu_user_regs *pregs);
+ /* Invoked from hvm_migrate_timers() after the built-in timers are moved. */
+ void (*do_migrate_timers)(struct vcpu *v);
+ /* Invoked after an AP has been brought up successfully. */
+ void (*vcpu_up)(struct vcpu *v);
+} extension_intercept_vector_t;
+
+/*
+ * Forward domain creation to the extension bound to d.
+ * Returns the hook's result, or 0 when no extension is bound.
+ */
+static inline int
+ext_intercept_domain_create(struct domain *d)
+{
+    struct extension_intercept_vector *hooks = d->arch.hvm_domain.ext_vector;
+
+    if (hooks == NULL)
+        return 0;
+    return hooks->domain_create(d);
+}
+
+/*
+ * Forward domain destruction to the extension bound to d, if any.
+ */
+static inline void
+ext_intercept_domain_destroy(struct domain *d)
+{
+    struct extension_intercept_vector *hooks = d->arch.hvm_domain.ext_vector;
+
+    if (hooks == NULL)
+        return;
+    hooks->domain_destroy(d);
+}
+
+/*
+ * Forward vcpu initialisation to the extension bound to v's domain.
+ * Returns the hook's result, or 0 when no extension is bound.
+ */
+static inline int
+ext_intercept_vcpu_initialize(struct vcpu *v)
+{
+    struct extension_intercept_vector *hooks =
+        v->domain->arch.hvm_domain.ext_vector;
+
+    return (hooks != NULL) ? hooks->vcpu_initialize(v) : 0;
+}
+
+
+/*
+ * Notify the extension bound to v's domain that vcpu v has been brought up.
+ * Does nothing when no extension is bound.
+ *
+ * The domain is taken from v (as in the other per-vcpu hooks) rather than
+ * from `current`, so the hook table always belongs to the vcpu being
+ * reported. No value is returned: the function and the hook are both void.
+ */
+static inline void
+ext_intercept_vcpu_up(struct vcpu *v)
+{
+    struct domain *d = v->domain;
+    if (d->arch.hvm_domain.ext_vector) {
+        d->arch.hvm_domain.ext_vector->vcpu_up(v);
+    }
+}
+
+/*
+ * Forward vcpu destruction to the extension bound to v's domain, if any.
+ */
+static inline void
+ext_intercept_vcpu_destroy(struct vcpu *v)
+{
+    struct extension_intercept_vector *hooks =
+        v->domain->arch.hvm_domain.ext_vector;
+
+    if (hooks != NULL)
+        hooks->vcpu_destroy(v);
+}
+
+/*
+ * Give the current domain's extension a chance to handle CPUID leaf idx.
+ * Returns the hook's result, or 0 when no extension is bound.
+ */
+static inline int
+ext_intercept_do_cpuid(uint32_t idx, struct cpu_user_regs *regs)
+{
+    struct extension_intercept_vector *hooks =
+        current->domain->arch.hvm_domain.ext_vector;
+
+    if (hooks == NULL)
+        return 0;
+    return hooks->do_cpuid(idx, regs);
+}
+
+/*
+ * Give the current domain's extension a chance to handle a read of MSR idx.
+ * Returns the hook's result, or 0 when no extension is bound.
+ */
+static inline int
+ext_intercept_do_msr_read(uint32_t idx, struct cpu_user_regs *regs)
+{
+    struct extension_intercept_vector *hooks =
+        current->domain->arch.hvm_domain.ext_vector;
+
+    return (hooks != NULL) ? hooks->do_msr_read(idx, regs) : 0;
+}
+/*
+ * Give the current domain's extension a chance to handle a write of MSR idx.
+ * Returns the hook's result, or 0 when no extension is bound.
+ */
+static inline int
+ext_intercept_do_msr_write(uint32_t idx, struct cpu_user_regs *regs)
+{
+    struct extension_intercept_vector *hooks =
+        current->domain->arch.hvm_domain.ext_vector;
+
+    if (hooks == NULL)
+        return 0;
+    return hooks->do_msr_write(idx, regs);
+}
+
+/*
+ * Give the current domain's extension a chance to handle a hypercall.
+ * Returns the hook's result, or 0 when no extension is bound.
+ */
+static inline int
+ext_intercept_do_hypercall(struct cpu_user_regs *regs)
+{
+    struct extension_intercept_vector *hooks =
+        current->domain->arch.hvm_domain.ext_vector;
+
+    return (hooks != NULL) ? hooks->do_hypercall(regs) : 0;
+}
+
+/*
+ * Ask the extension bound to v's domain to migrate its timers for vcpu v.
+ * Does nothing when no extension is bound.
+ *
+ * The domain is taken from v (as in the other per-vcpu hooks) rather than
+ * from `current`, so the hook table always belongs to the vcpu whose timers
+ * are being moved. No value is returned: the function and the hook are
+ * both void.
+ */
+static inline void
+ext_intercept_do_migrate_timers(struct vcpu *v)
+{
+    struct domain *d = v->domain;
+    if (d->arch.hvm_domain.ext_vector) {
+        d->arch.hvm_domain.ext_vector->do_migrate_timers(v);
+    }
+}
+
+/*
+ * Run the continuation hook of the current domain's extension, if any.
+ */
+static inline void
+ext_intercept_do_continuation(void)
+{
+    struct extension_intercept_vector *hooks =
+        current->domain->arch.hvm_domain.ext_vector;
+
+    if (hooks == NULL)
+        return;
+    hooks->do_continuation();
+}
+
+/*
+ * Base hypervisor support available to extension modules.
+ * We may choose to do away with this level of indirection!
+ * It may still be useful to have a controlled environment for the
+ * extension modules.
+ */
+/*
+ * Table of hypervisor services handed to an extension module. The entries
+ * are filled in by hvm_ext_register().
+ */
+typedef struct xen_call_vector {
+ /*
+ * We may want to embed version/compiler info here to avoid mismatches
+ */
+ /* Pointers to hypervisor-side tables. */
+ struct hvm_function_table *hvmFuncTable;
+ struct hvm_mmio_handler *mmIoHandler;
+ /* Diagnostics. */
+ void (*extPanic)(const char *s, ...);
+ void (*extPrintk)(const char *format, ...);
+ /* Interrupt delivery to a vcpu. */
+ void (*extPostInterrupt)(struct vcpu *v, int vector, int type);
+ /* Timer arming and time source (kill/migrate are at the end of the table). */
+ void (*extSetTimer)(struct timer *timer, s_time_t expires);
+ s_time_t (*extGetTimeSinceBoot)(void);
+ /* Guest frame / guest virtual address translation and mapping. */
+ void * (*extGetVirtFromGmfn)(struct domain *d, unsigned long gmfn);
+ unsigned long (*extGetMfnFromGmfn)(struct domain *d, unsigned long gmfn);
+ unsigned long (*extGetMfnFromGva)(unsigned long va);
+ void (*extUnmapDomainPage)(void *p);
+ /* Byte-granular memory allocation. */
+ void *(*extAllocMem)(size_t size);
+ void (*extFreeMem)(void *ptr);
+ /* Copies to and from guest physical memory. */
+ enum hvm_copy_result (*extCopyToGuestPhysical)(paddr_t paddr, void *buf, int size);
+ enum hvm_copy_result (*extCopyFromGuestPhysical)(void *buf, paddr_t paddr, int size);
+ /* Page-granular allocation; pages are passed around as opaque pointers. */
+ void *(*extAllocDomHeapPage)(void);
+ void (*extFreeDomHeapPage)(void *);
+ void * (*extGetVirtFromPagePtr)(void *);
+ /* Vcpu scheduling control. */
+ void (*extVcpuPause)(struct vcpu *v);
+ void (*extVcpuUnPause)(struct vcpu *v);
+ /* Vcpu register context access. */
+ void (*extArchGetDomainInfoCtxt)(struct vcpu *v,
+ struct vcpu_guest_context *);
+ int (*extArchSetDomainInfoCtxt)(struct vcpu *v,
+ struct vcpu_guest_context *);
+ /* Returns nonzero when the boot CPU vendor is Intel. */
+ int (*extCpuIsIntel)(void );
+ int (*extWrmsrHypervisorRegs)(uint32_t idx, uint32_t eax,
+ uint32_t edx);
+ /* Remaining timer services. */
+ void (*extKillTimer)(struct timer *timer);
+ void (*extMigrateTimer)(struct timer *timer, unsigned int new_cpu);
+} xen_call_vector_t;
+
+#define MAX_EXTENSION_ID 1
+
+/*
+ * int hvm_ext_bind(struct domain *d, int ext_id)
+ * Bind the specified domain to the specified extension module.
+ *
+ * An ext_id of 0 is the default and binds nothing.
+ *
+ * Returns:
+ * 0 on success, nonzero on failure.
+ *
+ * Calling/Exit State:
+ * None.
+ *
+ * Remarks:
+ * The goal is to support per-domain extension modules. Domain
+ * creating tools will have to specify the needed extension
+ * module ID. For now it is hard coded.
+ */
+int hvm_ext_bind(struct domain *d, int ext_id);
+
+/*
+ * int hvm_ext_register(int ext_id,
+ * struct extension_intercept_vector *ext_vector,
+ * struct xen_call_vector *xen_vector)
+ * Register the extension module with the hypervisor
+ *
+ * ext_vector is the module's hook table; xen_vector is an output table
+ * that the hypervisor fills with the services it offers to the module.
+ *
+ * Returns:
+ * 0 on success.
+ *
+ * Calling/Exit State:
+ * None.
+ */
+
+int hvm_ext_register(int ext_id, struct extension_intercept_vector *ext_vector,
+ struct xen_call_vector *xen_vector);
+
+
+#endif
Index: xen-unstable.hg/xen/arch/x86/hvm/hvm_ext/hvm_ext.c
===================================================================
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
+++ xen-unstable.hg/xen/arch/x86/hvm/hvm_ext/hvm_ext.c 2008-03-03 14:03:49.000000000 -0500
@@ -0,0 +1,350 @@
+/****************************************************************************
+ |
+ | Copyright (c) [2007, 2008] Novell, Inc.
+ | All Rights Reserved.
+ |
+ | This program is free software; you can redistribute it and/or
+ | modify it under the terms of version 2 of the GNU General Public License as
+ | published by the Free Software Foundation.
+ |
+ | This program is distributed in the hope that it will be useful,
+ | but WITHOUT ANY WARRANTY; without even the implied warranty of
+ | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ | GNU General Public License for more details.
+ |
+ | You should have received a copy of the GNU General Public License
+ | along with this program; if not, contact Novell, Inc.
+ |
+ | To contact Novell about this file by physical or electronic mail,
+ | you may find current contact information at www.novell.com
+ |
+ |***************************************************************************
+*/
+
+/*
+ * hvm_ext.c
+ * Glue code for implementing the extension module.
+ *
+ * Engineering Contact: K. Y. Srinivasan
+ */
+
+#include <asm/hvm/hvm_extensions.h>
+#include <xen/lib.h>
+#include <asm/event.h>
+#include <asm/shadow.h>
+#include <asm/hvm/support.h>
+#include <xen/domain_page.h>
+#include <xen/domain.h>
+#include <xen/mm.h>
+#include <xen/event.h>
+#include <xen/sched.h>
+
+
+/*
+ * Hook table of the registered extension module. Stored by
+ * hvm_ext_register() and attached to domains by hvm_ext_bind().
+ */
+struct extension_intercept_vector *intercept_vector;
+
+/*
+ * hvm_ext_inject_interrupt - interrupt delivery service for extensions.
+ *
+ * Raises `vector` on the virtual local APIC of vcpu `v` and kicks the vcpu
+ * when vlapic_set_irq() reports a nonzero result. The `type` argument is
+ * currently not used.
+ */
+static void
+hvm_ext_inject_interrupt(struct vcpu *v, int vector, int type)
+{
+    /*
+     * XXXKYS: Check the trigger mode.
+     */
+    if (!vlapic_set_irq(vcpu_vlapic(v), vector, 1))
+        return;
+
+    vcpu_kick(v);
+}
+
+/*
+ * hvm_ext_set_timer - timer arming service for extensions.
+ *
+ * Passes `timer` and `expires` straight through to set_timer().
+ */
+static void
+hvm_ext_set_timer(struct timer *timer, s_time_t expires)
+{
+    set_timer(timer, expires);
+}
+
+/*
+ * hvm_ext_kill_timer - timer teardown service for extensions.
+ *
+ * Passes `timer` straight through to kill_timer().
+ */
+static void
+hvm_ext_kill_timer(struct timer *timer)
+{
+    kill_timer(timer);
+}
+
+/*
+ * hvm_ext_migrate_timer - timer migration service for extensions.
+ *
+ * Passes `timer` and the destination `new_cpu` straight through to
+ * migrate_timer().
+ */
+static void
+hvm_ext_migrate_timer(struct timer *timer, unsigned int new_cpu)
+{
+    migrate_timer(timer, new_cpu);
+}
+
+
+/*
+ * hvm_ext_get_virt_from_gmfn - map a guest frame into the hypervisor.
+ *
+ * Translates guest frame `gmfn` of domain `d` to a machine frame and maps
+ * it with map_domain_page_global().
+ *
+ * Returns the mapping, or NULL when the translation yields INVALID_MFN.
+ */
+static void *
+hvm_ext_get_virt_from_gmfn(struct domain *d, unsigned long gmfn)
+{
+    unsigned long mfn = gmfn_to_mfn(d, gmfn);
+
+    if (mfn != INVALID_MFN)
+        return map_domain_page_global(mfn);
+
+    return NULL;
+}
+
+/*
+ * hvm_ext_get_mfn_from_gmfn - guest frame to machine frame translation.
+ *
+ * Returns whatever gmfn_to_mfn() yields for frame `gmfn` of domain `d`.
+ */
+static unsigned long
+hvm_ext_get_mfn_from_gmfn(struct domain *d, unsigned long gmfn)
+{
+    unsigned long mfn = gmfn_to_mfn(d, gmfn);
+
+    return mfn;
+}
+
+/*
+ * static unsigned long
+ * hvm_ext_get_mfn_from_gva(unsigned long va)
+ * Given a guest virtual address of the current vcpu, return the machine
+ * frame number backing the address.
+ *
+ * Returns:
+ * The machine frame number, or INVALID_MFN when the guest page-table
+ * walk does not produce a guest frame.
+ *
+ * Calling/Exit State:
+ * None.
+ */
+static unsigned long
+hvm_ext_get_mfn_from_gva(unsigned long va)
+{
+    uint32_t pfec = PFEC_page_present;
+    unsigned long gfn = paging_gva_to_gfn(current, va, &pfec);
+
+    /* Do not hand a failed translation to the p2m lookup. */
+    if (gfn == INVALID_GFN) {
+        return (INVALID_MFN);
+    }
+    return (gmfn_to_mfn((current->domain), gfn));
+}
+
+/*
+ * hvm_ext_alloc_mem - byte-granular allocation service for extensions.
+ *
+ * Returns the result of xmalloc_bytes(size).
+ */
+static void *
+hvm_ext_alloc_mem(size_t size)
+{
+    void *mem = xmalloc_bytes(size);
+
+    return mem;
+}
+
+/*
+ * hvm_ext_alloc_domheap_page - page allocation service for extensions.
+ *
+ * Returns the result of alloc_domheap_page(NULL) as an opaque pointer.
+ */
+static void *
+hvm_ext_alloc_domheap_page(void)
+{
+    void *page = alloc_domheap_page(NULL);
+
+    return page;
+}
+
+/*
+ * hvm_ext_free_domheap_page - page release service for extensions.
+ *
+ * Hands `p` to free_domheap_pages() with order 0 (a single page).
+ */
+static void
+hvm_ext_free_domheap_page(void *p)
+{
+    free_domheap_pages(p, 0);
+}
+
+/*
+ * hvm_ext_get_virt_from_page_ptr - map a page and return a hypervisor VA.
+ *
+ * `page` is treated as a struct page_info pointer. Its machine frame is
+ * looked up with page_to_mfn() and mapped with map_domain_page_global().
+ */
+static void *
+hvm_ext_get_virt_from_page_ptr(void *page)
+{
+    struct page_info *info = page;
+
+    return map_domain_page_global(page_to_mfn(info));
+}
+
+extern struct cpuinfo_x86 boot_cpu_data;
+
+/*
+ * static int
+ * hvm_ext_cpu_is_intel(void)
+ * Check if the boot CPU vendor is Intel.
+ *
+ * Returns:
+ * 1 when boot_cpu_data.x86_vendor is X86_VENDOR_INTEL, 0 otherwise.
+ *
+ * Calling/Exit State:
+ * None.
+ */
+static int
+hvm_ext_cpu_is_intel(void)
+{
+    /* Use the named vendor constant instead of the bare value 0. */
+    if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) {
+        return (1);
+    }
+    return (0);
+}
+
+/*
+ * int
+ * hvm_ext_bind(struct domain *d, int ext_id)
+ * Bind the specified domain with the specified extension module.
+ *
+ * ext_id == 0 is the default parameter value and binds nothing.
+ * Any other ID must lie in 1..MAX_EXTENSION_ID and an extension must
+ * already have been registered through hvm_ext_register().
+ *
+ * Returns:
+ * 0 on success, 1 on failure. On failure the domain is left unbound
+ * (ext_vector == NULL) so that later intercepts do not call into an
+ * extension whose per-domain state was never set up or was torn down.
+ *
+ * Calling/Exit State:
+ * None.
+ */
+int
+hvm_ext_bind(struct domain *d, int ext_id)
+{
+    int i;
+
+    /*
+     * XXXKYS: Assuming that this function will be called before the
+     * new domain begins to run. It is critical that this be the case.
+     */
+    if (ext_id == 0) {
+        /*
+         * This is the default value for this parameter.
+         */
+        return (0);
+    }
+    if (ext_id < 0 || ext_id > MAX_EXTENSION_ID || intercept_vector == NULL) {
+        /*
+         * Unknown extension ID, or no extension has registered yet.
+         */
+        return (1);
+    }
+    d->arch.hvm_domain.ext_vector = intercept_vector;
+    /*
+     * Let the extension initialize its state.
+     */
+    if (intercept_vector->domain_create(d)) {
+        goto fail_unbind;
+    }
+    for (i = 0; i < MAX_VIRT_CPUS; i++) {
+        if (d->vcpu[i] == NULL) {
+            continue;
+        }
+        if (intercept_vector->vcpu_initialize(d->vcpu[i])) {
+            goto fail_vcpus;
+        }
+    }
+    return (0);
+
+fail_vcpus:
+    /*
+     * Undo the vcpus initialized so far, skipping empty slots.
+     */
+    while (--i >= 0) {
+        if (d->vcpu[i] != NULL) {
+            intercept_vector->vcpu_destroy(d->vcpu[i]);
+        }
+    }
+    intercept_vector->domain_destroy(d);
+fail_unbind:
+    d->arch.hvm_domain.ext_vector = NULL;
+    return (1);
+}
+
+
+/*
+ * Panic service exported to extension modules (installed as
+ * xen_vector->extPanic by hvm_ext_register()).
+ *
+ * Calls domain_crash_synchronous(). The format string and its arguments
+ * are currently ignored, so no message is printed by this function.
+ */
+void extPanic(const char *fmt, ...)
+{
+ domain_crash_synchronous();
+}
+
+/*
+ * For now we will support only one extension; id==1!
+ */
+
+extern struct hvm_function_table hvm_funcs;
+extern struct hvm_mmio_handler vlapic_mmio_handler;
+
+/*
+ * static void
+ * hvm_ext_unmap_domain_page(void *p)
+ * Undo a mapping obtained through extGetVirtFromGmfn or
+ * extGetVirtFromPagePtr. A real function is used so that a valid
+ * pointer can be stored in the service table whether or not
+ * CONFIG_DOMAIN_PAGE is defined.
+ *
+ * Calling/Exit State:
+ * None.
+ */
+static void
+hvm_ext_unmap_domain_page(void *p)
+{
+    unmap_domain_page_global(p);
+}
+
+/*
+ * int
+ * hvm_ext_register(int ext_id, struct extension_intercept_vector *ext_vector,
+ *                  struct xen_call_vector *xen_vector)
+ *
+ * Register the invoking extension module with the hypervisor.
+ * ext_vector is remembered as the hook table to bind to domains;
+ * xen_vector is filled in with the services the hypervisor offers.
+ *
+ * Returns:
+ * 0 on success, 1 when ext_id is not the single supported ID (1) or
+ * either table pointer is NULL. Nothing is modified on failure.
+ *
+ * Calling/Exit State:
+ * None.
+ */
+int
+hvm_ext_register(int ext_id, struct extension_intercept_vector *ext_vector,
+                 struct xen_call_vector *xen_vector)
+{
+    ASSERT(ext_id == 1);
+    /*
+     * ASSERT is compiled out of non-debug builds, so validate at run time
+     * as well.
+     */
+    if (ext_id != 1 || ext_vector == NULL || xen_vector == NULL) {
+        return (1);
+    }
+    intercept_vector = ext_vector;
+    /*
+     * Populate the vector of services from the xen side; ultimately
+     * we may decide to get rid of this level of indirection; it may
+     * still be useful to limit the breadth of xen dependency here.
+     */
+    xen_vector->hvmFuncTable = &hvm_funcs;
+    xen_vector->mmIoHandler = &vlapic_mmio_handler;
+    xen_vector->extPanic = extPanic;
+    xen_vector->extPrintk = printk;
+    xen_vector->extPostInterrupt = hvm_ext_inject_interrupt;
+    xen_vector->extSetTimer = hvm_ext_set_timer;
+    xen_vector->extKillTimer = hvm_ext_kill_timer;
+    xen_vector->extMigrateTimer = hvm_ext_migrate_timer;
+    xen_vector->extGetTimeSinceBoot = get_s_time;
+    xen_vector->extGetVirtFromGmfn = hvm_ext_get_virt_from_gmfn;
+    xen_vector->extGetMfnFromGmfn = hvm_ext_get_mfn_from_gmfn;
+
+    xen_vector->extGetMfnFromGva = hvm_ext_get_mfn_from_gva;
+    xen_vector->extUnmapDomainPage = hvm_ext_unmap_domain_page;
+    xen_vector->extAllocMem = hvm_ext_alloc_mem;
+    xen_vector->extFreeMem = xfree;
+    xen_vector->extCopyToGuestPhysical = hvm_copy_to_guest_phys;
+    xen_vector->extCopyFromGuestPhysical = hvm_copy_from_guest_phys;
+    xen_vector->extAllocDomHeapPage = hvm_ext_alloc_domheap_page;
+    xen_vector->extFreeDomHeapPage = hvm_ext_free_domheap_page;
+    xen_vector->extGetVirtFromPagePtr = hvm_ext_get_virt_from_page_ptr;
+    xen_vector->extVcpuPause = vcpu_pause;
+    xen_vector->extVcpuUnPause = vcpu_unpause;
+    xen_vector->extArchGetDomainInfoCtxt = arch_get_info_guest;
+    xen_vector->extArchSetDomainInfoCtxt = arch_set_info_guest;
+    xen_vector->extCpuIsIntel = hvm_ext_cpu_is_intel;
+    xen_vector->extWrmsrHypervisorRegs = wrmsr_hypervisor_regs;
+
+    return 0;
+}
Index: xen-unstable.hg/xen/arch/x86/hvm/hvm_ext/novell/Makefile
===================================================================
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
+++ xen-unstable.hg/xen/arch/x86/hvm/hvm_ext/novell/Makefile 2008-03-03 14:03:49.000000000 -0500
@@ -0,0 +1,2 @@
+obj-y += nsintercept.o
+obj-y += nshypercall.o
Index: xen-unstable.hg/xen/arch/x86/hvm/hvm_ext/novell/ns_errno.h
===================================================================
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
+++ xen-unstable.hg/xen/arch/x86/hvm/hvm_ext/novell/ns_errno.h 2008-03-03 14:03:49.000000000 -0500
@@ -0,0 +1,62 @@
+/****************************************************************************
+ |
+ | Copyright (c) [2007, 2008] Novell, Inc.
+ | All Rights Reserved.
+ |
+ | This program is free software; you can redistribute it and/or
+ | modify it under the terms of version 2 of the GNU General Public License as
+ | published by the Free Software Foundation.
+ |
+ | This program is distributed in the hope that it will be useful,
+ | but WITHOUT ANY WARRANTY; without even the implied warranty of
+ | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ | GNU General Public License for more details.
+ |
+ | You should have received a copy of the GNU General Public License
+ | along with this program; if not, contact Novell, Inc.
+ |
+ | To contact Novell about this file by physical or electronic mail,
+ | you may find current contact information at www.novell.com
+ |
+ |***************************************************************************
+*/
+
+/*
+ * ns_errno.h
+ * Error codes for the Novell Shim.
+ *
+ * Engineering Contact: K. Y. Srinivasan
+ */
+
+#ifndef NS_ERRNO_H
+#define NS_ERRNO_H
+
+#define NS_STATUS_SUCCESS 0x0000
+#define NS_STATUS_INVALID_HYPERCALL_CODE 0x0002
+#define NS_STATUS_INVALID_HYPERCALL_INPUT 0x0003
+#define NS_STATUS_INVALID_ALIGNMENT 0x0004
+#define NS_STATUS_INVALID_PARAMETER 0x0005
+#define NS_STATUS_ACCESS_DENIED 0x0006
+#define NS_STATUS_INVALID_PARTITION_STATE 0x0007
+#define NS_STATUS_OPERATION_DENIED 0x0008
+#define NS_STATUS_UNKNOWN_PROPERTY 0x0009
+#define NS_STATUS_PROPERTY_VALUE_OUT_OF_RANGE 0x000A
+#define NS_STATUS_INSUFFICIENT_MEMORY 0x000B
+#define NS_STATUS_PARTITION_TOO_DEEP 0x000C
+#define NS_STATUS_INVALID_PARTITION_ID 0x000D
+#define NS_STATUS_INVALID_VP_INDEX 0x000E
+#define NS_STATUS_UNABLE_TO_RESTORE_STATE 0x000F
+#define NS_STATUS_NOT_FOUND 0x0010
+#define NS_STATUS_INVALID_PORT_ID 0x0011
+#define NS_STATUS_INVALID_CONNECTION_ID 0x0012
+#define NS_STATUS_INSUFFICIENT_BUFFERS 0x0013
+#define NS_STATUS_NOT_ACKNOWLEDGED 0x0014
+#define NS_STATUS_INVALID_VP_STATE 0x0015
+#define NS_STATUS_ACKNOWLEDGED 0x0016
+#define NS_STATUS_INVALID_SAVE_RESTORE_STATE 0x0017
+#define NS_STATUS_NO_MEMORY_4PAGES 0x0100
+#define NS_STATUS_NO_MEMORY_16PAGES 0x0101
+#define NS_STATUS_NO_MEMORY_64PAGES 0x0102
+#define NS_STATUS_NO_MEMORY_256PAGES 0x0103
+#define NS_STATUS_NO_MEMORY_1024PAGES 0x0104
+#endif
Index: xen-unstable.hg/xen/arch/x86/hvm/hvm_ext/novell/ns_shim.h
===================================================================
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
+++ xen-unstable.hg/xen/arch/x86/hvm/hvm_ext/novell/ns_shim.h 2008-03-03 14:03:49.000000000 -0500
@@ -0,0 +1,480 @@
+/****************************************************************************
+ |
+ | Copyright (c) [2007, 2008] Novell, Inc.
+ | All Rights Reserved.
+ |
+ | This program is free software; you can redistribute it and/or
+ | modify it under the terms of version 2 of the GNU General Public License as
+ | published by the Free Software Foundation.
+ |
+ | This program is distributed in the hope that it will be useful,
+ | but WITHOUT ANY WARRANTY; without even the implied warranty of
+ | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ | GNU General Public License for more details.
+ |
+ | You should have received a copy of the GNU General Public License
+ | along with this program; if not, contact Novell, Inc.
+ |
+ | To contact Novell about this file by physical or electronic mail,
+ | you may find current contact information at www.novell.com
+ |
+ |***************************************************************************
+*/
+
+/*
+ * Novell Shim Implementation.
+ *
+ * Engineering Contact: K. Y. Srinivasan
+ */
+
+#ifndef NS_SHIM_H
+#define NS_SHIM_H
+
+#include <xen/sched.h>
+#include <xen/types.h>
+#include <xen/timer.h>
+#include <asm/current.h>
+#include <asm/domain.h>
+#include <asm/shadow.h>
+#include <public/xen.h>
+
+#include "nshypercall.h"
+
+/*
+ * Synthetic MSR addresses
+ */
+#define NS_MSR_GUEST_OS_ID 0x40000000
+#define NS_MSR_HYPERCALL 0x40000001
+#define NS_MSR_VP_INDEX 0x40000002
+#define NS_MSR_SYSTEM_RESET 0x40000003
+#define NS_MSR_TIME_REF_COUNT 0x40000020
+#define NS_MSR_EOI 0x40000070
+#define NS_MSR_ICR 0x40000071
+#define NS_MSR_TPR 0x40000072
+
+#define NS_MSR_SCONTROL 0x40000080
+#define NS_MSR_SVERSION 0x40000081
+#define NS_MSR_SIEFP 0x40000082
+#define NS_MSR_SIMP 0x40000083
+#define NS_MSR_SEOM 0x40000084
+#define NS_MSR_SINT0 0x40000090
+#define NS_MSR_SINT1 0x40000091
+#define NS_MSR_SINT2 0x40000092
+#define NS_MSR_SINT3 0x40000093
+#define NS_MSR_SINT4 0x40000094
+#define NS_MSR_SINT5 0x40000095
+#define NS_MSR_SINT6 0x40000096
+#define NS_MSR_SINT7 0x40000097
+#define NS_MSR_SINT8 0x40000098
+#define NS_MSR_SINT9 0x40000099
+#define NS_MSR_SINT10 0x4000009A
+#define NS_MSR_SINT11 0x4000009B
+#define NS_MSR_SINT12 0x4000009C
+#define NS_MSR_SINT13 0x4000009D
+#define NS_MSR_SINT14 0x4000009E
+#define NS_MSR_SINT15 0x4000009F
+
+#define NS_MSR_TIMER0_CONFIG 0x400000B0
+#define NS_MSR_TIMER0_COUNT 0x400000B1
+#define NS_MSR_TIMER1_CONFIG 0x400000B2
+#define NS_MSR_TIMER1_COUNT 0x400000B3
+#define NS_MSR_TIMER2_CONFIG 0x400000B4
+#define NS_MSR_TIMER2_COUNT 0x400000B5
+#define NS_MSR_TIMER3_CONFIG 0x400000B6
+#define NS_MSR_TIMER3_COUNT 0x400000B7
+
+/*
+ * MSR for supporting PV drivers on longhorn.
+ */
+#define NS_MSR_PVDRV_HCALL 0x40001000
+
+/*
+ * MSR for supporting other enlightened OSes.
+ * NOTE(review): same address (0x40001000) as NS_MSR_PVDRV_HCALL -- confirm. */
+#define NS_MSR_NONLH_GUEST_OS_ID 0x40001000
+
+/*
+ * Novell Shim VCPU flags.
+ * A VCPU is considered up when it is capable of invoking hypercalls.
+ */
+#define NS_VCPU_BOOT_CPU 0x00000001
+#define NS_VCPU_UP 0x00000002
+
+/*
+ * Novell shim flush flags.
+ */
+
+#define NS_FLUSH_TLB 0X01
+#define NS_FLUSH_INVLPG 0X02
+
+/*
+ * We use the following global state to manage TLB flush requests from the
+ * guest. At most only one flush can be active in the guest; we may have to
+ * revisit this if this is a bottleneck.
+ */
+typedef struct nsGlobalFlushState {
+ int cpuCount; //0 unused; else #cpus participating
+ cpumask_t waiters; //vcpu ids waiting for the flush block
+ struct vcpu *currentOwner; /* vcpu running the current flush sequence; NULL when the state is free */
+ u64 retVal; /* hypercall return value stashed for the owner until the final cleanup */
+ flushVa_t *flushParam; /* owner's input buffer, recorded when the state is acquired */
+ unsigned short repCount; /* owner's rep count, recorded when the state is acquired */
+} nsGlobalFlushState_t;
+
+typedef struct nsSpinLock {
+ unsigned long flags;
+ spinlock_t spinLock;
+ struct nsVcpu *owner;
+ void *retAddr;
+} nsSpinLock_t;
+
+/*
+ * Novell shim message structure.
+ */
+typedef enum {
+ /*
+ * For now we only support timer messages
+ */
+ nsMessageTypeNone = 0x00000000,
+ nsMessageTimerExpired = 0x80000010
+} nsMessageType;
+
+typedef struct nsTimerMessage { /* timer message layout; presumably used with nsMessageTimerExpired -- confirm */
+ nsMessageType messageType;
+ u8 pad1[3];
+ u8 messageSize; /* NOTE(review): nsMessage_t places messageSize directly after messageType; here 3 pad bytes precede it -- confirm layout */
+ u32 timerIndex;
+ u32 pad2;
+ u64 expirationTime;
+} nsTimerMessage_t;
+
+typedef struct nsMessage {
+ nsMessageType messageType;
+ uint8_t messageSize;
+ uint8_t flags;
+ uint8_t reserved[2];
+ uint32_t reserved1;
+ uint64_t payLoad[30];
+} nsMessage_t;
+
+
+typedef struct nsVcpTimerState {
+ u64 config;
+ u64 count; /*expiration time in 100ns units*/
+ int timerIndex;
+ struct nsVcpu *thisCpu;
+ struct timer vcpuTimer;
+} nsVcpTimerState_t;
+
+/*
+ * Stats structure.
+ */
+
+typedef struct { /* per-vcpu event counters; updated by nsCollectStats(), printed by nsPrintStats() */
+ u64 numSwitches; /* NS_CSWITCH events */
+ u64 numFlushes; /* flush-VA events */
+ u64 numFlushesPosted; /* NS_FLUSH_VA_POSTED events */
+ u64 numFlushRanges; /* NS_FLUSH_RANGE events */
+ u64 numFlushRangesPosted; /* NS_FLUSH_RANGE_POSTED events */
+
+ u64 numTprReads; /* NS_TPR_READ events */
+ u64 numIcrReads; /* NS_ICR_READ events */
+ u64 numEoiWrites; /* NS_EOI_WRITE events */
+ u64 numTprWrites; /* NS_TPR_WRITE events */
+ u64 numIcrWrites; /* NS_ICR_WRITE events */
+
+ u64 numGFSAcquires; /* NS_GFS_ACQUIRE events */
+ u64 numGFSReleases; /* NS_GFS_RELEASE events */
+ u64 numTlbFlushes; /* NS_TLB_FLUSH events */
+ u64 numInvlPages; /* NS_INVL_PG events */
+ u64 numTimeOuts; /* NOTE(review): nsCollectStats() has no NS_TIMEOUTS case -- confirm where this is updated */
+} nsVcpuStats_t;
+
+typedef struct nsVcpu {
+ /*
+ * Per-vcpu state to support the Novell shim;
+ */
+ int nsVcplockDepth;
+ unsigned long nsVcpuFlags;
+ unsigned char nsVcpFlushRequest;
+ unsigned char nsVcpWaitingOnGFS;
+ unsigned char nsVcpFlushPending;
+ unsigned char nsVcpWaitingForCleanup;
+ unsigned short nsVcpRepCount;
+ /*
+ * Synthetic msrs.
+ */
+ u64 nsVcpSControlMsr;
+ u64 nsVcpSVersionMsr;
+ u64 nsVcpSIefpMsr;
+ u64 nsVcpSimpMsr;
+ u64 nsVcpEomMsr;
+
+ u64 nsVcpSIntMsr[16];
+ /*
+ * Timer MSRs.
+ */
+ nsVcpTimerState_t nsVcpTimers[4];
+ void *nsVcpSiefPage;
+ void *nsVcpSimPage;
+ /*
+ * Hypercall input/output processing.
+ * We keep these pages mapped in the hypervisor space.
+ */
+ void *nsVcpInputBuffer; /*input buffer virt address*/
+ void *nsVcpInputBufferPage; /*input buffer struct page */
+ void *nsVcpOutputBuffer; /*output buffer virt address*/
+ void *nsVcpOutputBufferPage; /*output buffer struct page */
+ struct vcpu *nsVcpXenVcpu; /*corresponding xen vcpu*/
+ nsVcpuStats_t nsVcpStats;
+} nsVcpu_t;
+
+/*
+ * Events of interest for gathering stats.
+ */
+#define NS_CSWITCH 1
+#define NS_FLUSH_VA_STAT 2
+#define NS_FLUSH_RANGE 3
+#define NS_FLUSH_VA_POSTED 4
+#define NS_FLUSH_RANGE_POSTED 5
+#define NS_TPR_READ 6
+#define NS_ICR_READ 7
+#define NS_TPR_WRITE 8
+#define NS_ICR_WRITE 9
+#define NS_EOI_WRITE 10
+
+#define NS_GFS_ACQUIRE 11
+#define NS_GFS_RELEASE 12
+#define NS_TLB_FLUSH 13
+#define NS_INVL_PG 14
+#define NS_TIMEOUTS 15
+
+void nsCollectStats(int event, nsVcpuStats_t *ststp);
+
+#define NS_STATS //KYS: Temporary
+/* NS_STATS_COLLECT() records an event when NS_STATS is defined; otherwise it is a no-op. */
+#ifdef NS_STATS
+#define NS_STATS_COLLECT(event, statp) nsCollectStats(event, statp)
+#else
+#define NS_STATS_COLLECT(event, statp) ((void)0)
+#endif
+
+typedef struct nsPartition {
+ /*
+ * State maintained on a per guest basis to implement
+ * the Novell shim.
+ */
+ nsSpinLock_t nsLock;
+ atomic_t nsNumVcpusActive;
+ u64 nsGuestIdMsr;
+ u64 nsHypercallMsr;
+ u64 nsPrivileges;
+ u64 nsSupportedFeatures;
+ unsigned long nsHypercallMfn;
+ int nsLongModeGuest;
+ /*
+ * Each VCPU here corresponds to the vcpu in the underlying hypervisor;
+ * they share the same ID.
+ */
+ nsVcpu_t nsVcpuState[MAX_VIRT_CPUS];
+ nsGlobalFlushState_t nsFlushState;
+} nsPartition_t;
+
+/*
+ * Max CPUID leaves supported.
+ */
+
+#define NX_MAX_CPUID_LEAVES 5
+
+/*
+ * We don't want to intercept instructions coming from the hvm bootstrap code.
+ *
+ */
+#define NS_BIOS_HIGH_ADDR
+/*
+ * Privilege flags.
+ */
+
+#define NS_ACCESS_VP_RUNTIME (1ULL << 0)
+#define NS_ACCESS_TIME_REF_CNT (1ULL << 1)
+#define NS_ACCESS_SYNC_MSRS (1ULL << 2)
+#define NS_ACCESS_SYNC_TIMERS (1ULL << 3)
+#define NS_ACCESS_APIC_MSRS (1ULL << 4)
+#define NS_ACCESS_PARTITION_ID (1ULL << 33)
+
+#define nsGetCurrentPartition() \
+((current)->domain->arch.hvm_domain.ext_handle)
+
+#define nsGetCurrentVcpuIndex() (current)->vcpu_id
+/* Print the current file and line, then call extPanic(x). NOTE: the expansion already ends in ';'. */
+#define NS_PANIC(x) \
+do {\
+ nsXenVector.extPrintk("File is: %s\n", __FILE__);\
+ nsXenVector.extPrintk("Line is: %d\n", __LINE__);\
+ nsXenVector.extPanic((x));\
+} while (0);
+/* Panic via NS_PANIC when x is false; NS_PANIC below has no ';' because its own expansion supplies one. */
+#define NS_ASSERT(x) \
+do {\
+ if (!(x)) \
+ NS_PANIC("ASSERTION FAILED\n")\
+} while (0);
+/* Print file and line, then the message x verbatim ("%s" keeps x from being parsed as a format string). */
+#define nsDebugPrint(x) \
+do { \
+ nsXenVector.extPrintk("File is: %s\n", __FILE__);\
+ nsXenVector.extPrintk("Line is: %d\n", __LINE__);\
+ nsXenVector.extPrintk("%s", (x));\
+} while (0);
+
+/* Hooks into Xen */
+extern xen_call_vector_t nsXenVector;
+
+/*
+ * static inline int
+ * nsInvalidCpuState(void)
+ * Check to see if the calling CPU is in the "correct state" to invoke
+ * the functionality implemented in the Novell Shim (Adaptor).
+ *
+ * Calling/Exit State:
+ * None.
+ */
+
+static inline int
+nsInvalidCpuState(void)
+{
+    /* Mode reported by the HVM function table for the calling vcpu. */
+    const int guestMode = nsXenVector.hvmFuncTable->guest_x86_mode(current);
+    /* Only modes 4 and 8 are accepted by the shim. */
+    const int modeOk = (guestMode == 4) || (guestMode == 8);
+
+    return (modeOk ? 0 : 1);
+}
+
+/*
+ * inline u64
+ * nsBuildHcallRetVal(int code, int reps)
+ *
+ * Given the return code and the count of successfully completed reps,
+ * compose a return value compliant with the Viridian specification.
+ *
+ * Calling/Exit State:
+ * None.
+ */
+
+static inline u64
+nsBuildHcallRetVal(int code, int reps)
+{
+    /* 16-bit status: ns_errno.h codes reach 0x0104, which a 0xff mask would alias to 0x00..0x04. */
+    u64 retVal = (u64)(code & 0xffff);
+    retVal |= ((u64)(reps & 0xfff)) << 32; /* completed reps go in bits 32..43 */
+    return (retVal);
+}
+
+
+/*
+ * static inline void nsSetSysCallRetVal(struct cpu_user_regs *pregs,
+ * int longModeGuest, u64 retVal)
+ * Set the return value in the saved guest registers
+ *
+ * Calling/Exit State:
+ * None.
+ */
+
+static inline void nsSetSysCallRetVal(struct cpu_user_regs *pregs,
+                                      int longModeGuest, u64 retVal)
+{
+    if (!longModeGuest) {
+        /* Not a long-mode guest: upper half goes to edx, lower half to eax. */
+        pregs->edx = (u32)(retVal >> 32);
+        retVal = (u32)retVal;
+    }
+    pregs->eax = retVal;
+}
+
+/*
+ * static inline int
+ * nsPrivilegeCheck(nsPartition_t *curp, u64 flags)
+ * Check if the caller is privileged to perform the operation
+ * specified by the flags argument.
+ *
+ * Calling/Exit State:
+ * None.
+ */
+
+static inline int
+nsPrivilegeCheck(nsPartition_t *curp, u64 flags)
+{
+    return ((curp->nsPrivileges & flags) != 0); /* 1 when any requested bit is granted, else 0 */
+}
+
+/* void
+ * nsHandleHyperCall(u64 opcode, u64 input, u64 output,
+ * u64 *retVal);
+ * Common entry point for handling all the extension hypercalls.
+ *
+ * Calling/Exit State:
+ * Based on the hypercall; the caller may give up the CPU while
+ * processing the hypercall. No locks should be held on entry and
+ * no locks will be held on return.
+ *
+ */
+void
+nsHandleHyperCall(u64 opcode, u64 input, u64 output,
+ u64 *retVal);
+
+/*
+ * void nsDoTlbFlush(void);
+ * Perform TLB flush on the invoking virtual CPU.
+ *
+ * Calling/Exit State:
+ * None.
+ */
+void nsDoTlbFlush(void);
+
+/*
+ * void
+ * nsLockAcquire(nsVcpu_t *vcpup, nsSpinLock_t *nsLock)
+ * Acquire the specified lock.
+ *
+ * Calling/Exit State:
+ * None.
+ */
+
+void nsLockAcquire(nsVcpu_t *vcpup, nsSpinLock_t *lock);
+
+/*
+ * void
+ * nsLockRelease(nsVcpu_t *vcpup, nsSpinLock_t *nsLock)
+ * Release the specified spin lock.
+ *
+ * Calling/Exit State:
+ * None.
+ */
+
+void nsLockRelease(nsVcpu_t *vcpup, nsSpinLock_t *lock);
+
+/*
+ * void
+ * nsLockInit(nsSpinLock_t *nsLock)
+ * Initialize the specified spin lock.
+ *
+ * Calling/Exit State:
+ * None.
+ */
+
+void nsLockInit(nsSpinLock_t *lock);
+
+/*
+ * void nsPrintStats(nsPartition_t *curp, int i)
+ * Print the per-vcpu stats for the specified partition.
+ *
+ * Calling/Exit State:
+ * None.
+ */
+
+void nsPrintStats(nsPartition_t *curp, int i);
+
+#define NS_LOCK_OWNED(v, l) \
+((l)->owner == (v))
+#endif /*NS_SHIM_H */
Index: xen-unstable.hg/xen/arch/x86/hvm/hvm_ext/novell/nshypercall.c
===================================================================
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
+++ xen-unstable.hg/xen/arch/x86/hvm/hvm_ext/novell/nshypercall.c 2008-03-03 14:03:49.000000000 -0500
@@ -0,0 +1,1229 @@
+/****************************************************************************
+ |
+ | Copyright (c) [2007, 2008] Novell, Inc.
+ | All Rights Reserved.
+ |
+ | This program is free software; you can redistribute it and/or
+ | modify it under the terms of version 2 of the GNU General Public License as
+ | published by the Free Software Foundation.
+ |
+ | This program is distributed in the hope that it will be useful,
+ | but WITHOUT ANY WARRANTY; without even the implied warranty of
+ | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ | GNU General Public License for more details.
+ |
+ | You should have received a copy of the GNU General Public License
+ | along with this program; if not, contact Novell, Inc.
+ |
+ | To contact Novell about this file by physical or electronic mail,
+ | you may find current contact information at www.novell.com
+ |
+ |***************************************************************************
+*/
+
+/*
+ * nshypercall.c.
+ * This file implements the hypercall component of the Novell Shim. Hopefully
+ * we can host this component either as a driver in the guest or an extension
+ * to the Xen hypervisor.
+ *
+ * Engineering Contact: K. Y. Srinivasan
+ */
+
+#include <asm/page.h>
+#include <asm/processor.h>
+#include <asm/hvm/support.h>
+#include <xen/cpumask.h>
+#include <xen/event.h>
+
+#include <asm/hvm/hvm_extensions.h>
+#include "ns_shim.h"
+#include "ns_errno.h"
+#include "nshypercall.h"
+
+
+
+void nsDoTlbFlush(void);
+static void
+nsFlushPostProcess(nsPartition_t *curp, nsVcpu_t *curVcpup);
+
+
+
+/*
+ * void nsCollectStats(int event, nsVcpuStats_t *statsp)
+ * Collect stats.
+ *
+ * Calling/Exit State:
+ * None.
+ */
+
+void nsCollectStats(int event, nsVcpuStats_t *statsp)
+{
+ switch (event) {
+ case NS_CSWITCH:
+ statsp->numSwitches++;
+ return;
+ case NS_FLUSH_VA_STAT: /* stats event id from ns_shim.h (was NS_FLUSH_VA, which is not a stats event there) */
+ statsp->numFlushes++;
+ return;
+ case NS_FLUSH_RANGE:
+ statsp->numFlushRanges++;
+ return;
+ case NS_FLUSH_VA_POSTED:
+ statsp->numFlushesPosted++;
+ return;
+ case NS_FLUSH_RANGE_POSTED:
+ statsp->numFlushRangesPosted++;
+ return;
+ case NS_TPR_READ:
+ statsp->numTprReads++;
+ return;
+ case NS_ICR_READ:
+ statsp->numIcrReads++;
+ return;
+ case NS_TPR_WRITE:
+ statsp->numTprWrites++;
+ return;
+ case NS_ICR_WRITE:
+ statsp->numIcrWrites++;
+ return;
+ case NS_EOI_WRITE:
+ statsp->numEoiWrites++;
+ return;
+ /* NOTE(review): NS_TIMEOUTS has no case, so numTimeOuts is never updated here; unknown events are ignored. */
+ case NS_GFS_ACQUIRE:
+ statsp->numGFSAcquires++;
+ return;
+ case NS_GFS_RELEASE:
+ statsp->numGFSReleases++;
+ return;
+ case NS_TLB_FLUSH:
+ statsp->numTlbFlushes++;
+ return;
+ case NS_INVL_PG:
+ statsp->numInvlPages++;
+ return;
+ }
+}
+
+/*
+ * void
+ * nsPrintStats(nsPartition_t *curp, int i)
+ * Print stats.
+ *
+ * Calling/Exit State:
+ * None.
+ */
+void
+nsPrintStats(nsPartition_t *curp, int i)
+{
+ nsVcpu_t *v = &curp->nsVcpuState[i];
+ nsVcpuStats_t *s = &v->nsVcpStats;
+ printk("Printing stats for vcpu ID: %d\n", i);
+ printk("Flush pending: %d\n", (int)v->nsVcpFlushPending);
+ printk("Flush Request: %d\n", (int)v->nsVcpFlushRequest);
+ printk("Waiting on GFS: %d\n", (int)v->nsVcpWaitingOnGFS);
+ printk("Waiting for cleanup: %d\n", (int)v->nsVcpWaitingForCleanup);
+ /* Counters are u64: cast to unsigned long long so %llu matches on both 32- and 64-bit builds. */
+ printk("Number of context switches: %llu\n", (unsigned long long)s->numSwitches);
+ printk("Number of flushes: %llu\n", (unsigned long long)s->numFlushes);
+ printk("Number of flushes posted: %llu\n", (unsigned long long)s->numFlushesPosted);
+ printk("Number of flush ranges: %llu\n", (unsigned long long)s->numFlushRanges);
+ printk("Number of flush ranges posted: %llu\n", (unsigned long long)s->numFlushRangesPosted);
+ printk("Number of TPR reads: %llu\n", (unsigned long long)s->numTprReads);
+ printk("Number of ICR reads: %llu\n", (unsigned long long)s->numIcrReads);
+ printk("Number of Eoi writes: %llu\n", (unsigned long long)s->numEoiWrites);
+ printk("Number of Tpr writes: %llu\n", (unsigned long long)s->numTprWrites);
+ printk("Number of Icr writes: %llu\n", (unsigned long long)s->numIcrWrites);
+ printk("Number of GFS acuires: %llu\n", (unsigned long long)s->numGFSAcquires);
+ printk("Number of GFS releases: %llu\n", (unsigned long long)s->numGFSReleases);
+ printk("Number of TLB flushes: %llu\n", (unsigned long long)s->numTlbFlushes);
+ printk("Number of INVLPG flushes: %llu\n", (unsigned long long)s->numInvlPages);
+ printk("Number of TIMEOUTS: %llu\n", (unsigned long long)s->numTimeOuts);
+
+}
+
+/*
+ * static inline void nsWakeupWaiters(nsPartition_t *curp)
+ * Wakeup all the VCPUs that may be blocked on the Global
+ * flush state waiting to exclusively own the global flush
+ * state.
+ *
+ * Calling/Exit State:
+ * The partition-wide spin lock nsLock is held on entry and
+ * this lock is held on exit.
+ */
+static inline void nsWakeupWaiters(nsPartition_t *curp)
+{
+ int i; /* vcpu id being examined */
+ if (!cpus_empty(curp->nsFlushState.waiters)) {
+ /*
+ * Walk every vcpu id recorded in the waiters mask and
+ * wake the matching Xen vcpu, but only when its
+ * _VPF_blocked_in_xen pause flag was set (it is cleared here).
+ */
+ for (i=0; i < MAX_VIRT_CPUS; i++) {
+ struct vcpu *curVcpu;
+ if (!cpu_isset(i, curp->nsFlushState.waiters))
+ continue;
+ curVcpu =
+ curp->nsVcpuState[i].nsVcpXenVcpu;
+ NS_ASSERT(curVcpu != NULL);
+ if ( test_and_clear_bit(_VPF_blocked_in_xen,
+ &curVcpu->pause_flags) ) {
+ vcpu_wake(curVcpu);
+ }
+ }
+ cpus_clear(curp->nsFlushState.waiters); /* every recorded waiter has been handled */
+ }
+}
+
+/*
+ * static void nsAcquireGlobalFlushState(nsPartition_t *curp, nsVcpu_t *vcpup)
+ * Acquire the global flush state for exclusive use by the calling
+ * VCPU.
+ *
+ * Calling/Exit State:
+ * On entry nsLock is held and this lock is held on exit. If the calling
+ * VCPU is required to give up the CPU, this lock will be dropped.
+ */
+static void nsAcquireGlobalFlushState(nsPartition_t *curp, nsVcpu_t *vcpup)
+{
+acquireGFSAgain: /* retry point after every wakeup */
+ NS_ASSERT(vcpup->nsVcpWaitingOnGFS == 0);
+ NS_ASSERT(vcpup->nsVcpWaitingForCleanup == 0);
+ NS_ASSERT(NS_LOCK_OWNED(vcpup, &curp->nsLock));
+ if (curp->nsFlushState.currentOwner != NULL) {
+ /*
+ * Another vcpu owns the global flush state, i.e. a flush
+ * sequence is in progress.
+ */
+ /*
+ * We have to wait for that sequence to end; the owner
+ * is never the calling vcpu here (asserted below).
+ */
+ NS_ASSERT(curp->nsFlushState.currentOwner != current);
+ NS_ASSERT(vcpup->nsVcpWaitingForCleanup == 0);
+ if (vcpup->nsVcpFlushPending) { /* a flush is posted to us: service it first */
+ nsLockRelease(vcpup, &curp->nsLock); /* nsDoTlbFlush() takes nsLock itself */
+ nsDoTlbFlush();
+ nsLockAcquire(vcpup, &curp->nsLock);
+ }
+ vcpup->nsVcpWaitingOnGFS = 1;
+ cpu_set(current->vcpu_id, curp->nsFlushState.waiters); /* lets nsWakeupWaiters() find us */
+ nsLockRelease(vcpup, &curp->nsLock);
+ wait_on_xen_event_channel(0,
+ ((curp->nsFlushState.currentOwner == NULL) ||
+ (vcpup->nsVcpFlushPending) ||
+ (cpus_empty(curp->nsFlushState.waiters)))); /* condition is evaluated without nsLock held */
+ nsLockAcquire(vcpup, &curp->nsLock);
+ cpu_clear(current->vcpu_id, curp->nsFlushState.waiters);
+ vcpup->nsVcpWaitingOnGFS = 0;
+ goto acquireGFSAgain;
+ }
+ curp->nsFlushState.repCount = vcpup->nsVcpRepCount; /* record our request; currentOwner is not set in this function */
+ curp->nsFlushState.flushParam =
+ vcpup->nsVcpInputBuffer;
+ NS_STATS_COLLECT(NS_GFS_ACQUIRE, &vcpup->nsVcpStats);
+}
+
+/*
+ * static void nsReleaseGlobalFlushState(nsPartition_t *curp, nsVcpu_t *vcpup,
+ * int lockOwned)
+ * There can at most be one TLB flush event active in the system. All of the
+ * VCPUs that are part of the flush sequence need to release their hold
+ * on the global flush object before the global flush object can be freed.
+ * This function manages the release of the global flush object.
+ * If the "lockOwned" parameter is non-zero; on entry the nsLock is held.
+ *
+ * Calling/Exit State:
+ * The current owner of GFS may be forced to give up the CPU.
+ * On exit nsLock is held.
+ */
+static void nsReleaseGlobalFlushState(nsPartition_t *curp, nsVcpu_t *vcpup,
+ int lockOwned)
+{
+ if (!lockOwned) { /* caller does not hold nsLock: take it here */
+ nsLockAcquire(vcpup, &curp->nsLock);
+ }
+ NS_ASSERT(curp->nsFlushState.cpuCount >= 0);
+ NS_ASSERT(curp->nsFlushState.currentOwner != NULL);
+ /* Drop this vcpu's participation in the flush if it had one pending. */
+ if (vcpup->nsVcpFlushPending) {
+ curp->nsFlushState.cpuCount--;
+ NS_ASSERT(curp->nsFlushState.cpuCount >= 0);
+ vcpup->nsVcpFlushPending = 0;
+ mb();
+ }
+ /* Retry point for the owner after it has waited for the remaining participants. */
+nsReleaseGFS:
+ if (curp->nsFlushState.cpuCount > 0) {
+ if (curp->nsFlushState.currentOwner == current) {
+ /*
+ * We are the initiator: wake vcpus queued on the flush
+ * state, then wait (nsLock dropped) until cpuCount is 0.
+ */
+ nsWakeupWaiters(curp);
+ vcpup->nsVcpWaitingForCleanup = 1;
+ nsLockRelease(vcpup, &curp->nsLock);
+ wait_on_xen_event_channel(0,(curp->nsFlushState.cpuCount == 0));
+ nsLockAcquire(vcpup, &curp->nsLock);
+ vcpup->nsVcpWaitingForCleanup = 0;
+ goto nsReleaseGFS;
+ } else {
+ return; /* not the owner and others still pending: nothing more to do; nsLock stays held */
+ }
+ }
+ NS_ASSERT(curp->nsFlushState.cpuCount == 0);
+ if (curp->nsFlushState.currentOwner == current) {
+ /* We are the current owner; do the final cleanup.
+ * First store the return value stashed in the flush state
+ * into the guest registers, then reset the flush state.
+ */
+ NS_STATS_COLLECT(NS_GFS_RELEASE, &vcpup->nsVcpStats);
+ vcpup->nsVcpFlushRequest = 0;
+ vcpup->nsVcpFlushPending = 0;
+ vcpup->nsVcpWaitingForCleanup = 0;
+ nsSetSysCallRetVal(guest_cpu_user_regs(),
+ curp->nsLongModeGuest,
+ curp->nsFlushState.retVal);
+ curp->nsFlushState.cpuCount = 0;
+ curp->nsFlushState.currentOwner = NULL; /* flush state is free from here on */
+ mb();
+ curp->nsFlushState.retVal = 0;
+ curp->nsFlushState.flushParam = NULL;
+ curp->nsFlushState.repCount = 0;
+ nsWakeupWaiters(curp); /* let queued vcpus retry the acquire */
+ } else {
+ /*
+ * We are the last participant but not the owner; wake the owner.
+ */
+ if ( test_and_clear_bit(_VPF_blocked_in_xen,
+ &(curp->nsFlushState.currentOwner->pause_flags))){
+ vcpu_wake(curp->nsFlushState.currentOwner);
+ }
+ }
+}
+
+
+/*
+ * static inline int nsFlushPermitted(nsVcpu_t *vcpup)
+ * Check to see if we can execute a TLB flush on the calling vcpu.
+ *
+ * Calling/Exit State:
+ * None.
+ */
+static inline int nsFlushPermitted(nsVcpu_t *vcpup)
+{
+    int permitted = 0;
+
+    /* Paging is tested first; the CPU mode is only queried when paging is on. */
+    if (hvm_paging_enabled(current) && !nsInvalidCpuState()) {
+        permitted = 1;
+    }
+
+    return (permitted);
+}
+
+/*
+ * void
+ * nsDoTlbFlush(void)
+ * Perform flush operations based on the state of GFS. VCPUs may be
+ * forced to relinquish the physical CPU while attempting to flush; in
+ * those events, this is also the continuation point for execution.
+ *
+ * Calling/Exit State:
+ * None.
+ */
+void
+nsDoTlbFlush(void)
+{
+ nsPartition_t *curp = nsGetCurrentPartition();
+ nsVcpu_t *vcpup = &curp->nsVcpuState[nsGetCurrentVcpuIndex()];
+ flushVa_t *flushArgp;
+ int i,j, numPages;
+ u64 *pgList;
+ long baseVa;
+ unsigned short repCount;
+ /* Entry requirements: interrupts enabled and nsLock not already held by this vcpu. */
+ NS_ASSERT(local_irq_is_enabled());
+
+ NS_ASSERT(vcpup->nsVcplockDepth == 0);
+
+ nsLockAcquire(vcpup, &curp->nsLock);
+ if (vcpup->nsVcpWaitingForCleanup) {
+ /*
+ * Continuation point for the flush owner: finish releasing
+ * the global flush state.
+ */
+ vcpup->nsVcpWaitingForCleanup =0;
+ NS_ASSERT(curp->nsFlushState.currentOwner == current);
+ nsReleaseGlobalFlushState(curp, vcpup, 1);
+ } else if (vcpup->nsVcpWaitingOnGFS) {
+ /*
+ * Continuation point for a vcpu that was waiting for the
+ * global flush state: acquire it and start our own flush.
+ */
+ vcpup->nsVcpWaitingOnGFS =0;
+ nsAcquireGlobalFlushState(curp, vcpup);
+ /*
+ * Now do the rest of the syscall processing
+ */
+ nsFlushPostProcess(curp, vcpup);
+ }
+ if (!vcpup->nsVcpFlushPending) { /* nothing posted to this vcpu: done */
+ nsLockRelease(vcpup, &curp->nsLock);
+ return;
+ }
+ flushArgp = curp->nsFlushState.flushParam; /* snapshot the request while nsLock is held */
+ repCount = curp->nsFlushState.repCount;
+ /*
+ * At this point a flush has been posted; see if we can perform a
+ * flush given our state. If not, just drop our participation.
+ */
+ if (!nsFlushPermitted(vcpup)) {
+ nsReleaseGlobalFlushState(curp, vcpup, 1);
+ nsLockRelease(vcpup, &curp->nsLock);
+ NS_ASSERT(vcpup->nsVcplockDepth == 0);
+ return;
+ }
+ nsLockRelease(vcpup, &curp->nsLock); /* the flush itself runs without nsLock */
+ if (vcpup->nsVcpFlushPending & NS_FLUSH_TLB) {
+ NS_STATS_COLLECT(NS_TLB_FLUSH, &vcpup->nsVcpStats);
+ paging_update_cr3(current);
+ } else {
+ pgList = &flushArgp->gva;
+ NS_ASSERT(vcpup->nsVcpFlushPending == NS_FLUSH_INVLPG);
+ NS_ASSERT(pgList != NULL);
+ NS_ASSERT(repCount >=1);
+ NS_STATS_COLLECT(NS_INVL_PG, &vcpup->nsVcpStats);
+ for (i = 0; i < repCount; i++) {
+ baseVa = (long)(pgList[i] & PAGE_MASK); /* page-aligned start address of this entry */
+ numPages = (int)(~baseVa & pgList[i]); /* entry bits outside PAGE_MASK: extra pages after the first */
+ for (j = 0; j <= numPages; j++) { /* inclusive bound: numPages + 1 pages in total */
+ if (paging_invlpg(current,
+ (baseVa + (j << PAGE_SHIFT)))) {
+ flush_tlb_one_local((baseVa +
+ (j<< PAGE_SHIFT)));
+ }
+ //KYS: need to deal with ASIDS
+ }
+ }
+ }
+ /*
+ * Do post processing on the global flush state; the callee takes
+ * nsLock (lockOwned == 0) and returns with it held.
+ */
+ nsReleaseGlobalFlushState(curp, vcpup, 0);
+ nsLockRelease(vcpup, &curp->nsLock);
+ NS_ASSERT(vcpup->nsVcplockDepth == 0);
+}
+
+/*
+ * static int
+ * nsGetVpRegisters(paddr_t input, paddr_t output)
+ * Get the VCP register state.
+ *
+ * Calling/Exit State:
+ * None.
+ */
+
+static int
+nsGetVpRegisters(paddr_t input, paddr_t output)
+{
+ nsVcpu_t *vcpup, *targetp;
+ nsPartition_t *curp = nsGetCurrentPartition();
+ getVpRegistersInput_t *inBuf;
+ getVpRegistersOutput_t *outBuf;
+ struct vcpu_guest_context *vcpuCtx;
+ u32 *regIndexp;
+ getVpRegistersOutput_t *outRegp;
+ u32 numOutputBytes = 0;
+
+ vcpup = &curp->nsVcpuState[nsGetCurrentVcpuIndex()];
+ inBuf = vcpup->nsVcpInputBuffer;
+ outBuf = vcpup->nsVcpOutputBuffer;
+ outRegp = outBuf;
+ /*
+ * Copy the input data to the per-cpu input buffer.
+ * This may be an overkill; obviously it is better to only
+ * copy what we need. XXXKYS: Check with Mike.
+ */
+ if (nsXenVector.extCopyFromGuestPhysical(inBuf, input, PAGE_SIZE)) {
+ return (NS_STATUS_INVALID_ALIGNMENT);
+ }
+ /*
+ * If the partition ID specified does not match with the current
+ * domain return appropriate error.
+ */
+ if ((u64)current->domain->domain_id != inBuf-> partitionId) {
+ return (NS_STATUS_ACCESS_DENIED);
+ }
+ if (inBuf->vpIndex >= MAX_VIRT_CPUS) { /* nsVcpuState[] has MAX_VIRT_CPUS entries */
+ return (NS_STATUS_INVALID_VP_INDEX);
+ }
+ targetp = &curp->nsVcpuState[inBuf->vpIndex];
+ if (!(targetp->nsVcpuFlags & NS_VCPU_UP)) {
+ return (NS_STATUS_INVALID_VP_STATE);
+ }
+ if ((vcpuCtx =
+ nsXenVector.extAllocMem(sizeof(struct vcpu_guest_context)))
+ == NULL) {
+ return (NS_STATUS_INSUFFICIENT_MEMORY);
+ }
+
+ /*
+ * Get the register state of the specified vcp.
+ */
+ if (current->vcpu_id != inBuf->vpIndex) {
+ nsXenVector.extVcpuPause(targetp->nsVcpXenVcpu);
+ }
+ nsXenVector.extArchGetDomainInfoCtxt(targetp->nsVcpXenVcpu, vcpuCtx);
+ if (current->vcpu_id != inBuf->vpIndex) {
+ nsXenVector.extVcpuUnPause(targetp->nsVcpXenVcpu);
+ }
+ /*
+ * Now that we have the register state; select what we want and
+ * populate the output buffer.
+ */
+ regIndexp = &inBuf->regIndex;
+ while (*regIndexp != 0) {
+ switch (*regIndexp) {
+ /*
+ * XXXKYS: need mapping code here; populate outBuf.
+ * NOTE: with no case label in this switch the panic
+ * below is never executed.
+ */
+ NS_PANIC("nsGetVpRegisters not supported\n");
+ }
+ regIndexp++;
+ outRegp++ ; /*128 bit registers */
+ numOutputBytes +=16;
+ if ((char *)regIndexp >= ((char *)inBuf + PAGE_SIZE)) {
+ /*
+ * Unterminated guest list: fail the call rather than panic.
+ */
+ nsXenVector.extFreeMem(vcpuCtx);
+ return (NS_STATUS_INVALID_HYPERCALL_INPUT);
+ }
+ }
+ if (nsXenVector.extCopyToGuestPhysical(output, outBuf,
+ numOutputBytes)) {
+ nsXenVector.extFreeMem(vcpuCtx); /* copy-out to the guest failed: report it, do not panic */
+ return (NS_STATUS_INVALID_ALIGNMENT);
+ }
+ nsXenVector.extFreeMem(vcpuCtx);
+ return (NS_STATUS_SUCCESS);
+}
+
+/*
+ * static int
+ * nsSetVpRegisters(paddr_t input, paddr_t output)
+ * Set the VCPU register state.
+ *
+ * Calling/Exit State:
+ * None.
+ */
+
+static int
+nsSetVpRegisters(paddr_t input, paddr_t output)
+{
+ nsVcpu_t *vcpup, *targetp;
+ nsPartition_t *curp = nsGetCurrentPartition();
+ setVpRegistersInput_t *inBuf;
+ struct vcpu_guest_context *vcpuCtx;
+ setVpRegisterSpec_t *regIndexp;
+ int retVal = NS_STATUS_SUCCESS;
+
+ vcpup = &curp->nsVcpuState[nsGetCurrentVcpuIndex()];
+ inBuf = vcpup->nsVcpInputBuffer;
+ /*
+ * Copy the input data to the per-cpu input buffer.
+ * This may be an overkill; obviously it is better to only
+ * copy what we need. XXXKYS: Check with Mike.
+ */
+ if (nsXenVector.extCopyFromGuestPhysical(inBuf, input, PAGE_SIZE)) {
+ return (NS_STATUS_INVALID_ALIGNMENT);
+ }
+ /*
+ * If the partition ID specified does not match with the current
+ * domain return appropriate error.
+ */
+ if ((u64)current->domain->domain_id != inBuf-> partitionId) {
+ return (NS_STATUS_ACCESS_DENIED);
+ }
+ if (inBuf->vpIndex >= MAX_VIRT_CPUS) { /* nsVcpuState[] has MAX_VIRT_CPUS entries */
+ return (NS_STATUS_INVALID_VP_INDEX);
+ }
+ targetp = &curp->nsVcpuState[inBuf->vpIndex];
+ if (!(targetp->nsVcpuFlags & NS_VCPU_UP)) {
+ return (NS_STATUS_INVALID_VP_STATE);
+ }
+ if ((vcpuCtx =
+ nsXenVector.extAllocMem(sizeof(struct vcpu_guest_context)))
+ == NULL) {
+ return (NS_STATUS_INSUFFICIENT_MEMORY);
+ }
+ /*
+ * XXXKYS: Is it sufficient to just pause the target vcpu; on the
+ * xen side domain is paused for this call. CHECK.
+ */
+ if (current->vcpu_id != inBuf->vpIndex) {
+ nsXenVector.extVcpuPause(targetp->nsVcpXenVcpu);
+ }
+
+ nsXenVector.extArchGetDomainInfoCtxt(targetp->nsVcpXenVcpu, vcpuCtx);
+ /*
+ * Now that we have the register state; update the register state
+ * based on what we are given.
+ */
+ regIndexp = &inBuf->regSpec;
+ /*
+ * XXXKYS: Assuming the list is terminated by a regName that is 0.
+ * Check with Mike.
+ */
+ while (regIndexp->regName != 0) {
+ switch (regIndexp->regName) {
+ /*
+ * XXXKYS: need mapping code here; populate vcpuCtx.
+ * NOTE: with no case label the panic below never runs.
+ */
+ NS_PANIC("nsSetVpRegisters not supported\n");
+ }
+ regIndexp++;
+ if ((char *)regIndexp >= ((char *)inBuf + PAGE_SIZE)) {
+ /*
+ * Unterminated guest list: fail the call rather than panic.
+ */
+ retVal = NS_STATUS_INVALID_HYPERCALL_INPUT;
+ break;
+ }
+ }
+ /*
+ * Now set register state, unless the input list was rejected above.
+ *
+ * XXXKYS: Is it sufficient to just pause the target vcpu; on the
+ * xen side domain is paused for this call. CHECK.
+ */
+ if ((retVal == NS_STATUS_SUCCESS) &&
+ nsXenVector.extArchSetDomainInfoCtxt(targetp->nsVcpXenVcpu, vcpuCtx)) {
+ retVal = NS_STATUS_INVALID_PARAMETER;
+ }
+ if (current->vcpu_id != inBuf->vpIndex) {
+ nsXenVector.extVcpuUnPause(targetp->nsVcpXenVcpu);
+ }
+ nsXenVector.extFreeMem(vcpuCtx);
+ return (retVal);
+}
+
+/*
+ * static int
+ * nsSwitchVa(paddr_t input)
+ *
+ * Switch the page table base of the calling vcpu to the value given in
+ * "input" and account for the switch in the vcpu statistics.
+ *
+ * Calling/Exit State:
+ * None.  Always returns NS_STATUS_SUCCESS.
+ *
+ * Remarks:
+ * XXXKYS: the spec says the new address space ID is passed via memory,
+ * at offset 0 of the page whose guest physical address is in the input
+ * register.  The longhorn build tested so far
+ * (longhorn-2007-02-06-x86_64-fv-02) appears to pass the value in the
+ * input register itself, so "input" is handed to hvm_set_cr3() as is.
+ * Need to check whether future builds still do this.
+ */
+static int
+nsSwitchVa(paddr_t input)
+{
+    nsPartition_t *part = nsGetCurrentPartition();
+    nsVcpu_t *self = &part->nsVcpuState[nsGetCurrentVcpuIndex()];
+
+    hvm_set_cr3(input);
+    NS_STATS_COLLECT(NS_CSWITCH, &self->nsVcpStats);
+
+    return NS_STATUS_SUCCESS;
+}
+
+/*
+ * static void
+ * nsFlushPostProcess(nsPartition_t *curp, nsVcpu_t *curVcpup)
+ *
+ * Perform the flush operation once GFS (the global flush state) is
+ * acquired: post the calling vcpu's flush request to every target vcpu.
+ *
+ * The target set is the vMask field of the calling vcpu's input buffer
+ * and the request type is curVcpup->nsVcpFlushRequest.  Targets that
+ * are not NS_VCPU_UP, or for which nsFlushPermitted() fails, are
+ * skipped; every other target has its nsVcpFlushPending set, is counted
+ * in nsFlushState.cpuCount and is kicked.  The function returns nothing;
+ * the hypercall result for this round is stored in nsFlushState.retVal.
+ *
+ * Calling/Exit State:
+ * On entry nsLock is held; on exit this lock continues to be held.
+ * nsFlushState.cpuCount must be 0 and nsFlushState.currentOwner must be
+ * NULL on entry (both are asserted).
+ */
+
+static void
+nsFlushPostProcess(nsPartition_t *curp, nsVcpu_t *curVcpup)
+{
+ int target;
+ nsVcpu_t *vcpup;
+ cpumask_t vcpuMask; /* local copy of the target set; bits are cleared as they are handled */
+ struct flushVa *flushArgp;
+
+ flushArgp = curVcpup->nsVcpInputBuffer;
+ vcpuMask = flushArgp->vMask;
+ /*
+ * On entry we must own the global flush state: the lock is ours and
+ * no other flush round is recorded in nsFlushState.
+ */
+ NS_ASSERT(NS_LOCK_OWNED(curVcpup, &curp->nsLock));
+ NS_ASSERT(curp->nsFlushState.cpuCount == 0);
+ NS_ASSERT(curp->nsFlushState.currentOwner == NULL);
+
+ curp->nsFlushState.retVal =
+ nsBuildHcallRetVal(NS_STATUS_SUCCESS, curVcpup->nsVcpRepCount);
+ curp->nsFlushState.currentOwner = current;
+ if (cpu_isset(current->vcpu_id, vcpuMask)) { /* the caller is itself a target */
+ curp->nsFlushState.cpuCount = 1;
+ curVcpup->nsVcpFlushPending =
+ curVcpup->nsVcpFlushRequest;
+ mb();
+#ifdef NS_STATS
+ if (curVcpup->nsVcpFlushRequest == NS_FLUSH_TLB) {
+ NS_STATS_COLLECT(NS_FLUSH_VA_POSTED, &curVcpup->nsVcpStats);
+ } else {
+ NS_STATS_COLLECT(NS_FLUSH_RANGE_POSTED, &curVcpup->nsVcpStats);
+ }
+#endif
+
+ cpu_clear(current->vcpu_id, vcpuMask); /* the caller needs no kick; drop it from the set */
+ }
+ if (cpus_empty(vcpuMask)) {
+ /*
+ * We are done: no vcpu other than (possibly) the caller is
+ * targeted.
+ */
+ goto flushVaDone;
+ }
+ while (!cpus_empty(vcpuMask)) {
+ target = first_cpu(vcpuMask);
+ vcpup = &curp->nsVcpuState[target]; /* NOTE(review): target is not range-checked here; relies on the caller's mask */
+ cpu_clear(target, vcpuMask);
+ if (!(vcpup->nsVcpuFlags & NS_VCPU_UP)) {
+ continue;
+ }
+ if (!nsFlushPermitted(vcpup)) {
+ continue;
+ }
+ curp->nsFlushState.cpuCount++;
+ vcpup->nsVcpFlushPending =
+ curVcpup->nsVcpFlushRequest;
+ mb();
+#ifdef NS_STATS
+ if (curVcpup->nsVcpFlushRequest == NS_FLUSH_TLB) {
+ NS_STATS_COLLECT(NS_FLUSH_VA_POSTED, &vcpup->nsVcpStats);
+ } else {
+ NS_STATS_COLLECT(NS_FLUSH_RANGE_POSTED, &vcpup->nsVcpStats);
+ }
+#endif
+
+ /*
+ * We need to force these VCPUs into the hypervisor for
+ * them to act on the pending request.  A target that is
+ * blocked in xen is additionally woken up.
+ */
+
+ vcpu_kick(vcpup->nsVcpXenVcpu);
+ if ( test_and_clear_bit(_VPF_blocked_in_xen,
+ &vcpup->nsVcpXenVcpu->pause_flags) ) {
+ vcpu_wake(vcpup->nsVcpXenVcpu);
+ }
+
+ }
+ /*
+ * Now that we have posted the state; wait for other CPUs to perform
+ * flushes; we need to wait for all the CPUs to complete the flush
+ * before returning.
+ */
+flushVaDone:
+ /*
+ * If we are included in this round of tlb flush (our own
+ * nsVcpFlushPending is set) we will wait for other CPUs in the tlb
+ * flush function; else we wait right here.
+ */
+ if (!curVcpup->nsVcpFlushPending) {
+ nsReleaseGlobalFlushState(curp, curVcpup, 1);
+ }
+ return;
+}
+
+/*
+ * static int
+ * nsFlushVa(paddr_t input)
+ * Perform a TLB flush on the specified set of VCPUs.
+ *
+ * input is the guest physical address of a flushVa_t describing the
+ * request; it is copied into the calling vcpu's input buffer, where the
+ * processed parameters are stashed for nsFlushPostProcess().
+ *
+ * Returns NS_STATUS_INVALID_ALIGNMENT if the input cannot be copied,
+ * NS_STATUS_INVALID_PARAMETER for an unusable flags/processor mask
+ * combination and NS_STATUS_SUCCESS otherwise.  On the success path the
+ * guest's hypercall return value is set through nsSetSysCallRetVal()
+ * before the flush is posted.
+ *
+ * Calling/Exit State:
+ * No locks can be held on entry and no locks will be held on return.
+ * The calling VCPU may relinquish the physical CPU.
+ */
+static int
+nsFlushVa(paddr_t input)
+{
+    nsPartition_t *curp = nsGetCurrentPartition();
+    int i;
+    nsVcpu_t *curVcpup;
+
+    flushVa_t *flushArgp;
+    cpumask_t vcpuMask;
+    u64 asId, inputMask, retVal;
+    int flushGlobal = 1;
+
+    curVcpup = &curp->nsVcpuState[nsGetCurrentVcpuIndex()];
+    flushArgp = curVcpup->nsVcpInputBuffer;
+
+    NS_ASSERT(curVcpup->nsVcplockDepth == 0);
+    NS_ASSERT(curVcpup->nsVcpFlushRequest == 0);
+    NS_ASSERT(curVcpup->nsVcpWaitingForCleanup == 0);
+    NS_ASSERT(curVcpup->nsVcpWaitingOnGFS == 0);
+
+    if (nsXenVector.extCopyFromGuestPhysical(flushArgp, input,
+                                             sizeof(*flushArgp))) {
+        return (NS_STATUS_INVALID_ALIGNMENT);
+    }
+    inputMask = flushArgp->pMask;
+    asId = flushArgp->asHandle;
+    cpus_clear(vcpuMask);
+    /*
+     * Deal with all trivial error conditions: a non-zero flags word
+     * without any known flag, or an empty processor mask without
+     * NS_FLUSH_ALL_PROCESSORS.
+     */
+    if (flushArgp->flags != 0 && (!(flushArgp->flags &
+        (NS_FLUSH_ALL_PROCESSORS |
+         NS_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES |
+         NS_FLUSH_NON_GLOBAL_MAPPINGS_ONLY)))) {
+        return (NS_STATUS_INVALID_PARAMETER);
+    }
+    if (((flushArgp->pMask) == 0) &&
+        !(flushArgp->flags & NS_FLUSH_ALL_PROCESSORS)) {
+        return (NS_STATUS_INVALID_PARAMETER);
+    }
+
+    if (flushArgp->flags & NS_FLUSH_ALL_PROCESSORS) {
+        for (i = 0; i < MAX_VIRT_CPUS; i++) {
+            if (current->domain->vcpu[i] != NULL) {
+                cpu_set(i, vcpuMask);
+            }
+        }
+    } else {
+        /*
+         * The mask comes straight from the guest.  Only bits that name
+         * an existing vcpu below MAX_VIRT_CPUS are honoured; anything
+         * else would later be used as an out-of-range index into
+         * nsVcpuState[] by nsFlushPostProcess().
+         */
+        for (i = 0; inputMask != 0 && i < MAX_VIRT_CPUS;
+             i++, inputMask >>= 1) {
+            if ((inputMask & 0x1) &&
+                current->domain->vcpu[i] != NULL) {
+                cpu_set(i, vcpuMask);
+            }
+        }
+    }
+
+    if (flushArgp->flags & NS_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES) {
+        asId = NS_ALL_AS;
+    }
+    if (flushArgp->flags & NS_FLUSH_NON_GLOBAL_MAPPINGS_ONLY) {
+        flushGlobal = 0;
+    }
+    /*
+     * Now operate on what we are given
+     * XXXKYS: For now we are ignoring asId and flushGlobal flag.
+     * May have to revisit this. But first stash away the processed
+     * parameters for subsequent use.  Note that vMask shares storage
+     * with pMask, so the guest's mask is overwritten here.
+     */
+    flushArgp->asHandle = asId;
+    flushArgp->flags = flushGlobal;
+    flushArgp->vMask = vcpuMask;
+
+    curVcpup->nsVcpRepCount = 0;
+    curVcpup->nsVcpFlushRequest = NS_FLUSH_TLB;
+
+    retVal = nsBuildHcallRetVal(NS_STATUS_SUCCESS, 0);
+    nsSetSysCallRetVal(guest_cpu_user_regs(),
+                       curp->nsLongModeGuest,
+                       retVal);
+    NS_STATS_COLLECT(NS_FLUSH_VA_STAT, &curVcpup->nsVcpStats);
+    nsLockAcquire(curVcpup, &curp->nsLock);
+    nsAcquireGlobalFlushState(curp, curVcpup);
+    nsFlushPostProcess(curp, curVcpup);
+    nsLockRelease(curVcpup, &curp->nsLock);
+    return (NS_STATUS_SUCCESS);
+}
+
+/*
+ * static int
+ * nsFlushVaRange(paddr_t input, unsigned short startIndex,
+ * unsigned short repCount, unsigned short *repsDone)
+ * Perform a INVLPG flush on the specified set of VCPUs.
+ *
+ * input is the guest physical address of a flushVa_t followed by
+ * (repCount - 1) further 8-byte entries; the whole list is copied into
+ * the calling vcpu's input buffer.  startIndex is currently unused.
+ * *repsDone is set to repCount once the request has been validated and
+ * is left untouched on every error return.
+ *
+ * Returns NS_STATUS_INVALID_PARAMETER if repCount is 0, if the list does
+ * not fit into one page or if the flags/processor mask combination is
+ * unusable; NS_STATUS_INVALID_ALIGNMENT if the input cannot be copied;
+ * NS_STATUS_SUCCESS otherwise.  On the success path the guest's
+ * hypercall return value is set through nsSetSysCallRetVal() before the
+ * flush is posted.
+ *
+ * Calling/Exit State:
+ * No locks can be held on entry and no locks will be held on return.
+ * The calling VCPU may relinquish the physical CPU.
+ */
+static int
+nsFlushVaRange(paddr_t input, unsigned short startIndex,
+unsigned short repCount, unsigned short *repsDone)
+{
+    nsVcpu_t *curVcpup;
+    nsPartition_t *curp = nsGetCurrentPartition();
+    flushVa_t *flushArgp;
+    cpumask_t vcpuMask;
+    u64 asId, inputMask, retVal;
+    unsigned long copyLen;
+    int flushGlobal = 1;
+    int i;
+
+    curVcpup = &curp->nsVcpuState[nsGetCurrentVcpuIndex()];
+    flushArgp = curVcpup->nsVcpInputBuffer;
+    NS_ASSERT(curVcpup->nsVcplockDepth == 0);
+    NS_ASSERT(curVcpup->nsVcpFlushRequest == 0);
+    NS_ASSERT(curVcpup->nsVcpWaitingForCleanup == 0);
+    NS_ASSERT(curVcpup->nsVcpWaitingOnGFS == 0);
+    /*
+     * repCount is taken from the guest's hypercall opcode, so a bad
+     * value is a guest error and must be reported as one rather than
+     * asserted on: the copy below may not exceed one page.
+     */
+    if (repCount < 1) {
+        return (NS_STATUS_INVALID_PARAMETER);
+    }
+    copyLen = sizeof(*flushArgp) + 8UL * (repCount - 1);
+    if (copyLen > PAGE_SIZE) {
+        return (NS_STATUS_INVALID_PARAMETER);
+    }
+    if (nsXenVector.extCopyFromGuestPhysical(flushArgp, input, copyLen)) {
+        return (NS_STATUS_INVALID_ALIGNMENT);
+    }
+    inputMask = flushArgp->pMask;
+    asId = flushArgp->asHandle;
+    cpus_clear(vcpuMask);
+    /*
+     * Deal with all trivial error conditions: a non-zero flags word
+     * without any known flag, or an empty processor mask without
+     * NS_FLUSH_ALL_PROCESSORS.
+     */
+    if (flushArgp->flags != 0 && (!(flushArgp->flags &
+        (NS_FLUSH_ALL_PROCESSORS |
+         NS_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES |
+         NS_FLUSH_NON_GLOBAL_MAPPINGS_ONLY)))) {
+        return (NS_STATUS_INVALID_PARAMETER);
+    }
+    if ((flushArgp->pMask == 0) &&
+        !(flushArgp->flags & NS_FLUSH_ALL_PROCESSORS)) {
+        return (NS_STATUS_INVALID_PARAMETER);
+    }
+    /* The request is acceptable; all reps are handled in one go. */
+    *repsDone = repCount;
+
+    if (flushArgp->flags & NS_FLUSH_ALL_PROCESSORS) {
+        for (i = 0; i < MAX_VIRT_CPUS; i++) {
+            if (current->domain->vcpu[i] != NULL) {
+                cpu_set(i, vcpuMask);
+            }
+        }
+    } else {
+        /*
+         * Populate the vcpu mask based on the input.  The mask comes
+         * straight from the guest: only bits that name an existing
+         * vcpu below MAX_VIRT_CPUS are honoured, since anything else
+         * would later be used as an out-of-range index into
+         * nsVcpuState[] by nsFlushPostProcess().
+         */
+        for (i = 0; inputMask != 0 && i < MAX_VIRT_CPUS;
+             i++, inputMask >>= 1) {
+            if ((inputMask & 0x1) &&
+                current->domain->vcpu[i] != NULL) {
+                cpu_set(i, vcpuMask);
+            }
+        }
+    }
+    if (flushArgp->flags & NS_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES) {
+        asId = NS_ALL_AS;
+    }
+    if (flushArgp->flags & NS_FLUSH_NON_GLOBAL_MAPPINGS_ONLY) {
+        flushGlobal = 0;
+    }
+    /*
+     * Now operate on what we are given
+     * XXXKYS: For now we are ignoring asId and flushGlobal flag.
+     * May have to revisit this. But first stash away the processed
+     * parameters for subsequent use.  Note that vMask shares storage
+     * with pMask, so the guest's mask is overwritten here.
+     */
+    flushArgp->asHandle = asId;
+    flushArgp->flags = flushGlobal;
+    flushArgp->vMask = vcpuMask;
+
+    curVcpup->nsVcpRepCount = repCount;
+    curVcpup->nsVcpFlushRequest = NS_FLUSH_INVLPG;
+
+    retVal = nsBuildHcallRetVal(NS_STATUS_SUCCESS, repCount);
+    nsSetSysCallRetVal(guest_cpu_user_regs(),
+                       curp->nsLongModeGuest,
+                       retVal);
+
+    NS_STATS_COLLECT(NS_FLUSH_RANGE, &curVcpup->nsVcpStats);
+    nsLockAcquire(curVcpup, &curp->nsLock);
+    nsAcquireGlobalFlushState(curp, curVcpup);
+    nsFlushPostProcess(curp, curVcpup);
+    nsLockRelease(curVcpup, &curp->nsLock);
+    return (NS_STATUS_SUCCESS);
+}
+
+/* void
+ * nsHandleHyperCall(u64 opcode, u64 input, u64 output,
+ * u64 *retVal);
+ * Common entry point for handling all the extension hypercalls.
+ *
+ * opcode carries the verb in bits 0-15, the rep count in bits 32-43 and
+ * the rep start index in bits 48-59.  input and output are the
+ * hypercall's input and output parameters (guest physical addresses for
+ * the calls that take memory operands).  The result, packed by
+ * nsBuildHcallRetVal(), is stored through retVal.
+ *
+ * Only NS_GET_PARTITION_ID, NS_GET_VP_REGISTERS, NS_SET_VP_REGISTERS,
+ * NS_SWITCH_VA, NS_FLUSH_VA and NS_FLUSH_VA_LIST are implemented.  Every
+ * other known verb is refused with NS_STATUS_ACCESS_DENIED; verb 0 and
+ * unknown verbs get NS_STATUS_INVALID_HYPERCALL_CODE.
+ *
+ * Calling/Exit State:
+ * Based on the hypercall; the caller may give up the CPU while
+ * processing the hypercall. No locks should be held on entry and
+ * no locks will be held on return.
+ *
+ */
+
+void
+nsHandleHyperCall(u64 opcode, u64 input, u64 output,
+                  u64 *retVal)
+{
+    unsigned short verb;
+    unsigned short repCount;
+    unsigned short repsDone = 0;
+    unsigned short startIndex;
+    nsPartition_t *curp = nsGetCurrentPartition();
+    u64 partitionId;
+    int value;
+
+    verb = (short)(opcode & 0xffff);
+    repCount = (short)((opcode >> 32) & 0xfff);
+    startIndex = (short)((opcode >> 48) & 0xfff);
+
+    switch (verb) {
+    case NS_GET_PARTITION_ID:
+        if (!nsPrivilegeCheck(curp, NS_ACCESS_PARTITION_ID)) {
+            *retVal =
+                nsBuildHcallRetVal(NS_STATUS_ACCESS_DENIED, 0);
+            return;
+        }
+        partitionId = (u64)current->domain->domain_id;
+        if (nsXenVector.extCopyToGuestPhysical(output,
+                                               &partitionId, 8)) {
+            /*
+             * Invalid output area.
+             */
+            *retVal =
+                nsBuildHcallRetVal(NS_STATUS_ACCESS_DENIED, 0);
+            return;
+        }
+        *retVal = nsBuildHcallRetVal(NS_STATUS_SUCCESS, 0);
+        return;
+
+    case NS_GET_VP_REGISTERS:
+        *retVal = nsBuildHcallRetVal(
+            nsGetVpRegisters(input, output), 0);
+        return;
+
+    case NS_SET_VP_REGISTERS:
+        *retVal = nsBuildHcallRetVal(
+            nsSetVpRegisters(input, output), 0);
+        /*
+         * This return was missing: control fell through into
+         * NS_SWITCH_VA, which reloaded the page table base from
+         * "input" and replaced the result computed above.
+         */
+        return;
+
+    case NS_SWITCH_VA:
+        *retVal =
+            nsBuildHcallRetVal(nsSwitchVa(input), 0);
+        return;
+
+    case NS_FLUSH_VA:
+        *retVal =
+            nsBuildHcallRetVal(nsFlushVa(input), 0);
+        return;
+
+    case NS_FLUSH_VA_LIST:
+        value = nsFlushVaRange(input, startIndex,
+                               repCount, &repsDone);
+        *retVal = nsBuildHcallRetVal(value, repsDone);
+        return;
+
+    /*
+     * Xen only allows dom0 to create domains.
+     */
+    case NS_CREATE_PARTITION:
+    /*
+     * We don't support any of the following.
+     */
+    case NS_INITIALIZE_PARTITION:
+    case NS_DELETE_PARTITION:
+    case NS_GET_PARTITION_PROPERTY:
+    case NS_SET_PARTITION_PROPERTY:
+    case NS_GET_NEXT_CHILD_PARTITION:
+    case NS_SET_LOGICAL_PROCESSOR_RUN_TIME_GROUP:
+    case NS_CLEAR_LOGICAL_PROCESSOR_RUN_TIME_GROUP:
+    case NS_NOTIFY_LOGICAL_PROCESSOR_POWER_STATE:
+    case NS_GET_LOGICAL_PROCESSOR_RUN_TIME:
+    case NS_DEPOSIT_MEMORY:
+    case NS_WITHDRAW_MEMORY:
+    case NS_GET_MEMORY_BALANCE:
+    case NS_MAP_GPA_PAGES:
+    case NS_UNMAP_GPA_PAGES:
+    case NS_INSTALL_INTERCEPT:
+    case NS_CREATE_VP:
+    case NS_TERMINATE_VP:
+    case NS_DELETE_VP:
+    case NS_GET_NEXT_VP:
+    case NS_TRASLATE_VA:
+    case NS_READ_GPA:
+    case NS_WRITE_GPA:
+    case NS_ASSERT_VIRTUAL_INTERRUPT:
+    case NS_CLEAR_VIRTUAL_INTERRUPT:
+    case NS_CREATE_PORT:
+    case NS_DELETE_PORT:
+    case NS_CONNECT_PORT:
+    case NS_GET_PORT_PROPERTY:
+    case NS_DISCONNECT_PORT:
+    case NS_POST_MESSAGE:
+    case NS_POST_EVENT:
+        *retVal = nsBuildHcallRetVal(NS_STATUS_ACCESS_DENIED, 0);
+        return;
+
+    case 0:
+        /*
+         * 32 bit longhorn invokes hypercall with verb == 0; need to
+         * check with Mike (XXXKYS). For now ignore it (no message is
+         * logged, unlike the default case).
+         */
+        *retVal =
+            nsBuildHcallRetVal(NS_STATUS_INVALID_HYPERCALL_CODE, 0);
+        return;
+
+    default:
+        nsXenVector.extPrintk("Unkown hypercall: verb is: %d\n", verb);
+        *retVal =
+            nsBuildHcallRetVal(NS_STATUS_INVALID_HYPERCALL_CODE, 0);
+        return;
+    }
+}
Index: xen-unstable.hg/xen/arch/x86/hvm/hvm_ext/novell/nshypercall.h
===================================================================
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
+++ xen-unstable.hg/xen/arch/x86/hvm/hvm_ext/novell/nshypercall.h 2008-03-03 14:03:49.000000000 -0500
@@ -0,0 +1,125 @@
+/****************************************************************************
+ |
+ | Copyright (c) [2007, 2008] Novell, Inc.
+ | All Rights Reserved.
+ |
+ | This program is free software; you can redistribute it and/or
+ | modify it under the terms of version 2 of the GNU General Public License as
+ | published by the Free Software Foundation.
+ |
+ | This program is distributed in the hope that it will be useful,
+ | but WITHOUT ANY WARRANTY; without even the implied warranty of
+ | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ | GNU General Public License for more details.
+ |
+ | You should have received a copy of the GNU General Public License
+ | along with this program; if not, contact Novell, Inc.
+ |
+ | To contact Novell about this file by physical or electronic mail,
+ | you may find current contact information at www.novell.com
+ |
+ |***************************************************************************
+*/
+
+/*
+ * nshypercall.h
+ * Memory layouts for the various hypercalls supported.
+ *
+ * Engineering Contact: K. Y. Srinivasan
+ */
+
+#ifndef NS_HYPERCALL_H
+#define NS_HYPERCALL_H
+
+#include <xen/cpumask.h>
+
+
+typedef struct getVpRegistersInput { /* presumably the input block of NS_GET_VP_REGISTERS -- TODO confirm against nsGetVpRegisters() */
+ u64 partitionId; /* presumably the id of the partition being queried -- confirm */
+ u64 vpIndex; /* presumably the index of the vcpu being queried -- confirm */
+ u32 regIndex; /* presumably selects the register to read -- confirm */
+} getVpRegistersInput_t;
+
+typedef struct getVpRegistersOutput { /* presumably the output block of NS_GET_VP_REGISTERS -- TODO confirm against nsGetVpRegisters() */
+ u64 lowValue; /* presumably the low 64 bits of the register value -- confirm */
+ u64 highValue; /* presumably the high 64 bits of the register value -- confirm */
+} getVpRegistersOutput_t;
+
+
+
+typedef struct setVpRegisterSpec { /* one entry of the register list walked by nsSetVpRegisters() */
+ u32 regName; /* register selector; nsSetVpRegisters() treats 0 as the end of the list */
+ u32 pad; /* padding */
+ u64 pad1; /* padding */
+ u64 lowValue; /* presumably the low 64 bits of the new value -- confirm */
+ u64 highValue; /* presumably the high 64 bits of the new value -- confirm */
+} setVpRegisterSpec_t;
+typedef struct setVpRegistersInput { /* input block read by nsSetVpRegisters() */
+ u64 partitionId; /* must equal the caller's domain id, else NS_STATUS_ACCESS_DENIED */
+ u64 vpIndex; /* index of the target vcpu in the partition's nsVcpuState[] */
+ setVpRegisterSpec_t regSpec; /* first entry of a list; walked until regName == 0 */
+} setVpRegistersInput_t;
+
+
+typedef struct flushVa { /* input block of NS_FLUSH_VA / NS_FLUSH_VA_LIST; reused to stash the processed request */
+ u64 asHandle; /* address space id from the guest; replaced by NS_ALL_AS when all address spaces are requested */
+ u64 flags; /* NS_FLUSH_* bits from the guest; overwritten with the 0/1 flushGlobal value after processing */
+ union {
+ u64 processorMask; /* as supplied by the guest: one bit per vcpu index */
+ cpumask_t vcpuMask; /* as written by nsFlushVa()/nsFlushVaRange() for nsFlushPostProcess() */
+ } procMask; /* NOTE(review): if sizeof(cpumask_t) exceeds 8 bytes the offset of gva no longer matches a u64 mask layout -- confirm */
+#define pMask procMask.processorMask
+#define vMask procMask.vcpuMask
+ u64 gva; /* first 8-byte list entry; nsFlushVaRange() copies repCount of them */
+} flushVa_t;
+
+#define NS_FLUSH_ALL_PROCESSORS 0x00000001 /* flushVa.flags: target every existing vcpu of the domain instead of pMask */
+#define NS_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES 0x00000002 /* flushVa.flags: asHandle is replaced by NS_ALL_AS */
+#define NS_FLUSH_NON_GLOBAL_MAPPINGS_ONLY 0x00000004 /* flushVa.flags: clears the stashed flushGlobal value */
+
+#define NS_ALL_AS (-1) /* address space handle standing for all address spaces */
+
+/*
+ * Hypercall verbs: the call code carried in the low 16 bits of the
+ * hypercall opcode, as decoded by nsHandleHyperCall().  The list is
+ * not in numeric order.  NS_TRASLATE_VA is spelled this way at its
+ * point of use as well, so the name cannot be corrected here alone.
+ */
+
+#define NS_CREATE_PARTITION 0x0010
+#define NS_INITIALIZE_PARTITION 0x0011
+#define NS_DELETE_PARTITION 0x0014
+#define NS_GET_PARTITION_PROPERTY 0x0017
+#define NS_SET_PARTITION_PROPERTY 0x0018
+#define NS_GET_PARTITION_ID 0x0015
+#define NS_GET_NEXT_CHILD_PARTITION 0x0016
+#define NS_SET_LOGICAL_PROCESSOR_RUN_TIME_GROUP 0x0005
+#define NS_CLEAR_LOGICAL_PROCESSOR_RUN_TIME_GROUP 0x0006
+#define NS_NOTIFY_LOGICAL_PROCESSOR_POWER_STATE 0x0007
+#define NS_GET_LOGICAL_PROCESSOR_RUN_TIME 0x0004
+#define NS_DEPOSIT_MEMORY 0x001C
+#define NS_WITHDRAW_MEMORY 0x001D
+#define NS_GET_MEMORY_BALANCE 0x001E
+#define NS_MAP_GPA_PAGES 0x001A
+#define NS_UNMAP_GPA_PAGES 0x001B
+#define NS_INSTALL_INTERCEPT 0x0019
+#define NS_CREATE_VP 0x001F
+#define NS_TERMINATE_VP 0x0020
+#define NS_DELETE_VP 0x0021
+#define NS_GET_NEXT_VP 0x0027
+#define NS_GET_VP_REGISTERS 0x0022
+#define NS_SET_VP_REGISTERS 0x0023
+#define NS_SWITCH_VA 0x0001
+#define NS_FLUSH_VA 0x0002
+#define NS_FLUSH_VA_LIST 0x0003
+#define NS_TRASLATE_VA 0x0024
+#define NS_READ_GPA 0x0025
+#define NS_WRITE_GPA 0x0026
+#define NS_ASSERT_VIRTUAL_INTERRUPT 0x002A
+#define NS_CLEAR_VIRTUAL_INTERRUPT 0x002C
+#define NS_CREATE_PORT 0x002D
+#define NS_DELETE_PORT 0x002E
+#define NS_CONNECT_PORT 0x002F
+#define NS_GET_PORT_PROPERTY 0x0031
+#define NS_DISCONNECT_PORT 0x0030
+#define NS_POST_MESSAGE 0x0032
+#define NS_POST_EVENT 0x0034
+
+#endif /* NS_HYPERCALL_H */
Index: xen-unstable.hg/xen/arch/x86/hvm/hvm_ext/novell/nsintercept.c
===================================================================
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
+++ xen-unstable.hg/xen/arch/x86/hvm/hvm_ext/novell/nsintercept.c 2008-03-04 18:38:32.000000000 -0500
@@ -0,0 +1,2094 @@
+/****************************************************************************
+ |
+ | Copyright (c) [2007, 2008] Novell, Inc.
+ | All Rights Reserved.
+ |
+ | This program is free software; you can redistribute it and/or
+ | modify it under the terms of version 2 of the GNU General Public License as
+ | published by the Free Software Foundation.
+ |
+ | This program is distributed in the hope that it will be useful,
+ | but WITHOUT ANY WARRANTY; without even the implied warranty of
+ | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ | GNU General Public License for more details.
+ |
+ | You should have received a copy of the GNU General Public License
+ | along with this program; if not, contact Novell, Inc.
+ |
+ | To contact Novell about this file by physical or electronic mail,
+ | you may find current contact information at www.novell.com
+ |
+ |***************************************************************************
+*/
+
+/*
+ * nsintercept.c.
+ * This file implements the intercepts to support the Novell Shim.
+ *
+ * Engineering Contact: K. Y. Srinivasan
+ */
+
+#include <asm/hvm/hvm_extensions.h>
+
+
+#include <asm/config.h>
+#include <asm/hvm/io.h>
+#include <asm/processor.h>
+#include <asm/page.h>
+#include <asm/apicdef.h>
+#include <asm/regs.h>
+#include <asm/msr.h>
+
+#include <xen/string.h>
+#include <xen/init.h>
+#include <xen/compile.h>
+#include <xen/hvm/save.h>
+#include <public/sched.h>
+
+
+/*
+ * Local includes; extension specific.
+ */
+#include "ns_errno.h"
+#include "ns_shim.h"
+
+
+/*
+ * Implement Novell Shim.
+ */
+
+
+/*
+ * Forward declarations of the handlers installed in the hypervisor
+ * intercept vector (nsExtensionVector).
+ */
+static int nsDomainCreate(struct domain *d);
+static void nsDomainDestroy(struct domain *d);
+static int nsVcpuInitialize(struct vcpu *v);
+static void nsVcpuUp(struct vcpu *v);
+static void nsVcpuDestroy(struct vcpu *v);
+static int nsDoCpuId(uint32_t input, struct cpu_user_regs *regs);
+static int nsDoRdMsr(uint32_t idx, struct cpu_user_regs *regs);
+static int nsDoWrMsr(uint32_t idx, struct cpu_user_regs *regs);
+static int nsDoHyperCall(struct cpu_user_regs *pregs);
+static void nsDoMigrateTimers(struct vcpu *v);
+
+/*
+ * Intercept vector handed to hvm_ext_register() by nsExtensionInit().
+ */
+extension_intercept_vector_t nsExtensionVector = {
+    .domain_create      = nsDomainCreate,
+    .domain_destroy     = nsDomainDestroy,
+    .vcpu_initialize    = nsVcpuInitialize,
+    .vcpu_up            = nsVcpuUp,
+    .vcpu_destroy       = nsVcpuDestroy,
+    .do_cpuid           = nsDoCpuId,
+    .do_msr_read        = nsDoRdMsr,
+    .do_msr_write       = nsDoWrMsr,
+    .do_hypercall       = nsDoHyperCall,
+    .do_continuation    = nsDoTlbFlush,
+    .do_migrate_timers  = nsDoMigrateTimers,
+};
+
+/*
+ * Hooks into xen services; to be populated by our proxy in xen.
+ * Its address is handed to hvm_ext_register() in nsExtensionInit(),
+ * and the rest of the shim reaches xen through its members (for
+ * example extPrintk and hvmFuncTable).
+ */
+
+xen_call_vector_t nsXenVector;
+
+/*
+ * Forward declarations of local helpers that are used before they are
+ * defined.
+ */
+static inline void nsInjectException(int trap);
+static inline void nsHypercallPageInitialize(void *hypercallPage,
+                                             nsPartition_t *curp);
+static inline void nsInitEventPage(void *siefPage);
+static inline void nsInitMessagePage(void *simPage);
+
+/*
+ * static int __init nsExtensionInit(void)
+ * Initialize the extension module: register the intercept vector with
+ * xen through hvm_ext_register() and obtain the xen call vector.
+ *
+ * Returns 0 on success, or the non-zero value returned by
+ * hvm_ext_register() when registration fails.
+ *
+ * Calling/Exit State:
+ * None.
+ */
+static int __init nsExtensionInit(void)
+{
+    int retVal;
+
+    retVal = hvm_ext_register(1, &nsExtensionVector, &nsXenVector);
+    NS_ASSERT(retVal == 0);
+    if (retVal != 0) {
+        /*
+         * Registration failed, so nsXenVector may not have been
+         * populated; do not call through it (this matters when
+         * assertions are compiled out).
+         */
+        return retVal;
+    }
+    nsXenVector.extPrintk("NS Extension Initialized\n");
+    return 0;
+}
+__initcall(nsExtensionInit);
+
+/*
+ * Our lock primitives.
+ */
+/*
+ * void
+ * nsLockAcquire(nsVcpu_t *vcpup, nsSpinLock_t *nsLock)
+ * Acquire the specified lock with interrupts disabled and record
+ * vcpup as its owner.
+ *
+ * The interrupt flags are saved in a local first and copied into the
+ * lock only once it is held.  Saving them straight into nsLock->flags
+ * would let a contending CPU overwrite the flags saved by the current
+ * holder while it is still spinning.
+ *
+ * Calling/Exit State:
+ * The lock must not already be owned by vcpup.  On return the lock is
+ * held and vcpup's lock depth has been incremented.
+ */
+void
+nsLockAcquire(nsVcpu_t *vcpup, nsSpinLock_t *nsLock)
+{
+    unsigned long flags;
+
+    NS_ASSERT(nsLock->owner != vcpup);
+    spin_lock_irqsave(&nsLock->spinLock, flags);
+    nsLock->flags = flags;
+    nsLock->owner = vcpup;
+    /* Remember who took the lock, for debugging. */
+    nsLock->retAddr = __builtin_return_address(0);
+    vcpup->nsVcplockDepth++;
+}
+
+/*
+ * void
+ * nsLockRelease(nsVcpu_t *vcpup, nsSpinLock_t *nsLock)
+ * Release the specified spin lock and restore the interrupt flags
+ * saved when it was acquired.
+ *
+ * The saved flags are read into a local while the lock is still held;
+ * once the lock is dropped another CPU may acquire it and store its own
+ * flags in nsLock->flags.
+ *
+ * Calling/Exit State:
+ * The lock must be owned by vcpup.  On return the lock is free and
+ * vcpup's lock depth has been decremented.
+ */
+void
+nsLockRelease(nsVcpu_t *vcpup, nsSpinLock_t *nsLock)
+{
+    unsigned long flags;
+
+    NS_ASSERT((nsLock->owner == vcpup));
+    nsLock->owner = NULL;
+    vcpup->nsVcplockDepth--;
+    NS_ASSERT(vcpup->nsVcplockDepth >= 0);
+    flags = nsLock->flags;
+    spin_unlock_irqrestore(&nsLock->spinLock, flags);
+}
+
+/*
+ * void
+ * nsLockInit(nsSpinLock_t *nsLock)
+ * Put the specified spin lock into its initial, unowned state.
+ *
+ * Calling/Exit State:
+ * None.
+ */
+void
+nsLockInit(nsSpinLock_t *nsLock)
+{
+    /* No owner and no recorded acquirer yet. */
+    nsLock->retAddr = NULL;
+    nsLock->owner = NULL;
+    spin_lock_init(&nsLock->spinLock);
+}
+
+/*
+ * static inline void nsWriteGuestIdMsr(nsPartition_t *curp,
+ *                                      nsVcpu_t *curVcpu,
+ *                                      u64 msrContent)
+ * Record the guest ID written by the guest.
+ *
+ * When the guest clears its ID (writes 0) while a hypercall MSR value
+ * is recorded for the partition, the calling vcpu's NS_VCPU_UP flag is
+ * cleared.
+ *
+ * Calling/Exit State:
+ * None.
+ */
+static inline void
+nsWriteGuestIdMsr(nsPartition_t *curp, nsVcpu_t *curVcpu, u64 msrContent)
+{
+    curp->nsGuestIdMsr = msrContent;
+
+    if (curp->nsGuestIdMsr != 0) {
+        return;
+    }
+    /* Guest has cleared the guest ID. */
+    if (curp->nsHypercallMsr) {
+        curVcpu->nsVcpuFlags &= ~NS_VCPU_UP;
+    }
+}
+
+/*
+ * static inline void nsWriteHypercallMsr(nsPartition_t *curp,
+ *                                        nsVcpu_t *curVcpu,
+ *                                        u64 msrContent)
+ * Handle a write to the hypercall MSR.
+ *
+ * Nothing happens unless the guest has registered a guest ID and bit 0
+ * (enable) of msrContent is set.  Otherwise the guest frame named by
+ * msrContent >> 12 is set up as the hypercall page, its machine frame
+ * and the MSR value are recorded and the calling vcpu is marked
+ * NS_VCPU_UP.  A frame that cannot be mapped results in a GP fault
+ * being injected into the guest.
+ *
+ * XXXKYS: Can the guest write the GPA in one call and subsequently
+ * enable it? Check. For now assume that all the info is specified in
+ * one call.
+ *
+ * Calling/Exit State:
+ * Takes and drops the partition lock; NS_VCPU_UP is set after the lock
+ * has been dropped.
+ */
+
+static inline void
+nsWriteHypercallMsr(nsPartition_t *curp,
+                    nsVcpu_t *curVcpu,
+                    u64 msrContent)
+{
+    struct domain *dom = curVcpu->nsVcpXenVcpu->domain;
+    unsigned long frame;
+    void *page;
+    int enabled = 0;
+
+    nsLockAcquire(curVcpu, &curp->nsLock);
+    frame = (msrContent >> 12);
+
+    /* Nothing to do if the guest is not registered. */
+    if (curp->nsGuestIdMsr == 0) {
+        goto unlock;
+    }
+    /* The client is not enabling the hypercall; ignore everything. */
+    if (((u32)msrContent & (0x00000001)) == 0) {
+        goto unlock;
+    }
+
+    page = nsXenVector.extGetVirtFromGmfn(dom, frame);
+    if (page == NULL) {
+        /* Bogus GPA from the guest: answer with a GP fault. */
+        nsInjectException(TRAP_gp_fault);
+        goto unlock;
+    }
+
+    nsHypercallPageInitialize(page, curp);
+    curp->nsHypercallMfn = nsXenVector.extGetMfnFromGmfn(dom, frame);
+#ifdef CONFIG_DOMAIN_PAGE
+    nsXenVector.extUnmapDomainPage(page);
+#endif
+    curp->nsHypercallMsr = msrContent;
+    enabled = 1;
+
+unlock:
+    nsLockRelease(curVcpu, &curp->nsLock);
+    if (enabled) {
+        curVcpu->nsVcpuFlags |= NS_VCPU_UP;
+    }
+}
+
+/*
+ * static inline void nsWriteSxMsr(uint32_t idx, nsPartition_t *curp,
+ *                                 nsVcpu_t *curVcpu,
+ *                                 u64 msrContent)
+ * Handle a write to the SIEFP or SIMP msr.
+ *
+ * Nothing happens unless bit 0 (enable) of msrContent is set.  The
+ * guest frame named by msrContent >> 12 is mapped; a frame that cannot
+ * be mapped results in a GP fault being injected into the guest.  For
+ * NS_MSR_SIEFP and NS_MSR_SIMP the page is initialized and both the MSR
+ * value and the page address are recorded in the calling vcpu; any
+ * other idx records nothing.
+ *
+ * XXXKYS: Can the client enable the page and specify the base address
+ * in two different calls? For now assume that it is done in one call.
+ *
+ * Calling/Exit State:
+ * None.
+ */
+
+static inline void nsWriteSxMsr(uint32_t idx, nsPartition_t *curp,
+                                nsVcpu_t *curVcpu,
+                                u64 msrContent)
+{
+    struct domain *dom = curVcpu->nsVcpXenVcpu->domain;
+    unsigned long frame = (msrContent >> 12);
+    void *page;
+
+    /* The client is not enabling the sx page; ignore everything. */
+    if (!((u32)msrContent & (0x00000001))) {
+        return;
+    }
+
+    page = nsXenVector.extGetVirtFromGmfn(dom, frame);
+    if (page == NULL) {
+        /* Bogus GPA from the guest: answer with a GP fault. */
+        nsInjectException(TRAP_gp_fault);
+        return;
+    }
+
+    if (idx == NS_MSR_SIEFP) {
+        nsInitEventPage(page);
+        curVcpu->nsVcpSIefpMsr = msrContent;
+        curVcpu->nsVcpSiefPage = page;
+    } else if (idx == NS_MSR_SIMP) {
+        nsInitMessagePage(page);
+        curVcpu->nsVcpSimpMsr = msrContent;
+        curVcpu->nsVcpSimPage = page;
+    }
+}
+
+/*
+ * Time this domain booted; subtracted from xen's time-since-boot value
+ * in nsGetTimeSinceDomainBoot().
+ * NOTE(review): this is a single file-scope variable rather than
+ * per-partition state -- confirm that one value for all domains is
+ * intended.
+ */
+s_time_t nsDomainBootTime;
+
+/*
+ * static inline u64
+ * nsGetTimeSinceDomainBoot(void)
+ * Retrieve the time since domain boot in 100ns units: xen's
+ * time-since-boot value minus nsDomainBootTime, divided by 100.
+ *
+ * Calling/Exit State:
+ * None.
+ */
+
+static inline u64
+nsGetTimeSinceDomainBoot(void)
+{
+    u64 now = nsXenVector.extGetTimeSinceBoot();
+    u64 elapsed = now - nsDomainBootTime;
+
+    return elapsed / 100;
+}
+
+/*
+ * static inline int
+ * nsCallFromBios(struct cpu_user_regs *regs)
+ * Check if the caller is in the right state to consume the services of
+ * the extension module.
+ *
+ * Returns 1 when paging is not enabled on the current vcpu (the call is
+ * taken to come from the BIOS) and 0 otherwise.  regs is not used.
+ *
+ * Calling/Exit State:
+ * None.
+ */
+
+static inline int
+nsCallFromBios(struct cpu_user_regs *regs)
+{
+    return hvm_paging_enabled(current) ? 0 : 1;
+}
+
+/*
+ * static inline void
+ * nsInjectException(int trap)
+ * Inject the specified exception into the invoking virtual CPU through
+ * the hvm function table; the two remaining arguments of the injection
+ * hook are passed as 0.
+ *
+ * Calling/Exit State:
+ * None.
+ */
+
+static inline void
+nsInjectException(int trap)
+{
+    (nsXenVector.hvmFuncTable)->inject_exception(trap, 0, 0);
+}
+
+
+/*
+ * static inline int
+ * nsOsRegistered(void)
+ * Check to see if the guest has registered itself with the Novell Shim,
+ * i.e. whether the current partition has a non-zero guest ID.
+ *
+ * Returns 1 if registered, 0 otherwise.
+ *
+ * Calling/Exit State:
+ * None.
+ */
+
+static inline int
+nsOsRegistered(void)
+{
+    nsPartition_t *part = nsGetCurrentPartition();
+
+    if (part->nsGuestIdMsr != 0) {
+        return 1;
+    }
+    return 0;
+}
+
+
+/*
+ * static inline void
+ * nsSetPartitionPrivileges(nsPartition_t *nspp)
+ * Set the partition-wide privileges.  The value is currently hardcoded
+ * and is based on section 5.2.3 of the hypervisor spec.  We could
+ * perhaps make this an attribute of the domain and have the
+ * configuration tools manage it.
+ *
+ * Calling/Exit State:
+ * None.
+ */
+
+static inline void
+nsSetPartitionPrivileges(nsPartition_t *nspp)
+{
+    nspp->nsPrivileges = 0x000000020000007f;
+}
+
+/*
+ * static inline u32
+ * nsGetRecommendations(void)
+ * Get the recommendations reported to the guest.  For now all the
+ * features are recommended (0x1f); need to validate.  If HAP is enabled
+ * for the current domain the guest should not use the TLB flush related
+ * enlightenments, and 0x19 is returned instead.
+ *
+ * Calling/Exit State:
+ * None.
+ */
+static inline u32
+nsGetRecommendations(void)
+{
+    return paging_mode_hap(current->domain) ? 0x19 : 0x1f;
+}
+
+/*
+ * static inline void
+ * nsSetPartitionFeatures(nsPartition_t *nspp)
+ * Set the partition-wide supported features.  The value is currently
+ * hardcoded.  We could perhaps make this an attribute of the domain and
+ * have the configuration tools manage it.
+ *
+ * Calling/Exit State:
+ * None.
+ */
+
+static inline void
+nsSetPartitionFeatures(nsPartition_t *nspp)
+{
+    nspp->nsSupportedFeatures = 0x1f;
+}
+
+/* Guest major version; currently always reported as 0. */
+static inline u16
+nsGetGuestMajor(void)
+{
+    return 0;
+}
+/* Guest minor version; currently always reported as 0. */
+static inline u16
+nsGetGuestMinor(void)
+{
+    return 0;
+}
+/* Guest service pack; currently always reported as 0. */
+static inline u32
+nsGetGuestServicePack(void)
+{
+    return 0;
+}
+
+/* Guest service branch information; currently always reported as 0. */
+static inline u8
+nsGetGuestServiceBranchInfo(void)
+{
+    return 0;
+}
+/* Guest service number; currently always reported as 0. */
+static inline u32
+nsGetGuestServiceNumber(void)
+{
+    return 0;
+}
+
+/*
+ * static inline u32
+ * nsGetSupportedSyntheticMsrs(void)
+ * Get the synthetic MSRs supported by the Novell Shim.  Currently
+ * hardcoded to 0xff: all MSRs in the spec version 0.83, including the
+ * RESET MSR.
+ *
+ * Calling/Exit State:
+ * None.
+ */
+static inline u32
+nsGetSupportedSyntheticMsrs(void)
+{
+    return 0xff;
+}
+
+
+/*
+ * static inline u32
+ * nsGetMaxVcpusSupported(void)
+ * Retrieve the maximum number of vcpus supported (MAX_VIRT_CPUS).
+ *
+ * Calling/Exit State:
+ * None.
+ */
+
+static inline u32
+nsGetMaxVcpusSupported(void)
+{
+    return (MAX_VIRT_CPUS);
+}
+
+/*
+ * static inline u32
+ * nsGetMaxLcpusSupported(void)
+ * Retrieve the maximum number of physical cpus supported (NR_CPUS).
+ *
+ * Calling/Exit State:
+ * None.
+ */
+static inline u32
+nsGetMaxLcpusSupported(void)
+{
+    return (NR_CPUS);
+}
+
+
+/*
+ * static inline void
+ * nsReadIcr(u64 *icrContent)
+ * Read the ICR of the local APIC of the calling VCPU.
+ *
+ * The two 32-bit halves are read through the MMIO read handler at
+ * offsets 0x300 (low) and 0x310 (high) from the vlapic base address and
+ * combined into *icrContent.
+ *
+ * Calling/Exit State:
+ * None.
+ */
+static inline void
+nsReadIcr(u64 *icrContent)
+{
+    u32 lo, hi;
+
+    lo = nsXenVector.mmIoHandler->read_handler(current,
+        (vlapic_base_address(vcpu_vlapic(current)) + 0x300), 4);
+    hi = nsXenVector.mmIoHandler->read_handler(current,
+        (vlapic_base_address(vcpu_vlapic(current)) + 0x310), 4);
+
+    *icrContent = (((u64)hi << 32) | lo);
+}
+
+/*
+ * static inline void
+ * nsReadTpr(u64 *tprContent)
+ * Read the TPR of the local APIC of the calling VCPU.
+ * One 4-byte read is issued through the MMIO read handler at offset
+ * 0x80 from the vlapic base address; the result is zero-extended into
+ * *tprContent.
+ *
+ * Calling/Exit State:
+ * None.
+ */
+static inline void
+nsReadTpr(u64 *tprContent)
+{
+    u32 tprWord = nsXenVector.mmIoHandler->read_handler(current,
+        (vlapic_base_address(vcpu_vlapic(current)) + 0x80), 4);
+
+    *tprContent = (u64)tprWord;
+}
+
+/*
+ * static inline void
+ * nsWriteEoi(u64 msrContent)
+ * Write the EOI register of the local APIC of the calling VCPU.
+ * The low 32 bits of msrContent are written through the MMIO write
+ * handler at offset 0xb0 from the vlapic base address.
+ *
+ * Calling/Exit State:
+ * None.
+ */
+static inline void
+nsWriteEoi(u64 msrContent)
+{
+    nsXenVector.mmIoHandler->write_handler(current,
+        (vlapic_base_address(vcpu_vlapic(current)) + 0xb0), 4,
+        (u32)msrContent);
+}
+
+/*
+ * static inline void
+ * nsWriteIcr(u64 msrContent)
+ * Write the ICR register of the local APIC of the calling VCPU.
+ * The upper 32 bits of msrContent go to offset 0x310 and the lower 32
+ * bits to offset 0x300 from the vlapic base address, in that order.
+ *
+ * Both halves are always written. Skipping a zero high half would leave
+ * the value from an earlier write in place, and skipping a zero low
+ * half would silently drop the guest's write.
+ *
+ * Calling/Exit State:
+ * None.
+ */
+static inline void
+nsWriteIcr(u64 msrContent)
+{
+    u32 icrLow = (u32)msrContent;
+    u32 icrHigh = (u32)(msrContent >> 32);
+
+    /* High half first, so it is in place before the low half is written. */
+    nsXenVector.mmIoHandler->write_handler(current,
+        (vlapic_base_address(vcpu_vlapic(current)) + 0x310), 4,
+        icrHigh);
+    nsXenVector.mmIoHandler->write_handler(current,
+        (vlapic_base_address(vcpu_vlapic(current)) + 0x300), 4,
+        icrLow);
+}
+
+/*
+ * static inline void
+ * nsWriteTpr(u64 msrContent)
+ * Write the TPR register of the local APIC of the calling VCPU.
+ * The low 32 bits of msrContent are written through the MMIO write
+ * handler at offset 0x80 from the vlapic base address.
+ *
+ * Calling/Exit State:
+ * None.
+ */
+static inline void
+nsWriteTpr(u64 msrContent)
+{
+    nsXenVector.mmIoHandler->write_handler(current,
+        (vlapic_base_address(vcpu_vlapic(current)) + 0x80), 4,
+        (u32)msrContent);
+}
+
+/*
+ * static inline void
+ * nsHypercallPageInitialize(void *hypercallPage, nsPartition_t *curp)
+ * Initialize the hypercall page to support the Novell Shim Hypercalls.
+ * Records in curp whether the calling vcpu is in 64-bit mode
+ * (guest_x86_mode() == 8), zeroes the page and writes a four-byte
+ * stub at its start: 0x0f 0x01 followed by 0xc1 on Intel or 0xd9
+ * otherwise, then 0xc3 (ret).
+ *
+ * Calling/Exit State:
+ * None.
+ */
+static inline void
+nsHypercallPageInitialize(void *hypercallPage, nsPartition_t *curp)
+{
+    u8 *stub = (u8 *)hypercallPage;
+
+    curp->nsLongModeGuest =
+        (nsXenVector.hvmFuncTable->guest_x86_mode(current) == 8) ? 1 : 0;
+
+    memset(hypercallPage, 0, PAGE_SIZE);
+    stub[0] = 0x0f; /* vmcall */
+    stub[1] = 0x01;
+    stub[2] = nsXenVector.extCpuIsIntel() ? 0xc1 : 0xd9;
+    stub[3] = 0xc3; /* ret */
+}
+
+/*
+ * static inline void
+ * nsInitEventPage(void *siefPage)
+ * Initialize the per-vcpu event page by clearing PAGE_SIZE bytes.
+ *
+ * Calling/Exit State:
+ * None.
+ */
+static inline void
+nsInitEventPage(void *siefPage)
+{
+    void *eventPage = siefPage;
+
+    memset(eventPage, 0, PAGE_SIZE);
+}
+
+/*
+ * static inline void
+ * nsInitMessagePage(void *simPage)
+ * Initialize the per-vcpu message page by clearing PAGE_SIZE bytes.
+ *
+ * Calling/Exit State:
+ * None.
+ */
+static inline void
+nsInitMessagePage(void *simPage)
+{
+    void *messagePage = simPage;
+
+    memset(messagePage, 0, PAGE_SIZE);
+}
+
+
+/*
+ * static inline void
+ * nsProcessMessageQ(nsPartition_t *curp, nsVcpu_t *curVcpu)
+ * Process the message queue. This is a placeholder with an empty
+ * body; both arguments are ignored.
+ *
+ * Calling/Exit State:
+ * None.
+ */
+static inline void
+nsProcessMessageQ(nsPartition_t *curp, nsVcpu_t *curVcpu)
+{
+    /* XXXKYS: queued messages are not supported yet; nothing to do. */
+}
+
+/*
+ * static inline void
+ * nsScheduleTimeOut(nsVcpTimerState_t *timer)
+ * Arm the Xen timer embedded in the given synthetic timer state.
+ * timer->count is kept in units of 100ns and is an absolute time, not
+ * an offset from NOW(); it is multiplied by 100 before being handed
+ * to extSetTimer().
+ *
+ * Calling/Exit State:
+ * None.
+ */
+static inline void
+nsScheduleTimeOut(nsVcpTimerState_t *timer)
+{
+    nsXenVector.extSetTimer(&timer->vcpuTimer,
+        (timer->count * 100));
+}
+
+/*
+ * static void
+ * nsTimeOutHandler(void *arg)
+ * The timeout handler for Novell Shim/Adaptor. arg is the
+ * nsVcpTimerState_t of the synthetic timer that expired.
+ *
+ * When the owning vcpu has its SynIC control MSR and its message page
+ * MSR enabled, a timer-expired message is written into the message
+ * slot selected by bits 19:16 of the timer config. If that slot is
+ * free and the matching SINTx MSR is not masked (bit 16), the vector
+ * held in its low 8 bits is posted to the vcpu that owns the timer.
+ * A timer whose config has bit 1 set is re-armed before returning,
+ * whether or not a message was posted.
+ *
+ * Calling/Exit State:
+ * None.
+ */
+
+static void
+nsTimeOutHandler(void *arg)
+{
+    nsVcpTimerState_t *timerData = arg;
+    nsVcpu_t *curVcpu = timerData->thisCpu;
+    nsTimerMessage_t *timerMsg;
+    int sIntNum;
+    int vector;
+
+    if (!(curVcpu->nsVcpSControlMsr & 0x9)) {
+        goto nsToPostProcess;
+    }
+    /*
+     * SynIC is enabled; do further processing. Timeouts are posted as
+     * messages; verify that the message page is enabled.
+     */
+    if (!(curVcpu->nsVcpSimpMsr & 0x1)) {
+        goto nsToPostProcess;
+    }
+    sIntNum = (((u32)(timerData->config >> 16)) & 0x0000000f);
+    /*
+     * First post the message and then optionally deal with the
+     * interrupt notification.
+     */
+    if (curVcpu->nsVcpSimPage == NULL) {
+        NS_PANIC("Novell Shim: Sim page not setup\n");
+    }
+    if ((((nsMessage_t *)curVcpu->nsVcpSimPage)[sIntNum]).messageType !=
+        nsMessageTypeNone) {
+        /*
+         * The message slot is not empty; silently skip this expiry.
+         */
+        goto nsToPostProcess;
+    }
+    /*
+     * The slot is available; post the message.
+     */
+    timerMsg = &(((nsTimerMessage_t *)curVcpu->nsVcpSimPage)[sIntNum]);
+    timerMsg->messageType = nsMessageTimerExpired;
+    timerMsg->messageSize = sizeof(nsTimerMessage_t);
+    timerMsg->timerIndex = timerData->timerIndex;
+    timerMsg->expirationTime = timerData->count;
+    if ((curVcpu->nsVcpSIntMsr[sIntNum] >> 16) & 0x1) {
+        /*
+         * The designated sintx register is masked; skip the interrupt.
+         */
+        goto nsToPostProcess;
+    }
+    vector = ((u32)curVcpu->nsVcpSIntMsr[sIntNum] & 0xff);
+
+    /*
+     * Post the interrupt to the vcpu that owns this timer. This handler
+     * is a timer callback, so `current` is not necessarily that vcpu;
+     * use the Xen vcpu recorded in the shim state instead.
+     * XXXKYS: What is the delivery mode for interrupts delivered here.
+     * Check with Mike?
+     */
+    nsXenVector.extPostInterrupt(curVcpu->nsVcpXenVcpu, vector,
+        APIC_DM_FIXED);
+
+    /*
+     * If auto eoi is set; deal with that.
+     * NOTE(review): this tests bit 16, the same bit as the mask check
+     * above, so the branch can never be taken here. The Hyper-V spec
+     * appears to place AutoEOI at bit 17 -- confirm. Enabling it also
+     * needs an EOI helper that takes the target vcpu, because
+     * nsWriteEoi() acts on `current`.
+     */
+    if (((u32)(curVcpu->nsVcpSIntMsr[sIntNum] >> 16)) & 0x1) {
+        nsWriteEoi(0);
+    }
+
+nsToPostProcess:
+    /*
+     * Prior to returning, deal with all the post timeout issues:
+     * a timer with config bit 1 set is scheduled again.
+     */
+    if (((u32)(timerData->config)) & 0x00000002) {
+        NS_STATS_COLLECT(NS_TIMEOUTS, &curVcpu->nsVcpStats);
+        nsScheduleTimeOut(timerData);
+    }
+}
+
+/*
+ * static inline void
+ * nsTimerInit(nsVcpu_t *vcpup, int timer)
+ * Initialize synthetic timer number `timer` of the given vcpu state:
+ * clear config and count, record the owning vcpu state and the timer
+ * index, and register nsTimeOutHandler with the embedded Xen timer,
+ * bound to current->processor.
+ *
+ * Calling/Exit State:
+ * None.
+ */
+
+static inline void
+nsTimerInit(nsVcpu_t *vcpup, int timer)
+{
+    nsVcpTimerState_t *state = &vcpup->nsVcpTimers[timer];
+
+    state->config = 0;
+    state->count = 0;
+    state->thisCpu = vcpup;
+    state->timerIndex = timer;
+    init_timer(&state->vcpuTimer, nsTimeOutHandler,
+        state, current->processor);
+}
+
+/*
+ * static inline int
+ * nsAccessTimeRefCnt(nsPartition_t *curp, u64 *msrContent)
+ * Read the per-partition time base into *msrContent.
+ * Returns 1 on success. Returns 0, leaving *msrContent untouched,
+ * when the partition lacks the NS_ACCESS_TIME_REF_CNT privilege.
+ *
+ * Calling/Exit State:
+ * None.
+ */
+
+static inline int
+nsAccessTimeRefCnt(nsPartition_t *curp, u64 *msrContent)
+{
+    if (nsPrivilegeCheck(curp, NS_ACCESS_TIME_REF_CNT)) {
+        *msrContent = nsGetTimeSinceDomainBoot();
+        return 1;
+    }
+    /*
+     * The partition is not allowed to read this; report an error.
+     */
+    return 0;
+}
+
+/*
+ * static void
+ * nsDoMigrateTimers(struct vcpu *v)
+ * The binding between this vcpu and the physical cpu has changed; migrate
+ * the four synthetic timers of this vcpu to v->processor.
+ *
+ * The shim state is looked up from v itself (its domain's ext_handle and
+ * its vcpu_id) rather than from `current`, so the timers that move are
+ * the ones belonging to v even if v is not the running vcpu. Nothing is
+ * done when the domain has no shim state.
+ *
+ * Calling/Exit State:
+ * The new binding is already in place.
+ */
+
+static void
+nsDoMigrateTimers(struct vcpu *v)
+{
+    nsPartition_t *curp = v->domain->arch.hvm_domain.ext_handle;
+    nsVcpu_t *vcpup;
+    int i;
+
+    if (curp == NULL) {
+        return;
+    }
+    vcpup = &curp->nsVcpuState[v->vcpu_id];
+
+    for (i = 0; i < 4; i++) {
+        nsXenVector.extMigrateTimer(&vcpup->nsVcpTimers[i].vcpuTimer,
+            v->processor);
+    }
+}
+
+/*
+ * static void
+ * nsVcpuUp(struct vcpu *v)
+ * A secondary processor has come on line; mark the processor as up by
+ * setting NS_VCPU_UP in its shim flags.
+ *
+ * The partition is taken from v's own domain so that the partition and
+ * the vcpu_id used to index it always belong together. Nothing is done
+ * when the domain has no shim state.
+ *
+ * Calling/Exit State:
+ * None.
+ */
+
+static void
+nsVcpuUp(struct vcpu *v)
+{
+    nsPartition_t *curp = v->domain->arch.hvm_domain.ext_handle;
+
+    if (curp == NULL) {
+        return;
+    }
+    curp->nsVcpuState[v->vcpu_id].nsVcpuFlags |= NS_VCPU_UP;
+}
+
+/*
+ * static int
+ * nsDoHyperCall(struct cpu_user_regs *pregs)
+ * Intercept for implementing Extension hypercalls.
+ *
+ * The call is treated as an extension hypercall only when the frame
+ * behind the guest instruction pointer is the partition's hypercall
+ * page. In that case the arguments are taken from ecx/edx/r8 for a
+ * long mode guest, or from the register pairs edx:eax, ebx:ecx and
+ * edi:esi otherwise, and the result is stored back with
+ * nsSetSysCallRetVal().
+ *
+ * Returns 1 when the call was made through the extension hypercall
+ * page (including the case where a #GP is injected because the CPU
+ * state is invalid), 0 when it was not.
+ *
+ * Calling/Exit State:
+ * Based on the hypercall; the caller may give up the CPU while
+ * processing the hypercall. No locks should be held on entry and
+ * no locks will be held on return.
+ */
+
+static int
+nsDoHyperCall(struct cpu_user_regs *pregs)
+{
+    nsPartition_t *curp = nsGetCurrentPartition();
+    nsVcpu_t *vcpup;
+    int longModeGuest = curp->nsLongModeGuest;
+    unsigned long hypercallMfn;
+    u64 opcode, input, output, retVal;
+
+    hypercallMfn = nsXenVector.extGetMfnFromGva(pregs->eip);
+    if (hypercallMfn != curp->nsHypercallMfn) {
+        /*
+         * This hypercall page is not the page for extension.
+         */
+        return (0);
+    }
+    vcpup = &curp->nsVcpuState[nsGetCurrentVcpuIndex()];
+
+    /*
+     * This is an extension hypercall; process it; but first make
+     * sure that the CPU is in the right state for invoking
+     * the hypercall - protected mode at CPL 0.
+     */
+    if (nsInvalidCpuState()) {
+        nsInjectException(TRAP_gp_fault);
+        retVal = nsBuildHcallRetVal(NS_STATUS_INVALID_VP_STATE, 0);
+        nsSetSysCallRetVal(pregs, longModeGuest, retVal);
+        return (1);
+    }
+    if (longModeGuest) {
+        opcode = pregs->ecx;
+        input = pregs->edx;
+        output = pregs->r8;
+    } else {
+        /* Each 64-bit argument is split across a high:low pair. */
+        opcode = ((((u64)pregs->edx) << 32) | ((u64)pregs->eax));
+        input = ((((u64)pregs->ebx) << 32) | ((u64)pregs->ecx));
+        output = ((((u64)pregs->edi) << 32) | ((u64)pregs->esi));
+    }
+    NS_ASSERT(vcpup->nsVcplockDepth == 0);
+    nsHandleHyperCall(opcode, input, output, &retVal);
+    nsSetSysCallRetVal(pregs, longModeGuest, retVal);
+    NS_ASSERT(vcpup->nsVcplockDepth == 0);
+    return (1);
+}
+
+/*
+ * static int
+ * nsDomainCreate(struct domain *d)
+ * NS intercept for domain creation. Allocates and zeroes the
+ * per-partition shim state, initializes its lock, privileges and
+ * features, and stores it in d->arch.hvm_domain.ext_handle.
+ * Also records the current time-since-boot in nsDomainBootTime.
+ * Returns 0 on success, 1 if the allocation fails.
+ *
+ * Calling/Exit State:
+ * None.
+ */
+
+
+static int
+nsDomainCreate(struct domain *d)
+{
+    nsPartition_t *partition =
+        nsXenVector.extAllocMem(sizeof(nsPartition_t));
+
+    if (partition == NULL) {
+        nsDebugPrint("Memory allocation failed\n");
+        return 1;
+    }
+    memset(partition, 0, sizeof(*partition));
+    nsLockInit(&partition->nsLock);
+    /*
+     * Partition-wide privileges and features. We can start with no
+     * privileges and progressively turn on fancier hypervisor features.
+     */
+    nsSetPartitionPrivileges(partition);
+    nsSetPartitionFeatures(partition);
+    /*
+     * Keep a pointer to our state in the hvm domain structure.
+     */
+    d->arch.hvm_domain.ext_handle = partition;
+    nsDomainBootTime = nsXenVector.extGetTimeSinceBoot();
+    return 0;
+}
+
+
+
+/*
+ * static void
+ * nsDomainDestroy(struct domain *d)
+ * NS intercept for the domain destruction. Prints the flush state and
+ * the per-vcpu statistics, then frees the per-partition shim state and
+ * clears d->arch.hvm_domain.ext_handle.
+ *
+ * A domain without shim state (ext_handle == NULL) is tolerated and
+ * left untouched, as the save/restore handlers already do.
+ *
+ * Calling/Exit State:
+ * None.
+ */
+static void
+nsDomainDestroy(struct domain *d)
+{
+    int i;
+    nsPartition_t *curp = d->arch.hvm_domain.ext_handle;
+
+    nsXenVector.extPrintk("NS Domain Being Destroyed\n");
+    if (curp == NULL) {
+        /* Nothing was allocated for this domain; nothing to free. */
+        return;
+    }
+    nsXenVector.extPrintk("DUMP STATS\n");
+    nsXenVector.extPrintk("GFS cpucount is %d\n", curp->nsFlushState.cpuCount);
+    if (curp->nsFlushState.currentOwner != NULL) {
+        nsXenVector.extPrintk("GFS owner is %d\n", curp->nsFlushState.currentOwner->vcpu_id);
+    } else {
+        nsXenVector.extPrintk("GFS is free\n");
+    }
+    if (!cpus_empty(curp->nsFlushState.waiters)) {
+        nsXenVector.extPrintk("GFS: waiters not empty\n");
+    } else {
+        nsXenVector.extPrintk("GFS: waiters empty\n");
+    }
+    for (i = 0; i < MAX_VIRT_CPUS; i++) {
+        if (d->vcpu[i] != NULL) {
+            nsPrintStats(curp, i);
+        }
+    }
+
+    nsXenVector.extFreeMem(curp);
+    d->arch.hvm_domain.ext_handle = NULL;
+}
+
+/*
+ * static int
+ * nsVcpuInitialize(struct vcpu *v)
+ * NS intercept for vcpu creation. Sets up the shim state of the vcpu:
+ * flags, the synthetic version MSR, the four synthetic timers and the
+ * hypercall input and output buffer pages.
+ *
+ * Both buffer pages are zeroed before use. On failure every page that
+ * was obtained is released and the corresponding pointers in the vcpu
+ * state are cleared, so no pointer to a freed page is left behind.
+ *
+ * Returns 0 on success, 1 if a page cannot be allocated or mapped.
+ * NOTE(review): nsNumVcpusActive stays incremented on the failure
+ * paths, as before -- confirm whether the caller runs nsVcpuDestroy
+ * after a failed initialization.
+ *
+ * Calling/Exit State:
+ * None.
+ */
+
+static int
+nsVcpuInitialize(struct vcpu *v)
+{
+    nsVcpu_t *vcpup;
+    nsPartition_t *curp = v->domain->arch.hvm_domain.ext_handle;
+    int i;
+
+    vcpup = &curp->nsVcpuState[v->vcpu_id];
+    atomic_inc(&curp->nsNumVcpusActive);
+    if (v->vcpu_id == 0) {
+        vcpup->nsVcpuFlags |= NS_VCPU_BOOT_CPU;
+    }
+    /*
+     * Initialize all the synthetic MSRs corresponding to this VCPU.
+     * Note that all state is set to 0 to begin with.
+     */
+    vcpup->nsVcpSVersionMsr = 0x00000001;
+    /*
+     * Initialize the synthetic timer structures.
+     */
+    for (i = 0; i < 4; i++) {
+        nsTimerInit(vcpup, i);
+    }
+    /*
+     * Set up the input page for handling hypercalls.
+     */
+    vcpup->nsVcpInputBufferPage = nsXenVector.extAllocDomHeapPage();
+    if (vcpup->nsVcpInputBufferPage == NULL) {
+        nsDebugPrint("Memory allocation failed\n");
+        return (1);
+    }
+    vcpup->nsVcpInputBuffer =
+        nsXenVector.extGetVirtFromPagePtr(vcpup->nsVcpInputBufferPage);
+    if (vcpup->nsVcpInputBuffer == NULL) {
+        nsDebugPrint("Coud not get VA\n");
+        goto failFreeInputPage;
+    }
+    memset(vcpup->nsVcpInputBuffer, 0, PAGE_SIZE);
+    /*
+     * Set up the output page for handling hypercalls.
+     */
+    vcpup->nsVcpOutputBufferPage = nsXenVector.extAllocDomHeapPage();
+    if (vcpup->nsVcpOutputBufferPage == NULL) {
+        nsDebugPrint("Memory allocation failed\n");
+        goto failUnmapInput;
+    }
+    vcpup->nsVcpOutputBuffer =
+        nsXenVector.extGetVirtFromPagePtr(vcpup->nsVcpOutputBufferPage);
+    if (vcpup->nsVcpOutputBuffer == NULL) {
+        nsDebugPrint("Coud not get VA\n");
+        goto failFreeOutputPage;
+    }
+    /* Do not hand out whatever the page held before it was allocated. */
+    memset(vcpup->nsVcpOutputBuffer, 0, PAGE_SIZE);
+    vcpup->nsVcpXenVcpu = v;
+    vcpup->nsVcpFlushRequest = 0;
+
+    return (0);
+
+failFreeOutputPage:
+    nsXenVector.extFreeDomHeapPage(vcpup->nsVcpOutputBufferPage);
+    vcpup->nsVcpOutputBufferPage = NULL;
+failUnmapInput:
+#ifdef CONFIG_DOMAIN_PAGE
+    nsXenVector.extUnmapDomainPage(vcpup->nsVcpInputBuffer);
+#endif
+    vcpup->nsVcpInputBuffer = NULL;
+failFreeInputPage:
+    nsXenVector.extFreeDomHeapPage(vcpup->nsVcpInputBufferPage);
+    vcpup->nsVcpInputBufferPage = NULL;
+    return (1);
+}
+
+/*
+ * static void
+ * nsVcpuDestroy(struct vcpu *v)
+ * NS intercept for vcpu destruction. Marks the vcpu as down, kills its
+ * synthetic timers and releases the pages held for it.
+ *
+ * The timers are killed before any page is unmapped or freed, because
+ * the timeout handler reads and writes the message page. Pages that
+ * were never set up (NULL pointers) are skipped, and the pointers are
+ * cleared once released. Nothing is done when the domain has no shim
+ * state.
+ *
+ * Calling/Exit State:
+ * None.
+ */
+static void
+nsVcpuDestroy(struct vcpu *v)
+{
+    nsVcpu_t *vcpup;
+    nsPartition_t *curp = v->domain->arch.hvm_domain.ext_handle;
+    int i;
+
+    if (curp == NULL) {
+        return;
+    }
+    vcpup = &curp->nsVcpuState[v->vcpu_id];
+    atomic_dec(&curp->nsNumVcpusActive);
+    vcpup->nsVcpuFlags &= ~NS_VCPU_UP;
+    /*
+     * Kill the timers first so that no handler can touch the pages
+     * released below.
+     */
+    for (i = 0; i < 4; i++) {
+        nsXenVector.extKillTimer(&vcpup->nsVcpTimers[i].vcpuTimer);
+    }
+    /*
+     * Get rid of the pages we have allocated for this VCPU.
+     */
+#ifdef CONFIG_DOMAIN_PAGE
+    if (vcpup->nsVcpSiefPage != NULL) {
+        nsXenVector.extUnmapDomainPage(vcpup->nsVcpSiefPage);
+    }
+    if (vcpup->nsVcpSimPage != NULL) {
+        nsXenVector.extUnmapDomainPage(vcpup->nsVcpSimPage);
+    }
+    if (vcpup->nsVcpInputBuffer != NULL) {
+        nsXenVector.extUnmapDomainPage(vcpup->nsVcpInputBuffer);
+    }
+    if (vcpup->nsVcpOutputBuffer != NULL) {
+        nsXenVector.extUnmapDomainPage(vcpup->nsVcpOutputBuffer);
+    }
+#endif
+    vcpup->nsVcpSiefPage = NULL;
+    vcpup->nsVcpSimPage = NULL;
+    vcpup->nsVcpInputBuffer = NULL;
+    vcpup->nsVcpOutputBuffer = NULL;
+
+    if (vcpup->nsVcpInputBufferPage != NULL) {
+        nsXenVector.extFreeDomHeapPage(vcpup->nsVcpInputBufferPage);
+        vcpup->nsVcpInputBufferPage = NULL;
+    }
+    if (vcpup->nsVcpOutputBufferPage != NULL) {
+        nsXenVector.extFreeDomHeapPage(vcpup->nsVcpOutputBufferPage);
+        vcpup->nsVcpOutputBufferPage = NULL;
+    }
+    return;
+}
+
+/*
+ * static int nsVcpuSave(struct domain *d, hvm_domain_context_t *h)
+ * Save per-cpu shim state to support either migration or domain save.
+ * One NS_VERIDIAN_CPU record is written for every vcpu of d that is
+ * not down. Timer counts are saved relative to the current time.
+ *
+ * The record is zeroed before it is filled in, so no uninitialized
+ * stack bytes reach the save image. The current time is sampled once
+ * per timer, so the "still in the future" test and the subtraction
+ * use the same value.
+ *
+ * Returns 0 on success or when the domain has no shim state, 1 if
+ * writing a record fails.
+ *
+ * Calling exit state:
+ * None.
+ */
+static int
+nsVcpuSave(struct domain *d, hvm_domain_context_t *h)
+{
+    struct vcpu *v;
+    struct hvm_ns_veridian_cpu ctxt;
+
+    nsVcpu_t *vcpup;
+    nsPartition_t *curp = d->arch.hvm_domain.ext_handle;
+    int i;
+
+    if (curp == NULL) {
+        return 0;
+    }
+    for_each_vcpu(d, v) {
+        vcpup = &curp->nsVcpuState[v->vcpu_id];
+
+        NS_ASSERT(vcpup->nsVcplockDepth == 0);
+        NS_ASSERT(vcpup->nsVcpFlushRequest == 0);
+        NS_ASSERT(vcpup->nsVcpWaitingOnGFS == 0);
+        NS_ASSERT(vcpup->nsVcpFlushPending == 0);
+        NS_ASSERT(vcpup->nsVcpWaitingForCleanup == 0);
+        /*
+         * We don't need to save state for a
+         * vcpu that is down; the restore
+         * code will leave it down if there is nothing saved.
+         */
+        if ( test_bit(_VPF_down, &v->pause_flags) )
+            continue;
+        memset(&ctxt, 0, sizeof(ctxt));
+        ctxt.control_msr = vcpup->nsVcpSControlMsr;
+        ctxt.version_msr = vcpup->nsVcpSVersionMsr;
+        ctxt.sief_msr = vcpup->nsVcpSIefpMsr;
+        ctxt.simp_msr = vcpup->nsVcpSimpMsr;
+        ctxt.eom_msr = vcpup->nsVcpEomMsr;
+        for (i = 0; i < 16; i++)
+            ctxt.int_msr[i] = vcpup->nsVcpSIntMsr[i];
+        for (i = 0; i < 4; i++) {
+            /* Current time in units of 100ns, sampled exactly once. */
+            u64 now100 = ((NOW()) / 100);
+
+            ctxt.timers[i].config = vcpup->nsVcpTimers[i].config;
+            /*
+             * Save the count in units of 100ns relative to NOW()
+             * When we restore we will add NOW() to properly
+             * account for the elapsed time when the timer was
+             * active.
+             */
+            if (vcpup->nsVcpTimers[i].count > now100) {
+                ctxt.timers[i].count =
+                    (vcpup->nsVcpTimers[i].count - now100);
+            } else {
+                ctxt.timers[i].count = 0;
+            }
+        }
+        if ( hvm_save_entry(NS_VERIDIAN_CPU,
+            v->vcpu_id, h, &ctxt) != 0 )
+            return 1;
+    }
+
+    return 0;
+}
+
+/*
+ * static int nsVcpuRestore(struct domain *d, hvm_domain_context_t *h)
+ * Restore per-cpu shim state to support either migration or domain save.
+ * Loads one NS_VERIDIAN_CPU record, copies the synthetic MSRs back into
+ * the vcpu state, re-arms the timers selected by config bits 0x9 and
+ * marks the vcpu as up.
+ *
+ * The vcpu instance number comes from the save image, so it is
+ * validated against MAX_VIRT_CPUS and d->vcpu[] before it is used as
+ * an index.
+ *
+ * Returns 0 on success or when the domain has no shim state, -22 when
+ * the instance number is invalid or the record cannot be loaded.
+ *
+ * Calling exit state:
+ * None.
+ */
+static int
+nsVcpuRestore(struct domain *d, hvm_domain_context_t *h)
+{
+    int vcpuid, i;
+    struct hvm_ns_veridian_cpu ctxt;
+
+    nsVcpu_t *vcpup;
+    nsPartition_t *curp = d->arch.hvm_domain.ext_handle;
+
+    if (curp == NULL) {
+        return 0;
+    }
+    /* Which vcpu is this? */
+    vcpuid = hvm_load_instance(h);
+    if ((vcpuid < 0) || (vcpuid >= MAX_VIRT_CPUS) ||
+        (d->vcpu[vcpuid] == NULL)) {
+        /* Corrupt or foreign record; refuse to index out of bounds. */
+        return -22;
+    }
+    vcpup = &curp->nsVcpuState[vcpuid];
+    if ( hvm_load_entry(NS_VERIDIAN_CPU, h, &ctxt) != 0 )
+        return -22;
+
+    vcpup->nsVcpSControlMsr = ctxt.control_msr;
+    vcpup->nsVcpSVersionMsr = ctxt.version_msr;
+
+    nsWriteSxMsr(NS_MSR_SIEFP, curp, vcpup, ctxt.sief_msr);
+    nsWriteSxMsr(NS_MSR_SIMP, curp, vcpup, ctxt.simp_msr);
+
+    vcpup->nsVcpEomMsr = ctxt.eom_msr;
+    for (i = 0; i < 16; i++)
+        vcpup->nsVcpSIntMsr[i] = ctxt.int_msr[i];
+    for (i = 0; i < 4; i++) {
+        vcpup->nsVcpTimers[i].config = ctxt.timers[i].config;
+        vcpup->nsVcpTimers[i].count =
+            (ctxt.timers[i].count + ((NOW())/100));
+        /*
+         * Only re-arm timers that have one of the 0x9 config bits set.
+         * This has to be a mask test: OR-ing with 0x9 is always
+         * non-zero and would arm every timer.
+         */
+        if ((vcpup->nsVcpTimers[i].config & 0x9)) {
+            /*
+             * XXXKYS: Some issues with regards to time
+             * management here:
+             * 1) We will ignore the elapsed wall clock time
+             * when the domain was not running.
+             * 2) Clearly we should account for the time that
+             * has elapsed when the domain was running with
+             * respect to the timeouts that were scheduled
+             * prior to saving the domain.
+             * We will deal with on the save side.
+             */
+            nsScheduleTimeOut(&vcpup->nsVcpTimers[i]);
+            NS_STATS_COLLECT(NS_TIMEOUTS, &vcpup->nsVcpStats);
+        }
+    }
+
+    vcpup->nsVcpuFlags |= NS_VCPU_UP;
+    return 0;
+}
+
+
+
+/*
+ * static int nsDomSave(struct domain *d, hvm_domain_context_t *h)
+ * Save per-domain shim state to support either migration or domain save.
+ * Writes one NS_VERIDIAN_DOM record holding the guest id MSR, the
+ * hypercall MSR and the long mode flag. Returns the result of
+ * hvm_save_entry(), or 0 when the domain has no shim state.
+ *
+ * Calling exit state:
+ * None.
+ */
+
+static int
+nsDomSave(struct domain *d, hvm_domain_context_t *h)
+{
+    nsPartition_t *partition = d->arch.hvm_domain.ext_handle;
+    struct hvm_ns_veridian_dom record;
+
+    if (partition != NULL) {
+        record.guestid_msr = partition->nsGuestIdMsr;
+        record.hypercall_msr = partition->nsHypercallMsr;
+        record.long_mode = partition->nsLongModeGuest;
+        record.pad0 = 0;
+        return (hvm_save_entry(NS_VERIDIAN_DOM, 0, h, &record));
+    }
+    return 0;
+}
+
+/*
+ * static int nsDomRestore(struct domain *d, hvm_domain_context_t *h)
+ * Restore per-domain shim state to support either migration or domain save.
+ * Loads one NS_VERIDIAN_DOM record, copies the guest id MSR, hypercall
+ * MSR and long mode flag into the partition, and recomputes the
+ * hypercall page MFN from the frame number in the hypercall MSR
+ * (bits 12 and up). Returns 0 on success or when the domain has no
+ * shim state, -22 if the record cannot be loaded.
+ *
+ * Calling exit state:
+ * None.
+ */
+
+static int
+nsDomRestore(struct domain *d, hvm_domain_context_t *h)
+{
+    nsPartition_t *partition = d->arch.hvm_domain.ext_handle;
+    struct hvm_ns_veridian_dom record;
+
+    if (partition == NULL) {
+        return 0;
+    }
+    if ( hvm_load_entry(NS_VERIDIAN_DOM, h, &record) != 0 ) {
+        return -22;
+    }
+
+    partition->nsGuestIdMsr = record.guestid_msr;
+    partition->nsHypercallMsr = record.hypercall_msr;
+    partition->nsLongModeGuest = record.long_mode;
+    partition->nsHypercallMfn =
+        nsXenVector.extGetMfnFromGmfn(d, (record.hypercall_msr >> 12));
+
+    return 0;
+}
+
+/*
+ * Register the shim's save and restore handlers.
+ * NS_VERIDIAN_DOM records are handled by nsDomSave/nsDomRestore and are
+ * tagged HVMSR_PER_DOM; NS_VERIDIAN_CPU records are handled by
+ * nsVcpuSave/nsVcpuRestore and are tagged HVMSR_PER_VCPU.
+ */
+HVM_REGISTER_SAVE_RESTORE(NS_VERIDIAN_DOM, nsDomSave, nsDomRestore,
+ 1, HVMSR_PER_DOM);
+
+
+HVM_REGISTER_SAVE_RESTORE(NS_VERIDIAN_CPU, nsVcpuSave , nsVcpuRestore,
+ 1, HVMSR_PER_VCPU);
+
+
+/*
+ * static int
+ * nsPreProcessCpuIdLeaves(unsigned int input, struct cpu_user_regs *regs)
+ *
+ * Preprocess cpuid leaves. Both xen and Veridian use identical cpuid
+ * leaves for getting info from the hypervisor.
+ *
+ * Returns 1 when the caller has nothing further to do for this leaf,
+ * and 0 when the caller should go on and fill in the leaf itself.
+ *
+ * With HVM_PARAM_EXTEND_HYPERVISOR == 1, leaves 0x40000000-0x40000005
+ * return 0. Leaves 0x40001000-0x40001002 are filled in here with the
+ * Xen signature, version and hypercall details, and return 1, as does
+ * every other leaf. For any other parameter value, leaves
+ * 0x40000000-0x40000002 return 1 and everything else returns 0.
+ *
+ * Calling exit state:
+ * None.
+ */
+static int
+nsPreProcessCpuIdLeaves(unsigned int input, struct cpu_user_regs *regs)
+{
+    struct domain *d = current->domain;
+    int extid = d->arch.hvm_domain.params[HVM_PARAM_EXTEND_HYPERVISOR];
+
+    if (extid != 1) {
+        /*
+         * For now this is all other "enlightened guests". Leaves
+         * 0x40000000-0x40000002 have already been correctly
+         * processed.
+         */
+        if ((input >= 0x40000000) && (input <= 0x40000002)) {
+            return (1);
+        }
+        return (0);
+    }
+
+    /*
+     * Enlightened Windows guest; need to remap and handle
+     * leaves used by PV front-end drivers.
+     */
+    if ((input >= 0x40000000) && (input <= 0x40000005)) {
+        return (0);
+    }
+    /*
+     * PV drivers use cpuid to query the hypervisor for details. On
+     * Windows we will use the following leaves for this:
+     *
+     * 4096: VMM Signature (corresponds to 0x40000000 on Linux)
+     * 4097: VMM Version (corresponds to 0x40000001 on Linux)
+     * 4098: Hypercall details (corresponds to 0x40000002 on Linux)
+     */
+    switch (input) {
+    case 0x40001000:
+        regs->eax = 0x40000002; /* Largest leaf */
+        regs->ebx = 0x566e6558; /* Signature 1: "XenV" */
+        regs->ecx = 0x65584d4d; /* Signature 2: "MMXe" */
+        regs->edx = 0x4d4d566e; /* Signature 3: "nVMM" */
+        break;
+    case 0x40001001:
+        regs->eax = (XEN_VERSION << 16) | XEN_SUBVERSION;
+        regs->ebx = 0; /* Reserved */
+        regs->ecx = 0; /* Reserved */
+        regs->edx = 0; /* Reserved */
+        break;
+    case 0x40001002:
+        regs->eax = 1; /* Number of hypercall-transfer pages */
+        /* In linux this is 0x40000000 */
+        regs->ebx = 0x40001000; /* MSR base address */
+        regs->ecx = 0; /* Features 1 */
+        regs->edx = 0; /* Features 2 */
+        break;
+    default:
+        break;
+    }
+    return (1);
+}
+
+/*
+ * static int
+ * nsDoCpuId(unsigned int input, struct cpu_user_regs *regs)
+ * NS intercept for cpuid instruction.
+ * Returns 1 when the leaf was handled here and regs were updated,
+ * 0 when the shim does not handle it. Leaf 1 gets bit 31 of ecx set;
+ * leaves 0x40000000-0x40000005 are filled in with the shim's
+ * identification, guest info, features, recommendations and limits.
+ *
+ * Calling/Exit State:
+ * None.
+ */
+
+static int
+nsDoCpuId(unsigned int input, struct cpu_user_regs *regs)
+{
+    /*
+     * hvmloader uses cpuid to set up a hypercall page; we don't want to
+     * intercept calls coming from the bootstrap (bios) code in the HVM
+     * guest; we discriminate based on the instruction pointer.
+     */
+    if (nsCallFromBios(regs)) {
+        return (0);
+    }
+
+    if (input == 0x00000001) {
+        regs->ecx |= 0x80000000;
+        return (1);
+    }
+
+    if (nsPreProcessCpuIdLeaves(input, regs)) {
+        return (0);
+    }
+
+    switch (input) {
+    case 0x40000000:
+        /*
+         * Hypervisor identification.
+         */
+        regs->eax = 0x40000005; /* For now clamp this */
+        regs->ebx = 0x65766f4e; /* "Nove" */
+        regs->ecx = 0x68536c6c; /* "llSh" */
+        regs->edx = 0x76486d69; /* "imHv" */
+        return (1);
+
+    case 0x40000001:
+        /*
+         * Hypervisor identification.
+         */
+        regs->eax = 0x31237648; /* "Hv#1" */
+        regs->ebx = 0; /* Reserved */
+        regs->ecx = 0; /* Reserved */
+        regs->edx = 0; /* Reserved */
+        return (1);
+
+    case 0x40000002:
+        /*
+         * Guest Info; all zero until the guest OS has registered.
+         */
+        if (!nsOsRegistered()) {
+            regs->eax = 0;
+            regs->ebx = 0;
+            regs->ecx = 0;
+            regs->edx = 0;
+            return (1);
+        }
+        regs->eax = nsGetGuestMajor();
+        regs->ebx =
+            (nsGetGuestMajor() << 16) | nsGetGuestMinor();
+        regs->ecx = nsGetGuestServicePack();
+        regs->edx =
+            (nsGetGuestServiceBranchInfo() << 24) |
+            nsGetGuestServiceNumber();
+        return (1);
+
+    case 0x40000003:
+        /*
+         * Feature identification.
+         */
+        regs->eax = nsGetSupportedSyntheticMsrs();
+        /* We only support AccessSelfPartitionId bit 1 */
+        regs->ebx = 0x2;
+        regs->ecx = 0; /* Reserved */
+        regs->edx = 0; /* No MWAIT (bit 0), No debugging (bit 1) */
+        return (1);
+
+    case 0x40000004:
+        /*
+         * Implementation recommendations.
+         */
+        regs->eax = nsGetRecommendations();
+        regs->ebx = 0; /* Reserved */
+        regs->ecx = 0; /* Reserved */
+        regs->edx = 0; /* Reserved */
+        return (1);
+
+    case 0x40000005:
+        /*
+         * Implementation limits.
+         * Currently we retrieve maximum number of vcpus and
+         * logical processors (hardware threads) supported.
+         */
+        regs->eax = nsGetMaxVcpusSupported();
+        regs->ebx = nsGetMaxLcpusSupported();
+        regs->ecx = 0; /* Reserved */
+        regs->edx = 0; /* Reserved */
+        return (1);
+
+    default:
+        /*
+         * We don't handle this leaf.
+         */
+        return (0);
+    }
+}
+
+/*
+ * Split a 64-bit MSR value across edx:eax, the way rdmsr returns it.
+ */
+static inline void
+nsRdMsrSetResult(struct cpu_user_regs *regs, u64 value)
+{
+    regs->eax = (u32)(value & 0xFFFFFFFF);
+    regs->edx = (u32)(value >> 32);
+}
+
+/*
+ * static int
+ * nsDoRdMsr(uint32_t idx, struct cpu_user_regs *regs)
+ * NS intercept for reading MSRS.
+ * Returns 1 when the MSR was handled here; the value is returned in
+ * edx:eax, or a #GP is injected when the partition lacks the required
+ * privilege. Returns 0 when the shim does not handle the MSR and the
+ * caller has to deal with it.
+ *
+ * The remaining count of a synthetic timer is computed from a single
+ * sample of the current time, so the comparison and the subtraction
+ * cannot disagree and the result cannot wrap around.
+ *
+ * Calling/Exit State:
+ * None.
+ */
+
+static int
+nsDoRdMsr(uint32_t idx, struct cpu_user_regs *regs)
+{
+    nsPartition_t *curp = nsGetCurrentPartition();
+    unsigned int vcpuIndex = nsGetCurrentVcpuIndex();
+    u64 msrContent = 0;
+    nsVcpu_t *curVcpu = &curp->nsVcpuState[vcpuIndex];
+    int synInt, timer;
+    struct domain *d = current->domain;
+    int extid = d->arch.hvm_domain.params[HVM_PARAM_EXTEND_HYPERVISOR];
+    u64 timerCount;
+    u64 now100;
+
+    /*
+     * hvmloader uses rdmsr; we don't want to
+     * intercept calls coming from the bootstrap (bios) code in the HVM
+     * guest; we discriminate based on the instruction pointer.
+     */
+    if (nsCallFromBios(regs)) {
+        /*
+         * We don't intercept this.
+         */
+        return (0);
+    }
+    if (extid > 1) {
+        /*
+         * For now this is all other "Enlightened" operating systems
+         * other than Longhorn.
+         */
+        if (idx == 0x40000000) {
+            /*
+             * PV driver hypercall setup. Let xen handle this.
+             */
+            return (0);
+        }
+        if (idx == 0x40001000) {
+            idx = 0x40000000;
+        }
+    }
+    switch (idx) {
+    case NS_MSR_GUEST_OS_ID:
+        nsLockAcquire(curVcpu, &curp->nsLock);
+        nsRdMsrSetResult(regs, curp->nsGuestIdMsr);
+        nsLockRelease(curVcpu, &curp->nsLock);
+        break;
+    case NS_MSR_HYPERCALL:
+        nsLockAcquire(curVcpu, &curp->nsLock);
+        nsRdMsrSetResult(regs, curp->nsHypercallMsr);
+        nsLockRelease(curVcpu, &curp->nsLock);
+        if ((((u32)curp->nsHypercallMsr) & (0x00000001)) != 0) {
+            curVcpu->nsVcpuFlags |= NS_VCPU_UP;
+        }
+        break;
+    case NS_MSR_VP_INDEX:
+        nsRdMsrSetResult(regs, (u64)vcpuIndex);
+        break;
+    case NS_MSR_ICR:
+        if (!nsPrivilegeCheck(curp, NS_ACCESS_APIC_MSRS)) {
+            goto msrReadError;
+        }
+        nsReadIcr(&msrContent);
+        NS_STATS_COLLECT(NS_ICR_READ, &curVcpu->nsVcpStats);
+        nsRdMsrSetResult(regs, msrContent);
+        break;
+    case NS_MSR_TPR:
+        if (!nsPrivilegeCheck(curp, NS_ACCESS_APIC_MSRS)) {
+            goto msrReadError;
+        }
+        nsReadTpr(&msrContent);
+        NS_STATS_COLLECT(NS_TPR_READ, &curVcpu->nsVcpStats);
+        nsRdMsrSetResult(regs, msrContent);
+        break;
+    /*
+     * The following synthetic MSRs are implemented in the Novell Shim.
+     */
+    case NS_MSR_SCONTROL:
+        if (!nsPrivilegeCheck(curp, NS_ACCESS_SYNC_MSRS)) {
+            goto msrReadError;
+        }
+        nsRdMsrSetResult(regs, curVcpu->nsVcpSControlMsr);
+        break;
+    case NS_MSR_SVERSION:
+        if (!nsPrivilegeCheck(curp, NS_ACCESS_SYNC_MSRS)) {
+            goto msrReadError;
+        }
+        nsRdMsrSetResult(regs, curVcpu->nsVcpSVersionMsr);
+        break;
+    case NS_MSR_SIEFP:
+        if (!nsPrivilegeCheck(curp, NS_ACCESS_SYNC_MSRS)) {
+            goto msrReadError;
+        }
+        nsRdMsrSetResult(regs, curVcpu->nsVcpSIefpMsr);
+        break;
+    case NS_MSR_SIMP:
+        if (!nsPrivilegeCheck(curp, NS_ACCESS_SYNC_MSRS)) {
+            goto msrReadError;
+        }
+        nsRdMsrSetResult(regs, curVcpu->nsVcpSimpMsr);
+        break;
+    case NS_MSR_SINT0:
+        synInt = 0;
+        goto synIntReadProcess;
+    case NS_MSR_SINT1:
+        synInt = 1;
+        goto synIntReadProcess;
+    case NS_MSR_SINT2:
+        synInt = 2;
+        goto synIntReadProcess;
+    case NS_MSR_SINT3:
+        synInt = 3;
+        goto synIntReadProcess;
+    case NS_MSR_SINT4:
+        synInt = 4;
+        goto synIntReadProcess;
+    case NS_MSR_SINT5:
+        synInt = 5;
+        goto synIntReadProcess;
+    case NS_MSR_SINT6:
+        synInt = 6;
+        goto synIntReadProcess;
+    case NS_MSR_SINT7:
+        synInt = 7;
+        goto synIntReadProcess;
+    case NS_MSR_SINT8:
+        synInt = 8;
+        goto synIntReadProcess;
+    case NS_MSR_SINT9:
+        synInt = 9;
+        goto synIntReadProcess;
+    case NS_MSR_SINT10:
+        synInt = 10;
+        goto synIntReadProcess;
+    case NS_MSR_SINT11:
+        synInt = 11;
+        goto synIntReadProcess;
+    case NS_MSR_SINT12:
+        synInt = 12;
+        goto synIntReadProcess;
+    case NS_MSR_SINT13:
+        synInt = 13;
+        goto synIntReadProcess;
+    case NS_MSR_SINT14:
+        synInt = 14;
+        goto synIntReadProcess;
+    case NS_MSR_SINT15:
+        synInt = 15;
+synIntReadProcess:
+        if (!nsPrivilegeCheck(curp, NS_ACCESS_SYNC_MSRS)) {
+            goto msrReadError;
+        }
+        nsRdMsrSetResult(regs, curVcpu->nsVcpSIntMsr[synInt]);
+        break;
+
+    case NS_MSR_SEOM:
+        /*
+         * This is a write only register; reads return 0.
+         */
+        nsRdMsrSetResult(regs, 0);
+        break;
+    case NS_MSR_TIME_REF_COUNT:
+        if (!nsAccessTimeRefCnt(curp, &msrContent)) {
+            goto msrReadError;
+        }
+        nsRdMsrSetResult(regs, msrContent);
+        break;
+    /*
+     * Synthetic timer MSRs.
+     */
+    case NS_MSR_TIMER0_CONFIG:
+        timer = 0;
+        goto processTimerConfigRead;
+    case NS_MSR_TIMER1_CONFIG:
+        timer = 1;
+        goto processTimerConfigRead;
+    case NS_MSR_TIMER2_CONFIG:
+        timer = 2;
+        goto processTimerConfigRead;
+    case NS_MSR_TIMER3_CONFIG:
+        timer = 3;
+processTimerConfigRead:
+        if (!nsPrivilegeCheck(curp, NS_ACCESS_SYNC_TIMERS)) {
+            goto msrReadError;
+        }
+        nsRdMsrSetResult(regs, curVcpu->nsVcpTimers[timer].config);
+        break;
+    case NS_MSR_TIMER0_COUNT:
+        timer = 0;
+        goto processTimerCountRead;
+    case NS_MSR_TIMER1_COUNT:
+        timer = 1;
+        goto processTimerCountRead;
+    case NS_MSR_TIMER2_COUNT:
+        timer = 2;
+        goto processTimerCountRead;
+    case NS_MSR_TIMER3_COUNT:
+        timer = 3;
+processTimerCountRead:
+        if (!nsPrivilegeCheck(curp, NS_ACCESS_SYNC_TIMERS)) {
+            goto msrReadError;
+        }
+        /*
+         * The stored count is absolute, in units of 100ns; report the
+         * time left, or 0 once the expiry time has passed. The current
+         * time is sampled once for both the test and the subtraction.
+         */
+        now100 = ((NOW()) / 100);
+        timerCount = curVcpu->nsVcpTimers[timer].count;
+        if (timerCount > now100) {
+            timerCount -= now100;
+        } else {
+            timerCount = 0;
+        }
+        nsRdMsrSetResult(regs, timerCount);
+        break;
+    case NS_MSR_PVDRV_HCALL:
+        nsRdMsrSetResult(regs, 0);
+        break;
+    case NS_MSR_SYSTEM_RESET:
+        nsRdMsrSetResult(regs, 0);
+        break;
+    default:
+        /*
+         * We did not handle the MSR address specified;
+         * let the caller figure out what to do.
+         */
+        return (0);
+    }
+    return (1);
+msrReadError:
+    /*
+     * Have to inject #GP fault.
+     */
+    nsInjectException(TRAP_gp_fault);
+    return (1);
+}
+
+/*
+ * static int
+ * nsDoWrMsr(uint32_t idx, struct cpu_user_regs *regs)
+ * NS intercept for writing MSRs.
+ *
+ * idx is the index of the MSR being written; the 64-bit value to be
+ * written is assembled from regs->edx (high half) and regs->eax
+ * (low half).
+ *
+ * Calling/Exit State:
+ * Returns 0 when the write is not handled here (the call comes from
+ * the guest BIOS code, the MSR is left for Xen to handle, or the
+ * index is not recognized); the caller decides what to do.
+ * Returns 1 when the shim has consumed the write. This includes
+ * the cases where a #GP fault is injected because the MSR may not
+ * be written or the partition fails the privilege check.
+ */
+
+static int
+nsDoWrMsr(uint32_t idx, struct cpu_user_regs *regs)
+{
+    nsPartition_t *curp = nsGetCurrentPartition();
+    unsigned int vcpuIndex = nsGetCurrentVcpuIndex();
+    u64 msrContent = 0;
+    nsVcpu_t *curVcpu = &curp->nsVcpuState[vcpuIndex];
+    int synInt, timer;
+    struct domain *d = current->domain;
+    int extid = d->arch.hvm_domain.params[HVM_PARAM_EXTEND_HYPERVISOR];
+
+    /*
+     * hvmloader uses wrmsr; we don't want to
+     * intercept calls coming from the bootstrap (bios) code in the HVM
+     * guest; we discriminate based on the instruction pointer.
+     */
+    if (nsCallFromBios(regs)) {
+        /*
+         * We don't intercept this.
+         */
+        return (0);
+    }
+    /*
+     * Assemble the 64-bit MSR value from the EDX:EAX pair.
+     */
+    msrContent =
+        (u32)regs->eax | ((u64)regs->edx << 32);
+    if (extid > 1) {
+        /*
+         * For now this is all other "Enlightened" operating systems
+         * other than Longhorn.
+         */
+        if (idx == 0x40000000) {
+            /*
+             * PV driver hypercall setup. Let xen handle this.
+             */
+            return (0);
+        }
+        /*
+         * These guests use 0x40001000 in place of 0x40000000;
+         * remap it so that the switch below sees the usual index.
+         */
+        if (idx == 0x40001000) {
+            idx = 0x40000000;
+        }
+    }
+
+    switch (idx) {
+    case NS_MSR_GUEST_OS_ID:
+        nsWriteGuestIdMsr(curp, curVcpu, msrContent);
+        break;
+    case NS_MSR_HYPERCALL:
+        nsWriteHypercallMsr(curp, curVcpu, msrContent);
+        break;
+
+    case NS_MSR_VP_INDEX:
+        /*
+         * Writes to this MSR are always rejected with #GP
+         * (presumably because it is read-only; XXX confirm).
+         */
+        goto msrWriteError;
+
+    case NS_MSR_EOI:
+        if (!nsPrivilegeCheck(curp, NS_ACCESS_APIC_MSRS)) {
+            goto msrWriteError;
+        }
+        nsWriteEoi(msrContent);
+        NS_STATS_COLLECT(NS_EOI_WRITE, &curVcpu->nsVcpStats);
+        break;
+    case NS_MSR_ICR:
+        if (!nsPrivilegeCheck(curp, NS_ACCESS_APIC_MSRS)) {
+            goto msrWriteError;
+        }
+        nsWriteIcr(msrContent);
+        NS_STATS_COLLECT(NS_ICR_WRITE, &curVcpu->nsVcpStats);
+        break;
+    case NS_MSR_TPR:
+        if (!nsPrivilegeCheck(curp, NS_ACCESS_APIC_MSRS)) {
+            goto msrWriteError;
+        }
+        nsWriteTpr(msrContent);
+        NS_STATS_COLLECT(NS_TPR_WRITE, &curVcpu->nsVcpStats);
+        break;
+
+    /*
+     * The following MSRs are synthetic MSRs supported in the Novell Shim.
+     */
+    case NS_MSR_SCONTROL:
+        if (!nsPrivilegeCheck(curp, NS_ACCESS_SYNC_MSRS)) {
+            goto msrWriteError;
+        }
+        curVcpu->nsVcpSControlMsr = msrContent;
+        break;
+    case NS_MSR_SVERSION:
+        if (!nsPrivilegeCheck(curp, NS_ACCESS_SYNC_MSRS)) {
+            goto msrWriteError;
+        }
+        /*
+         * This is a read-only MSR; generate #GP through the
+         * common error path.
+         */
+        goto msrWriteError;
+    case NS_MSR_SIEFP:
+    case NS_MSR_SIMP:
+        if (!nsPrivilegeCheck(curp, NS_ACCESS_SYNC_MSRS)) {
+            goto msrWriteError;
+        }
+        nsWriteSxMsr(idx, curp, curVcpu, msrContent);
+        break;
+
+    /*
+     * Synthetic interrupt source MSRs: each case only selects the
+     * SINT slot; the shared handling is at synIntWrProcess.
+     */
+    case NS_MSR_SINT0:
+        synInt = 0;
+        goto synIntWrProcess;
+    case NS_MSR_SINT1:
+        synInt = 1;
+        goto synIntWrProcess;
+    case NS_MSR_SINT2:
+        synInt = 2;
+        goto synIntWrProcess;
+    case NS_MSR_SINT3:
+        synInt = 3;
+        goto synIntWrProcess;
+    case NS_MSR_SINT4:
+        synInt = 4;
+        goto synIntWrProcess;
+    case NS_MSR_SINT5:
+        synInt = 5;
+        goto synIntWrProcess;
+    case NS_MSR_SINT6:
+        synInt = 6;
+        goto synIntWrProcess;
+    case NS_MSR_SINT7:
+        synInt = 7;
+        goto synIntWrProcess;
+    case NS_MSR_SINT8:
+        synInt = 8;
+        goto synIntWrProcess;
+    case NS_MSR_SINT9:
+        synInt = 9;
+        goto synIntWrProcess;
+    case NS_MSR_SINT10:
+        synInt = 10;
+        goto synIntWrProcess;
+    case NS_MSR_SINT11:
+        synInt = 11;
+        goto synIntWrProcess;
+    case NS_MSR_SINT12:
+        synInt = 12;
+        goto synIntWrProcess;
+    case NS_MSR_SINT13:
+        synInt = 13;
+        goto synIntWrProcess;
+    case NS_MSR_SINT14:
+        synInt = 14;
+        goto synIntWrProcess;
+    case NS_MSR_SINT15:
+        synInt = 15;
+synIntWrProcess:
+        if (!nsPrivilegeCheck(curp, NS_ACCESS_SYNC_MSRS)) {
+            goto msrWriteError;
+        }
+        /*
+         * XXXKYS: We assume that the synInt registers will be
+         * first written before the interrupt generation can occur.
+         * Specifically if SINT is masked all interrupts that may have
+         * been generated will be lost. Also when SINT is disabled;
+         * its effects will be only felt for subsequent interrupts that
+         * may be posted. XXXKYS: CHECK
+         */
+        curVcpu->nsVcpSIntMsr[synInt] = msrContent;
+        break;
+
+    case NS_MSR_SEOM:
+        if (!nsPrivilegeCheck(curp, NS_ACCESS_SYNC_MSRS)) {
+            goto msrWriteError;
+        }
+        curVcpu->nsVcpEomMsr = msrContent;
+        nsProcessMessageQ(curp, curVcpu);
+        break;
+    case NS_MSR_TIME_REF_COUNT:
+        /*
+         * This is a read-only msr.
+         */
+        goto msrWriteError;
+
+    /*
+     * Synthetic timer MSRs. Each case only selects the timer slot;
+     * the shared handling is at processTimerConfig/processTimerCount.
+     */
+    case NS_MSR_TIMER0_CONFIG:
+        timer = 0;
+        goto processTimerConfig;
+    case NS_MSR_TIMER1_CONFIG:
+        timer = 1;
+        goto processTimerConfig;
+    case NS_MSR_TIMER2_CONFIG:
+        timer = 2;
+        goto processTimerConfig;
+    case NS_MSR_TIMER3_CONFIG:
+        timer = 3;
+processTimerConfig:
+        if (!nsPrivilegeCheck(curp, NS_ACCESS_SYNC_TIMERS)) {
+            goto msrWriteError;
+        }
+        /*
+         * Assume that the client is going to write the whole msr.
+         */
+        if (!(msrContent & 0x9)) {
+            /*
+             * We are neither setting Auto Enable or Enable;
+             * silently exit.
+             * Should this be considered to turn off a
+             * timer that may be currently
+             * active; XXXKYS: Check. For now we are
+             * not doing anything here.
+             */
+            break;
+        }
+        if (!(((u32)(msrContent >> 16)) & 0x0000000f)) {
+            /*
+             * sintx is 0; clear the enable bit(s).
+             */
+            msrContent &= ~(0x1);
+        }
+        curVcpu->nsVcpTimers[timer].config = msrContent;
+        /*
+         * XXXKYS: Can any order be assumed here;
+         * should we just act on whatever is in the
+         * count register. For now act as if the count
+         * register is valid and act on it.
+         */
+        if (msrContent & 0x1) {
+            nsScheduleTimeOut(&curVcpu->nsVcpTimers[timer]);
+            NS_STATS_COLLECT(NS_TIMEOUTS, &curVcpu->nsVcpStats);
+        }
+        break;
+    case NS_MSR_TIMER0_COUNT:
+        timer = 0;
+        goto processTimerCount;
+    case NS_MSR_TIMER1_COUNT:
+        timer = 1;
+        goto processTimerCount;
+    case NS_MSR_TIMER2_COUNT:
+        timer = 2;
+        goto processTimerCount;
+    case NS_MSR_TIMER3_COUNT:
+        timer = 3;
+processTimerCount:
+        if (!nsPrivilegeCheck(curp, NS_ACCESS_SYNC_TIMERS)) {
+            goto msrWriteError;
+        }
+        /*
+         * The count is stored offset by the current time
+         * (NOW()/100; presumably Xen's nanosecond clock scaled to
+         * 100ns units -- XXX confirm).
+         */
+        curVcpu->nsVcpTimers[timer].count =
+            (msrContent + ((NOW())/100));
+        /*
+         * Arm the timer only if Enable or Auto Enable is set in
+         * the config register (the same 0x9 mask that is tested
+         * when the config MSR is written). The mask must be
+         * applied with '&': OR-ing it in is non-zero for every
+         * config value and would arm the timer unconditionally.
+         */
+        if (curVcpu->nsVcpTimers[timer].config & 0x9) {
+            nsScheduleTimeOut(&curVcpu->nsVcpTimers[timer]);
+            NS_STATS_COLLECT(NS_TIMEOUTS, &curVcpu->nsVcpStats);
+        }
+
+        break;
+    case NS_MSR_PVDRV_HCALL:
+        /*
+         * Establish the hypercall page for PV drivers.
+         */
+        nsXenVector.extWrmsrHypervisorRegs(0x40000000, regs->eax,
+            regs->edx);
+        break;
+    case NS_MSR_SYSTEM_RESET:
+        /*
+         * Shutdown the domain/partition.
+         */
+        if (msrContent & 0x1) {
+            domain_shutdown(d, SHUTDOWN_reboot);
+        }
+        break;
+
+    default:
+        /*
+         * We did not handle the MSR address;
+         * let the caller deal with this.
+         */
+        return (0);
+    }
+    return (1);
+msrWriteError:
+    /*
+     * Have to inject #GP fault.
+     */
+    nsInjectException(TRAP_gp_fault);
+    return (1);
+}
[-- Attachment #5: Type: text/plain, Size: 138 bytes --]
_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xensource.com
http://lists.xensource.com/xen-devel
^ permalink raw reply [flat|nested] 15+ messages in thread
* Re: [PATCH][RFC] Supporting Enlightened Windows 2008 Server
2008-03-05 22:15 [PATCH][RFC] Supporting Enlightened Windows 2008 Server Ky Srinivasan
@ 2008-03-05 22:28 ` Daniel P. Berrange
2008-03-05 22:38 ` Daniel P. Berrange
2008-03-07 1:05 ` Ky Srinivasan
2008-03-06 7:28 ` Keir Fraser
1 sibling, 2 replies; 15+ messages in thread
From: Daniel P. Berrange @ 2008-03-05 22:28 UTC (permalink / raw)
To: Ky Srinivasan; +Cc: xen-devel
On Wed, Mar 05, 2008 at 03:15:19PM -0700, Ky Srinivasan wrote:
> I am attaching updated versions of the patches that I posted a couple
> of weeks ago. These have been merged up to the current unstable tip:
> changeset 17186:854b0704962b
>
> These patches have been tested on the unstable tip.
I'm not expert enough to comment on the HV extension implementation itself,
but in terms of the userspace side, the user visible configuration file
option 'extid=1' is pretty unpleasant. It is akin to a 'magic constant'
in C code - no understandable meaning at all.
I'd like to see it accept a named extension type - if its possible to
have multiple extensions per guest, then using a list instead of a scalar
would be better. So how about something closer to
extensions = [ "win2k8" ]
> +gopts.var('extid', val='EXTID',
> + fn=set_int, default=0,
> + use="Specify extention ID for a HVM domain.")
> +
And this help message could list the valid extension names
Regards,
Dan.
--
|=- Red Hat, Engineering, Emerging Technologies, Boston. +1 978 392 2496 -=|
|=- Perl modules: http://search.cpan.org/~danberr/ -=|
|=- Projects: http://freshmeat.net/~danielpb/ -=|
|=- GnuPG: 7D3B9505 F3C9 553F A1DA 4AC2 5648 23C1 B3DF F742 7D3B 9505 -=|
^ permalink raw reply [flat|nested] 15+ messages in thread
* Re: [PATCH][RFC] Supporting Enlightened Windows 2008 Server
2008-03-05 22:28 ` Daniel P. Berrange
@ 2008-03-05 22:38 ` Daniel P. Berrange
2008-03-07 1:06 ` Ky Srinivasan
2008-03-07 1:05 ` Ky Srinivasan
1 sibling, 1 reply; 15+ messages in thread
From: Daniel P. Berrange @ 2008-03-05 22:38 UTC (permalink / raw)
To: Ky Srinivasan; +Cc: xen-devel
On Wed, Mar 05, 2008 at 10:28:28PM +0000, Daniel P. Berrange wrote:
> On Wed, Mar 05, 2008 at 03:15:19PM -0700, Ky Srinivasan wrote:
> > I am attaching updated versions of the patches that I posted a couple
> > of weeks ago. These have been merged up to the current unstable tip:
> > changeset 17186:854b0704962b
> >
> > These patches have been tested on the unstable tip.
>
> I'm not expert enough to comment on the HV extension implementation itself,
> but in terms of the userspace side, the user visible configuration file
> option 'extid=1' is pretty unpleasant. It is akin to a 'magic constant'
> in C code - no understandable meaning at all.
>
> I'd like to see it accept a named extension type - if its possible to
> have multiple extensions per guest, then using a list instead of a scalar
> would be better. So how about something closer to
>
> extensions = [ "win2k8" ]
Or is there some way you can have some super light weight trap / hook
always loaded, so when Win2k8 makes it first paravirt call, you can
then automatically enable the full extension ? That could let Xen
just 'do the right thing' without needing a config param, and without
having to fully enable the full extension for non-Win2k8 guests.
>Dan.
--
|=- Red Hat, Engineering, Emerging Technologies, Boston. +1 978 392 2496 -=|
|=- Perl modules: http://search.cpan.org/~danberr/ -=|
|=- Projects: http://freshmeat.net/~danielpb/ -=|
|=- GnuPG: 7D3B9505 F3C9 553F A1DA 4AC2 5648 23C1 B3DF F742 7D3B 9505 -=|
^ permalink raw reply [flat|nested] 15+ messages in thread
* Re: [PATCH][RFC] Supporting Enlightened Windows 2008 Server
2008-03-05 22:38 ` Daniel P. Berrange
@ 2008-03-07 1:06 ` Ky Srinivasan
0 siblings, 0 replies; 15+ messages in thread
From: Ky Srinivasan @ 2008-03-07 1:06 UTC (permalink / raw)
To: Daniel P. Berrange; +Cc: xen-devel
>>> On Wed, Mar 5, 2008 at 5:38 PM, in message
<20080305223827.GI19306@redhat.com>, "Daniel P. Berrange" <berrange@redhat.com>
wrote:
> On Wed, Mar 05, 2008 at 10:28:28PM +0000, Daniel P. Berrange wrote:
>> On Wed, Mar 05, 2008 at 03:15:19PM -0700, Ky Srinivasan wrote:
>> > I am attaching updated versions of the patches that I posted a couple
>> > of weeks ago. These have been merged up to the current unstable tip:
>> > changeset 17186:854b0704962b
>> >
>> > These patches have been tested on the unstable tip.
>>
>> I'm not expert enough to comment on the HV extension implementation itself,
>> but in terms of the userspace side, the user visible configuration file
>> option 'extid=1' is pretty unpleasant. It is akin to a 'magic constant'
>> in C code - no understandable meaning at all.
>>
>> I'd like to see it accept a named extension type - if its possible to
>> have multiple extensions per guest, then using a list instead of a scalar
>> would be better. So how about something closer to
>>
>> extensions = [ "win2k8" ]
>
> Or is there some way you can have some super light weight trap / hook
> always loaded, so when Win2k8 makes it first paravirt call, you can
> then automatically enable the full extension ? That could let Xen
> just 'do the right thing' without needing a config param, and without
> having to fully enable the full extension for non-Win2k8 guests.
I considered this. Unfortunately, we have no control on the mechanisms used by windows to discover the hypervisor. Furthermore, CPUID leaves used by longhorn collide with CPUID leaves used by Xen for supporting hypervisor discovery for PV drivers. Since we have no control on what Microsoft may do here, I felt the best way might be to tag the guest so that we can interpret the hypercalls, CPUID calls and MSR (read/write) calls within the guest namespace.
Regards,
K. Y
^ permalink raw reply [flat|nested] 15+ messages in thread
* Re: [PATCH][RFC] Supporting Enlightened Windows 2008 Server
2008-03-05 22:28 ` Daniel P. Berrange
2008-03-05 22:38 ` Daniel P. Berrange
@ 2008-03-07 1:05 ` Ky Srinivasan
1 sibling, 0 replies; 15+ messages in thread
From: Ky Srinivasan @ 2008-03-07 1:05 UTC (permalink / raw)
To: Daniel P. Berrange; +Cc: xen-devel
>>> On Wed, Mar 5, 2008 at 5:28 PM, in message
<20080305222828.GH19306@redhat.com>, "Daniel P. Berrange" <berrange@redhat.com>
wrote:
> On Wed, Mar 05, 2008 at 03:15:19PM -0700, Ky Srinivasan wrote:
>> I am attaching updated versions of the patches that I posted a couple
>> of weeks ago. These have been merged up to the current unstable tip:
>> changeset 17186:854b0704962b
>>
>> These patches have been tested on the unstable tip.
>
> I'm not expert enough to comment on the HV extension implementation itself,
> but in terms of the userspace side, the user visible configuration file
> option 'extid=1' is pretty unpleasant. It is akin to a 'magic constant'
> in C code - no understandable meaning at all.
Agreed; I could make the extension tag more descriptive.
Regards,
K. Y
>
> I'd like to see it accept a named extension type - if its possible to
> have multiple extensions per guest, then using a list instead of a scalar
> would be better. So how about something closer to
>
> extensions = [ "win2k8" ]
>
>> +gopts.var('extid', val='EXTID',
>> + fn=set_int, default=0,
>> + use="Specify extention ID for a HVM domain.")
>> +
>
> And this help message could list the valid extension names
>
>
> Regards,
> Dan.
^ permalink raw reply [flat|nested] 15+ messages in thread
* Re: [PATCH][RFC] Supporting Enlightened Windows 2008 Server
2008-03-05 22:15 [PATCH][RFC] Supporting Enlightened Windows 2008 Server Ky Srinivasan
2008-03-05 22:28 ` Daniel P. Berrange
@ 2008-03-06 7:28 ` Keir Fraser
2008-03-06 10:15 ` Tim Deegan
2008-03-07 1:08 ` Ky Srinivasan
1 sibling, 2 replies; 15+ messages in thread
From: Keir Fraser @ 2008-03-06 7:28 UTC (permalink / raw)
To: Ky Srinivasan, xen-devel
Personally I think the approach is ugly, and also you have not yet presented
evidence that supporting the Viridian paravirtualisations improves
performance. If it doesn't then it's a waste of time; if it does then it
raises the question of which hypercalls provide the benefit, and do we get a
smaller neater patch by supporting just those? One final comment is that the
TLB management code that this slaps on top of the core hypervisor looks a
bit scary to me. Tim Deegan may care to comment more on that.
-- Keir
On 5/3/08 22:15, "Ky Srinivasan" <ksrinivasan@novell.com> wrote:
> I am attaching updated versions of the patches that I posted a couple of weeks
> ago. These have been merged up to the current unstable tip: changeset
> 17186:854b0704962b
>
> These patches have been tested on the unstable tip.
>
> Kier, what are your thoughts on accepting these patches.
>
> Signed-off by K. Y. Srinivasan (ksrinivasan@novell.com)
>
> Regards,
>
> K. Y
>
>
>
>
>
>
>
> _______________________________________________
> Xen-devel mailing list
> Xen-devel@lists.xensource.com
> http://lists.xensource.com/xen-devel
^ permalink raw reply [flat|nested] 15+ messages in thread
* Re: [PATCH][RFC] Supporting Enlightened Windows 2008 Server
2008-03-06 7:28 ` Keir Fraser
@ 2008-03-06 10:15 ` Tim Deegan
2008-03-07 1:10 ` [PATCH][RFC] Supporting Enlightened Windows 2008Server Ky Srinivasan
2008-03-07 1:08 ` Ky Srinivasan
1 sibling, 1 reply; 15+ messages in thread
From: Tim Deegan @ 2008-03-06 10:15 UTC (permalink / raw)
To: Keir Fraser; +Cc: Ky Srinivasan, xen-devel
At 07:28 +0000 on 06 Mar (1204788507), Keir Fraser wrote:
> Personally I think the approach is ugly, and also you have not yet presented
> evidence that supporting the Viridian paravirtualisations improves
> performance. If it doesn't then it's a waste of time; if it does then it
> raises the question of which hypercalls provide the benefit, and do we get a
> smaller neater patch by supporting just those? One final comment is that the
> TLB management code that this slaps on top of the core hypervisor looks a
> bit scary to me. Tim Deegan may care to comment more on that.
Some blame lies with the mismatch between the Viridian interface and
Xen's; there needs to be a way for the TLB flush hypercall to block
indefinitely. But I can't see how that turns into more than an atomic_t
for TlbFlushInhibit and a block-and-schedule operation. In the current
patches, there's quite a lot of locking and ownership going on as well.
I'm confused by the use of wait_on_xen_event_channel(0, xyz); event
channels don't seem to come into it.
I'll mention now, since I have the patch in front of me, that I dislike
the addition of an "ext_id" field to the HVM save format header and
associated special treatment in the save/restore code; you should be
able to figure out that this is a w2k8 domain from the presence of your
other records in the save file.
More generally, I agree that the approach is very heavyweight. I don't
see the need for a framework here, since there's no other proposed user
of it that would want the same interface. It seems to duplicate a lot
of things (does it really need its own spinlock implementation?)
It's certainly not in Xen coding style, even in the framework
implementation. (The MS habit of encoding scope and type information in
variable names annoys the heck out of me. Why does a lock field in an
nsPartition_t need to be called "nsLock"?)
The naming in general could do with a kicking -- calling everything
"Novell Shim" is understandable for historical reasons but not really
descriptive of its function. But maybe that can wait.
Tim
--
Tim Deegan <Tim.Deegan@citrix.com>
Principal Software Engineer, Citrix Systems (R&D) Ltd.
[Company #02300071, SL9 0DZ, UK.]
^ permalink raw reply [flat|nested] 15+ messages in thread
* Re: [PATCH][RFC] Supporting Enlightened Windows 2008Server
2008-03-06 10:15 ` Tim Deegan
@ 2008-03-07 1:10 ` Ky Srinivasan
2008-03-07 11:57 ` Tim Deegan
` (2 more replies)
0 siblings, 3 replies; 15+ messages in thread
From: Ky Srinivasan @ 2008-03-07 1:10 UTC (permalink / raw)
To: Tim Deegan, Keir Fraser; +Cc: xen-devel
>>> On Thu, Mar 6, 2008 at 5:15 AM, in message
<20080306101542.GA22422@york.uk.xensource.com>, Tim Deegan
<Tim.Deegan@citrix.com> wrote:
> At 07:28 +0000 on 06 Mar (1204788507), Keir Fraser wrote:
>> Personally I think the approach is ugly, and also you have not yet presented
>> evidence that supporting the Viridian paravirtualisations improves
>> performance. If it doesn't then it's a waste of time; if it does then it
>> raises the question of which hypercalls provide the benefit, and do we get a
>> smaller neater patch by supporting just those? One final comment is that the
>> TLB management code that this slaps on top of the core hypervisor looks a
>> bit scary to me. Tim Deegan may care to comment more on that.
>
> Some blame lies with the mismatch between the Viridian interface and
> Xen's; there needs to be a way for the TLB flush hypercall to block
> indefinitely. But I can't see how that turns into more than an atomic_t
> for TlbFlushInhibit and a block-and-schedule operation. In the current
> patches, there's quite a lot of locking and ownership going on as well.
> I'm confused by the use of wait_on_xen_event_channel(0, xyz); event
> channels don't seem to come into it.
The Veridian API allows the guest to pass in a variable list of arguments to the TLB flush call (HvFlushVirtualAddressList). Furthermore, both forms of the flush APIs (HvFlushVirtualAddressSpace and HvFlushVirtualAddressList) can specify a list of vcpus that should be involved in the flush process. So, as you have noted we will need a mechanism to co-ordinate the flush operation amongst the set of vcpus involved which means we need to be able to give up the physical CPU in the hypervisor waiting for the flush to complete. I have used wait_on_xen_event_channel() to implement this synchronization. Since we don't preserve the stack state when we block in the hypervisor, I have used a separate per-vcpu page for dealing with hypercall input parameters for calls that can potentially block in the hypervisor. From what I have seen, win2k8 server mostly specifies all the processors in ProcessorMask. So, I chose to implement TLB flush operations using a single serialization object that keeps track of both the set of vcpus involved in the flush operation as well as the list of pages to be flushed.
>
> I'll mention now, since I have the patch in front of me, that I dislike
> the addition of an "ext_id" field to the HVM save format header and
> associated special treatment in the save/restore code; you should be
> able to figure out that this is a w2k8 domain from the presence of your
> other records in the save file.
I can fix this.
>
>
>
> More generally, I agree that the approach is very heavyweight. I don't
> see the need for a framework here, since there's no other proposed user
> of it that would want the same interface.
I agree that there is no need to isolate the shim's dependence on the base Xen code (xen_call_vector_t). I implemented this shim a year ago and at that point it was not clear what Microsoft might do with the Veridian specification. So, clearly some of the design choices that I made a year ago may not be the right choice today. However, I still think that having an intercept framework where one can implement Veridian specific functionality without cluttering up the base Xen code is still the right approach.
> It seems to duplicate a lot
> of things (does it really need its own spinlock implementation?)
Clearly not! As I noted in an earlier email to Kier, I will be the first to admit that these patches require significant cleanup and I am willing to clean them up. A lot of what you see has historical baggage and I wanted to get some feedback before I invested the time to clean things up.
>
> It's certainly not in Xen coding style, even in the framework
> implementation. (The MS habit of encoding scope and type information in
> variable names annoys the heck out of me. Why does a lock field in an
> nsPartition_t need to be called "nsLock"?)
Agreed.
>
> The naming in general could do with a kicking -- calling everything
> "Novell Shim" is understandable for historical reasons but not really
> descriptive of its function. But maybe that can wait.
Agreed.
Regards,
K. Y
>
> Tim
^ permalink raw reply [flat|nested] 15+ messages in thread
* Re: [PATCH][RFC] Supporting Enlightened Windows 2008Server
2008-03-07 1:10 ` [PATCH][RFC] Supporting Enlightened Windows 2008Server Ky Srinivasan
@ 2008-03-07 11:57 ` Tim Deegan
2008-03-07 13:19 ` Keir Fraser
2008-03-07 13:30 ` Keir Fraser
2 siblings, 0 replies; 15+ messages in thread
From: Tim Deegan @ 2008-03-07 11:57 UTC (permalink / raw)
To: Ky Srinivasan; +Cc: xen-devel, Keir Fraser
At 18:10 -0700 on 06 Mar (1204827047), Ky Srinivasan wrote:
> The Veridian API allows the guest to pass in a variable list of
> arguments to the TLB flush call
> (HvFlushVirtualAddressList). Furthermore, both forms of the flush APIs
> (HvFlushVirtualAddressSpace and HvFlushVirtualAdressList) can specify
> a list of vcpus that should be involved in the flush process.
I expect that the best way to implement the list-of-addresses feature on
Xen is to do a full TLB flush if there's more than one entry in the list
(that's different from the Hyper-V shadow pagetable design, where
explicit lists of addresses to flush make much more sense).
Then all you need are the existing Xen TLB flush operations, and some
means of gating them. If you use a generation counter with each vcpu's
inhibit bit, you can probably do that without the need for any locks.
> So, as
> you have noted we will need a mechanism to co-ordinate the flush
> operation amongst the set of vcpus involved which means we need to be
> able give up the physical CPU in the hypervisor waiting for the flush
> to complete. I have used wait_on_xen_event_channel() to implement this
> synchronization.
wait_on_xen_event_channel is more than you need, since you're not going
to wake on events.
> Since we don't preserve the stack state when we block
> in the hypervisor, I have used a seperate per-vcpu page for dealing
> with hypercall input parameters for calls that can potentially block
> in the hypervisor.
Xen already has a system of hypercall continuations that might help here.
Cheers,
Tim
--
Tim Deegan <Tim.Deegan@citrix.com>
Principal Software Engineer, Citrix Systems (R&D) Ltd.
[Company #02300071, SL9 0DZ, UK.]
^ permalink raw reply [flat|nested] 15+ messages in thread
* Re: [PATCH][RFC] Supporting Enlightened Windows 2008Server
2008-03-07 1:10 ` [PATCH][RFC] Supporting Enlightened Windows 2008Server Ky Srinivasan
2008-03-07 11:57 ` Tim Deegan
@ 2008-03-07 13:19 ` Keir Fraser
2008-03-07 13:30 ` Keir Fraser
2 siblings, 0 replies; 15+ messages in thread
From: Keir Fraser @ 2008-03-07 13:19 UTC (permalink / raw)
To: Ky Srinivasan, Tim Deegan; +Cc: xen-devel
On 7/3/08 01:10, "Ky Srinivasan" <ksrinivasan@novell.com> wrote:
> The Veridian API allows the guest to pass in a variable list of arguments to
> the TLB flush call ( HvFlushVirtualAddressList). Furthermore, both forms of
> the flush APIs (HvFlushVirtualAddressSpace and HvFlushVirtualAdressList) can
> specify a list of vcpus that should be involved in the flush process. So, as
> you have noted we will need a mechanism to co-ordinate the flush operation
> amongst the set of vcpus involved which means we need to be able give up the
> physical CPU in the hypervisor waiting for the flush to complete. I have used
> wait_on_xen_event_channel() to implement this synchronization. Since we don't
> preserve the stack state when we block in the hypervisor, I have used a
> seperate per-vcpu page for dealing with hypercall input parameters for calls
> that can potentially block in the hypervisor. From what I have seen, win2k8
> server mostly specifies all the processors in ProcessorMask. So, I chose to
> implement TLB flush operations using a single serialization object that keeps
> track of both the set of vcpus involved in the flush operation as well as the
> list of pages to be flushed.
Clearly avoiding emulating IPI-to-all-CPUs is rather likely to be a win. But
is the very selective subset-of-CPUs and subset-of-addresses really that
useful? Do you get any significant win over just calling
hvmop_flush_tlb_all()?
Also we need to weigh up the likely penetration of NPT and EPT capable
processors by the time w2k8 is shipping in any volume. But even ignoring
that, I bet 95% of the benefit of this patch can be got with a much smaller
patch.
-- Keir
^ permalink raw reply [flat|nested] 15+ messages in thread
* Re: [PATCH][RFC] Supporting Enlightened Windows 2008Server
2008-03-07 1:10 ` [PATCH][RFC] Supporting Enlightened Windows 2008Server Ky Srinivasan
2008-03-07 11:57 ` Tim Deegan
2008-03-07 13:19 ` Keir Fraser
@ 2008-03-07 13:30 ` Keir Fraser
2 siblings, 0 replies; 15+ messages in thread
From: Keir Fraser @ 2008-03-07 13:30 UTC (permalink / raw)
To: Ky Srinivasan, Tim Deegan; +Cc: xen-devel
On 7/3/08 01:10, "Ky Srinivasan" <ksrinivasan@novell.com> wrote:
> I agree that there is no need to isolate the shim's dependence on the base Xen
> code (xen_call_vector_t). I implemented this shim a year ago and at that point
> it was not clear what Microsoft might do with the Veridian specification. So,
> clearly some of the design choices that I made a year ago may not be the right
> choice today. However, I still think that having an intercept framework where
> one can implement Veridian specific functionality without cluttering up the
> base Xen code is still the right approach.
Clearly putting the Viridian hypercall shims in a different file/directory
makes sense. But I think the shims would need to go on a diet. The TLB
flushing implementation is a good example -- the useful extra features of
the Viridian flush hypercall (if there are any, when partnered with Xen's
shadow code) should be pushed into core Xen HVM TLB-flush handling code.
Otherwise it sits out on the periphery with a correspondingly greater
tendency to rot, and for no benefit (certainly I would strongly argue it is
not cleaner!).
-- Keir
^ permalink raw reply [flat|nested] 15+ messages in thread
* Re: [PATCH][RFC] Supporting Enlightened Windows 2008Server
2008-03-06 7:28 ` Keir Fraser
2008-03-06 10:15 ` Tim Deegan
@ 2008-03-07 1:08 ` Ky Srinivasan
1 sibling, 0 replies; 15+ messages in thread
From: Ky Srinivasan @ 2008-03-07 1:08 UTC (permalink / raw)
To: Keir Fraser, xen-devel
>>> On Thu, Mar 6, 2008 at 2:28 AM, in message
<C3F54D9B.14C64%keir.fraser@eu.citrix.com>, Keir Fraser
<keir.fraser@eu.citrix.com> wrote:
> Personally I think the approach is ugly, and also you have not yet presented
> evidence that supporting the Viridian paravirtualisations improves
> performance.
When I first implemented this (about a year ago), it was not clear if Microsoft would be open sourcing the Veridian specification. Given that, I wanted to have a narrow set of interfaces both to the adapter as well as from the adapter. I take it that you don't care much for this exercise in attempting to isolate the adapter. Now that Veridian specification has been open sourced, I agree there is no need to isolate the adapter from the base hypervisor the way it is currently done. However, given that:
(a) Veridian specification is evolving and Microsoft may define additional functionality to improve guest performance
(b) CPUID namespace, MSR namespace and hypercall namespace collisions between Xen and Veridian. This is the case today and it can only get worse over time.
I feel having a framework that allows you to implement these kinds of mapping layers in complete isolation from the base hypervisor may in fact be cleaner than trying to implement the mapping code inline in the base Xen code.
With regards to performance, we have only run NetBench and on NetBench we have seen a 10% improvement (on a uniprocessor system). We have had some issues with SMP PV drivers and that is the reason I don't have SMP numbers (the adapter has been tested on SMP machines though). We are currently in the process of running a range of benchmarks and I will keep you posted on what we see. Our goal here is clearly to be competitive (as far as performance goes) with Veridian hosting an enlightened windows guest.
> If it doesn't then it's a waste of time; if it does then it
> raises the question of which hypercalls provide the benefit, and do we get a
> smaller neater patch by supporting just those?
I think the only assumption we can make here is that the enlightenments will improve the guest performance. This has been confirmed with the minimal performance testing we have already done. I am also going to assume that Microsoft will continue to evolve Veridian and the set of enlightenments visible to their guests to improve performance. The question that we need to answer, I think is how are we going to support these enlightenments and not if we are going to support Microsoft specific enlightenments. I will be the first one to admit the patches I submitted need to be cleaned up:
1) Fix coding style
2) Get rid of code that is not being exercised. Based on the Veridian specification I identified a set of functionality that I thought an enlightened guest may depend on. It looks like the current shipping windows server 2008 does not use all the functionality that is currently supported. I am somewhat hesitant to get rid of unused functionality since I don't know what the next release of windows will use. In fact, the current shipping windows server 2008 (32 bit version) is already using an undocumented hypercall!
I do think however that having an environment in which we can implement and evolve the support for windows enlightenments without constantly churning the base Xen code is the right approach.
Regards,
K. Y
^ permalink raw reply [flat|nested] 15+ messages in thread
* [PATCH][RFC] Supporting Enlightened Windows 2008 Server
@ 2008-02-19 22:11 Ky Srinivasan
2008-02-20 9:44 ` Keir Fraser
0 siblings, 1 reply; 15+ messages in thread
From: Ky Srinivasan @ 2008-02-19 22:11 UTC (permalink / raw)
To: xen-devel
[-- Attachment #1: Type: text/plain, Size: 1426 bytes --]
During the spring Xen Summit in New York, I presented our work on hosting enlightened Windows Server 2008 on Xen based platforms. Now that Microsoft has published their Hypervisor specification, we can release our code to the community. The goal of this effort has been to host enlightened Windows Server 2008 as a guest on a Xen based platform. We have addressed this requirement by:
A) Leveraging the I/O framework in the Xen based platform. Appropriate PV drivers will be loaded up in the guest to improve the I/O performance.
B) Implementing an adapter that provides the required Hyper-V functionality.
We have implemented only a subset of Hyper-V functionality that is required for enlightened windows 2008 guest today. However, we have the framework in place to implement any additional functionality that the windows guests may leverage going forward. The framework is extensible and one can easily implement OS specific enlightenments.
I am enclosing three patches that implement our adapter for your consideration:
1) ns_tools.patch: Modifications to xen tools to support the adapter
2) ns_xen_base.patch: Modifications to the base Xen code to support the adapter
3) ns_xen_extension.patch: New code implementing the adapter
These patches have been tested on a xen 3.2 based system (SLES10 SP2).
Signed-off by K. Y. Srinivasan (ksrinivasan@novell.com)
Regards,
K. Y
[-- Attachment #2: ns_tools.patch --]
[-- Type: text/plain, Size: 4939 bytes --]
Index: xen-3.2-testing/tools/python/xen/lowlevel/xc/xc.c
===================================================================
--- xen-3.2-testing.orig/tools/python/xen/lowlevel/xc/xc.c
+++ xen-3.2-testing/tools/python/xen/lowlevel/xc/xc.c
@@ -632,14 +632,14 @@ static PyObject *pyxc_hvm_build(XcObject
int i;
#endif
char *image;
- int memsize, vcpus = 1, acpi = 0, apic = 1;
+ int memsize, vcpus = 1, acpi = 0, apic = 1, extid = 0;
static char *kwd_list[] = { "domid",
- "memsize", "image", "vcpus", "acpi",
+ "memsize", "image", "vcpus", "extid", "acpi",
"apic", NULL };
- if ( !PyArg_ParseTupleAndKeywords(args, kwds, "iis|iii", kwd_list,
+ if ( !PyArg_ParseTupleAndKeywords(args, kwds, "iis|iiii", kwd_list,
&dom, &memsize,
- &image, &vcpus, &acpi, &apic) )
+ &image, &vcpus, &extid, &acpi, &apic) )
return NULL;
if ( xc_hvm_build(self->xc_handle, dom, memsize, image) != 0 )
@@ -664,6 +664,7 @@ static PyObject *pyxc_hvm_build(XcObject
va_hvm->checksum = -sum;
munmap(va_map, XC_PAGE_SIZE);
#endif
+ xc_set_hvm_param(self->xc_handle, dom, HVM_PARAM_EXTEND_HYPERVISOR, extid);
return Py_BuildValue("{}");
}
Index: xen-3.2-testing/tools/python/xen/xend/XendConfig.py
===================================================================
--- xen-3.2-testing.orig/tools/python/xen/xend/XendConfig.py
+++ xen-3.2-testing/tools/python/xen/xend/XendConfig.py
@@ -126,7 +126,7 @@ LEGACY_CFG_TO_XENAPI_CFG = reverse_dict(
# Platform configuration keys.
XENAPI_PLATFORM_CFG = [ 'acpi', 'apic', 'boot', 'device_model', 'display',
'fda', 'fdb', 'keymap', 'isa', 'localtime', 'monitor',
- 'nographic', 'pae', 'rtc_timeoffset', 'serial', 'sdl',
+ 'nographic', 'pae', 'extid', 'rtc_timeoffset', 'serial', 'sdl',
'soundhw','stdvga', 'usb', 'usbdevice', 'vnc',
'vncconsole', 'vncdisplay', 'vnclisten', 'timer_mode',
'vncpasswd', 'vncunused', 'xauthority', 'pci', 'vhpt',
Index: xen-3.2-testing/tools/python/xen/xend/image.py
===================================================================
--- xen-3.2-testing.orig/tools/python/xen/xend/image.py
+++ xen-3.2-testing/tools/python/xen/xend/image.py
@@ -426,6 +426,7 @@ class HVMImageHandler(ImageHandler):
self.apic = int(vmConfig['platform'].get('apic', 0))
self.acpi = int(vmConfig['platform'].get('acpi', 0))
+ self.extid = int(vmConfig['platform'].get('extid', 0))
self.guest_os_type = vmConfig['platform'].get('guest_os_type')
# Return a list of cmd line args to the device models based on the
@@ -516,6 +517,7 @@ class HVMImageHandler(ImageHandler):
log.debug("store_evtchn = %d", store_evtchn)
log.debug("memsize = %d", mem_mb)
log.debug("vcpus = %d", self.vm.getVCpuCount())
+ log.debug("extid = %d", self.extid)
log.debug("acpi = %d", self.acpi)
log.debug("apic = %d", self.apic)
@@ -523,6 +525,7 @@ class HVMImageHandler(ImageHandler):
image = self.kernel,
memsize = mem_mb,
vcpus = self.vm.getVCpuCount(),
+ extid = self.extid,
acpi = self.acpi,
apic = self.apic)
rc['notes'] = { 'SUSPEND_CANCEL': 1 }
Index: xen-3.2-testing/tools/python/xen/xm/create.py
===================================================================
--- xen-3.2-testing.orig/tools/python/xen/xm/create.py
+++ xen-3.2-testing/tools/python/xen/xm/create.py
@@ -199,6 +199,10 @@ gopts.var('timer_mode', val='TIMER_MODE'
use="""Timer mode (0=delay virtual time when ticks are missed;
1=virtual time is always wallclock time.""")
+gopts.var('extid', val='EXTID',
+ fn=set_int, default=0,
+ use="Specify extention ID for a HVM domain.")
+
gopts.var('acpi', val='ACPI',
fn=set_int, default=1,
use="Disable or enable ACPI of HVM domain.")
@@ -719,7 +723,7 @@ def configure_vifs(config_devs, vals):
def configure_hvm(config_image, vals):
"""Create the config for HVM devices.
"""
- args = [ 'device_model', 'pae', 'vcpus', 'boot', 'fda', 'fdb', 'timer_mode',
+ args = [ 'device_model', 'pae', 'extid', 'vcpus', 'boot', 'fda', 'fdb', 'timer_mode',
'localtime', 'serial', 'stdvga', 'isa', 'nographic', 'soundhw',
'vnc', 'vncdisplay', 'vncunused', 'vncconsole', 'vnclisten',
'sdl', 'display', 'xauthority', 'rtc_timeoffset', 'monitor',
[-- Attachment #3: ns_xen_base.patch --]
[-- Type: text/plain, Size: 13115 bytes --]
%patch
Index: xen-3.2-testing/xen/arch/x86/hvm/svm/svm.c
===================================================================
--- xen-3.2-testing.orig/xen/arch/x86/hvm/svm/svm.c 2008-02-09 21:24:22.000000000 -0500
+++ xen-3.2-testing/xen/arch/x86/hvm/svm/svm.c 2008-02-09 21:24:32.000000000 -0500
@@ -50,6 +50,7 @@
#include <asm/hvm/vpt.h>
#include <asm/hvm/trace.h>
#include <asm/hap.h>
+#include <asm/hvm/hvm_extensions.h>
u32 svm_feature_flags;
@@ -73,6 +74,7 @@
/* vmcb used for extended host state */
static void *root_vmcb[NR_CPUS] __read_mostly;
+
static void inline __update_guest_eip(
struct cpu_user_regs *regs, unsigned int inst_len)
{
@@ -882,7 +884,7 @@
.set_tsc_offset = svm_set_tsc_offset,
.inject_exception = svm_inject_exception,
.init_hypercall_page = svm_init_hypercall_page,
- .event_pending = svm_event_pending
+ .event_pending = svm_event_pending,
};
int start_svm(struct cpuinfo_x86 *c)
@@ -1044,6 +1046,7 @@
HVMTRACE_3D(CPUID, v, input,
((uint64_t)eax << 32) | ebx, ((uint64_t)ecx << 32) | edx);
+ ext_intercept_do_cpuid(input, regs);
inst_len = __get_instruction_length(v, INSTR_CPUID, NULL);
__update_guest_eip(regs, inst_len);
}
@@ -1739,6 +1742,11 @@
/* is it a read? */
if (vmcb->exitinfo1 == 0)
{
+ if (ext_intercept_do_msr_read(ecx, regs))
+ {
+ goto done;
+ }
+
switch (ecx) {
case MSR_IA32_TSC:
msr_content = hvm_get_guest_time(v);
@@ -1829,6 +1837,11 @@
}
else
{
+ if (ext_intercept_do_msr_write(ecx, regs))
+ {
+ goto done_1;
+ }
+
msr_content = (u32)regs->eax | ((u64)regs->edx << 32);
hvmtrace_msr_write(v, ecx, msr_content);
@@ -1889,6 +1902,7 @@
}
break;
}
+done_1:
inst_len = __get_instruction_length(v, INSTR_WRMSR, NULL);
}
Index: xen-3.2-testing/xen/arch/x86/hvm/vmx/vmx.c
===================================================================
--- xen-3.2-testing.orig/xen/arch/x86/hvm/vmx/vmx.c 2008-02-09 21:24:22.000000000 -0500
+++ xen-3.2-testing/xen/arch/x86/hvm/vmx/vmx.c 2008-02-09 21:24:32.000000000 -0500
@@ -50,6 +50,7 @@
#include <asm/hvm/vpt.h>
#include <public/hvm/save.h>
#include <asm/hvm/trace.h>
+#include <asm/hvm/hvm_extensions.h>
enum handler_return { HNDL_done, HNDL_unhandled, HNDL_exception_raised };
@@ -62,6 +63,7 @@
static void vmx_update_guest_cr(struct vcpu *v, unsigned int cr);
static void vmx_update_guest_efer(struct vcpu *v);
+
static int vmx_domain_initialise(struct domain *d)
{
return vmx_alloc_vlapic_mapping(d);
@@ -1238,7 +1240,8 @@
unsigned int count = *ecx;
#ifdef VMXASSIST
- if ( input == 0x40000003 )
+ if (( input == 0x40000003 ) &&
+ (vmx_guest_x86_mode(current) == 0))
{
/*
* NB. Unsupported interface for private use of VMXASSIST only.
@@ -1319,12 +1322,13 @@
static void vmx_do_cpuid(struct cpu_user_regs *regs)
{
- unsigned int eax, ebx, ecx, edx;
+ unsigned int eax, ebx, ecx, edx, input;
eax = regs->eax;
ebx = regs->ebx;
ecx = regs->ecx;
edx = regs->edx;
+ input = eax;
vmx_cpuid_intercept(&eax, &ebx, &ecx, &edx);
@@ -1332,6 +1336,7 @@
regs->ebx = ebx;
regs->ecx = ecx;
regs->edx = edx;
+ ext_intercept_do_cpuid(input, regs);
}
#define CASE_GET_REG_P(REG, reg) \
@@ -2316,6 +2321,9 @@
HVM_DBG_LOG(DBG_LEVEL_1, "ecx=%x", ecx);
+ if (ext_intercept_do_msr_read(ecx, regs))
+ goto done;
+
switch ( ecx )
{
case MSR_IA32_TSC:
@@ -2499,6 +2507,9 @@
HVM_DBG_LOG(DBG_LEVEL_1, "ecx=%x, eax=%x, edx=%x",
ecx, (u32)regs->eax, (u32)regs->edx);
+ if (ext_intercept_do_msr_write(ecx, regs))
+ return 1;
+
msr_content = (u32)regs->eax | ((u64)regs->edx << 32);
hvmtrace_msr_write(v, ecx, msr_content);
Index: xen-3.2-testing/xen/include/asm-x86/hvm/domain.h
===================================================================
--- xen-3.2-testing.orig/xen/include/asm-x86/hvm/domain.h 2008-02-09 21:24:22.000000000 -0500
+++ xen-3.2-testing/xen/include/asm-x86/hvm/domain.h 2008-02-09 21:24:32.000000000 -0500
@@ -73,6 +73,10 @@
/* Pass-through */
struct hvm_iommu hvm_iommu;
+ /* Hvm extension handle */
+ void *ext_handle; /* will be NULL on creation (memset)*/
+ struct extension_intercept_vector *ext_vector;
+
};
#endif /* __ASM_X86_HVM_DOMAIN_H__ */
Index: xen-3.2-testing/xen/include/public/hvm/params.h
===================================================================
--- xen-3.2-testing.orig/xen/include/public/hvm/params.h 2008-02-09 21:24:22.000000000 -0500
+++ xen-3.2-testing/xen/include/public/hvm/params.h 2008-02-09 21:24:32.000000000 -0500
@@ -50,10 +50,12 @@
#define HVM_PARAM_BUFIOREQ_PFN 6
+#define HVM_PARAM_EXTEND_HYPERVISOR 7
+
#ifdef __ia64__
-#define HVM_PARAM_NVRAM_FD 7
-#define HVM_PARAM_VHPT_SIZE 8
-#define HVM_PARAM_BUFPIOREQ_PFN 9
+#define HVM_PARAM_NVRAM_FD 8
+#define HVM_PARAM_VHPT_SIZE 9
+#define HVM_PARAM_BUFPIOREQ_PFN 10
#endif
/*
@@ -75,12 +77,13 @@
* Missed interrupts are collapsed together and delivered as one 'late tick'.
* Guest time always tracks wallclock (i.e., real) time.
*/
-#define HVM_PARAM_TIMER_MODE 10
+//KYS Check the modifications done to this file
+#define HVM_PARAM_TIMER_MODE 11
#define HVMPTM_delay_for_missed_ticks 0
#define HVMPTM_no_delay_for_missed_ticks 1
#define HVMPTM_no_missed_ticks_pending 2
#define HVMPTM_one_missed_tick_pending 3
-#define HVM_NR_PARAMS 11
+#define HVM_NR_PARAMS 12
#endif /* __XEN_PUBLIC_HVM_PARAMS_H__ */
Index: xen-3.2-testing/xen/arch/x86/hvm/Makefile
===================================================================
--- xen-3.2-testing.orig/xen/arch/x86/hvm/Makefile 2008-02-09 21:24:22.000000000 -0500
+++ xen-3.2-testing/xen/arch/x86/hvm/Makefile 2008-02-09 21:24:32.000000000 -0500
@@ -1,5 +1,6 @@
subdir-y += svm
subdir-y += vmx
+subdir-y += hvm_ext
obj-y += hvm.o
obj-y += i8254.o
Index: xen-3.2-testing/xen/arch/x86/hvm/hvm.c
===================================================================
--- xen-3.2-testing.orig/xen/arch/x86/hvm/hvm.c 2008-02-09 21:24:22.000000000 -0500
+++ xen-3.2-testing/xen/arch/x86/hvm/hvm.c 2008-02-09 21:37:20.000000000 -0500
@@ -42,6 +42,7 @@
#include <asm/mc146818rtc.h>
#include <asm/spinlock.h>
#include <asm/hvm/hvm.h>
+#include <asm/hvm/hvm_extensions.h>
#include <asm/hvm/vpt.h>
#include <asm/hvm/support.h>
#include <asm/hvm/cacheattr.h>
@@ -118,6 +119,7 @@
rtc_migrate_timers(v);
hpet_migrate_timers(v);
pt_migrate(v);
+ ext_intercept_do_migrate_timers(v);
}
void hvm_do_resume(struct vcpu *v)
@@ -279,6 +281,7 @@
void hvm_domain_destroy(struct domain *d)
{
+ ext_intercept_domain_destroy(d);
hvm_funcs.domain_destroy(d);
vioapic_deinit(d);
hvm_destroy_cacheattr_region_list(d);
@@ -447,8 +450,14 @@
{
int rc;
+ if ((rc = ext_intercept_vcpu_initialize(v)) != 0)
+ goto fail1;
+
if ( (rc = vlapic_init(v)) != 0 )
+ {
+ ext_intercept_vcpu_destroy(v);
goto fail1;
+ }
if ( (rc = hvm_funcs.vcpu_initialise(v)) != 0 )
goto fail2;
@@ -496,12 +505,14 @@
hvm_funcs.vcpu_destroy(v);
fail2:
vlapic_destroy(v);
+ ext_intercept_vcpu_destroy(v);
fail1:
return rc;
}
void hvm_vcpu_destroy(struct vcpu *v)
{
+ ext_intercept_vcpu_destroy(v);
vlapic_destroy(v);
hvm_funcs.vcpu_destroy(v);
@@ -1573,6 +1584,10 @@
case 0:
break;
}
+ if (ext_intercept_do_hypercall(regs))
+ {
+ return HVM_HCALL_completed;
+ }
if ( (eax >= NR_hypercalls) || !hvm_hypercall32_table[eax] )
{
@@ -1964,6 +1979,9 @@
if ( a.value > HVMPTM_one_missed_tick_pending )
goto param_fail;
break;
+ case HVM_PARAM_EXTEND_HYPERVISOR:
+ if (hvm_ext_bind(d, (int)a.value))
+ goto param_fail;
}
d->arch.hvm_domain.params[a.index] = a.value;
rc = 0;
Index: xen-3.2-testing/xen/arch/x86/x86_64/asm-offsets.c
===================================================================
--- xen-3.2-testing.orig/xen/arch/x86/x86_64/asm-offsets.c 2008-02-09 21:24:22.000000000 -0500
+++ xen-3.2-testing/xen/arch/x86/x86_64/asm-offsets.c 2008-02-09 21:24:32.000000000 -0500
@@ -146,4 +146,7 @@
BLANK();
OFFSET(CPUINFO_ext_features, struct cpuinfo_x86, x86_capability[1]);
+ BLANK();
+
+ OFFSET(DOM_ext_vector, struct domain, arch.hvm_domain.ext_vector);
}
Index: xen-3.2-testing/xen/arch/x86/hvm/vmx/x86_64/exits.S
===================================================================
--- xen-3.2-testing.orig/xen/arch/x86/hvm/vmx/x86_64/exits.S 2008-02-09 21:24:22.000000000 -0500
+++ xen-3.2-testing/xen/arch/x86/hvm/vmx/x86_64/exits.S 2008-02-09 21:24:32.000000000 -0500
@@ -112,6 +112,14 @@
ALIGN
ENTRY(vmx_asm_do_vmentry)
GET_CURRENT(%rbx)
+ mov VCPU_domain(%rbx),%rax
+ mov DOM_ext_vector(%rax),%rdx
+ test %rdx,%rdx
+ je vmx_no_ext_vector
+ sti
+ callq *(%rdx)
+vmx_no_ext_vector:
+
cli # tests must not race interrupts
movl VCPU_processor(%rbx),%eax
Index: xen-3.2-testing/xen/arch/x86/hvm/svm/x86_64/exits.S
===================================================================
--- xen-3.2-testing.orig/xen/arch/x86/hvm/svm/x86_64/exits.S 2008-02-09 21:24:22.000000000 -0500
+++ xen-3.2-testing/xen/arch/x86/hvm/svm/x86_64/exits.S 2008-02-09 21:24:32.000000000 -0500
@@ -37,6 +37,14 @@
ENTRY(svm_asm_do_resume)
GET_CURRENT(%rbx)
+ mov VCPU_domain(%rbx),%rax
+ mov DOM_ext_vector(%rax),%rdx
+ test %rdx,%rdx
+ je svm_no_ext_vector
+ sti
+ callq *(%rdx)
+svm_no_ext_vector:
+
CLGI
movl VCPU_processor(%rbx),%eax
Index: xen-3.2-testing/xen/arch/x86/hvm/save.c
===================================================================
--- xen-3.2-testing.orig/xen/arch/x86/hvm/save.c 2008-02-09 21:24:22.000000000 -0500
+++ xen-3.2-testing/xen/arch/x86/hvm/save.c 2008-02-09 21:24:32.000000000 -0500
@@ -23,6 +23,8 @@
#include <asm/hvm/support.h>
#include <public/hvm/save.h>
+#include <public/hvm/params.h>
+#include <asm/hvm/hvm_extensions.h>
void arch_hvm_save(struct domain *d, struct hvm_save_header *hdr)
{
@@ -31,8 +33,7 @@
/* Save some CPUID bits */
cpuid(1, &eax, &ebx, &ecx, &edx);
hdr->cpuid = eax;
-
- hdr->pad0 = 0;
+ hdr->ext_id = d->arch.hvm_domain.params[HVM_PARAM_EXTEND_HYPERVISOR];
}
int arch_hvm_load(struct domain *d, struct hvm_save_header *hdr)
@@ -61,6 +62,9 @@
/* VGA state is not saved/restored, so we nobble the cache. */
d->arch.hvm_domain.stdvga.cache = 0;
+ d->arch.hvm_domain.params[HVM_PARAM_EXTEND_HYPERVISOR] = hdr->ext_id;
+ if (hvm_ext_bind(d, hdr->ext_id))
+ return -1;
return 0;
}
Index: xen-3.2-testing/xen/include/public/arch-x86/hvm/save.h
===================================================================
--- xen-3.2-testing.orig/xen/include/public/arch-x86/hvm/save.h 2008-02-09 21:24:22.000000000 -0500
+++ xen-3.2-testing/xen/include/public/arch-x86/hvm/save.h 2008-02-09 21:24:32.000000000 -0500
@@ -38,7 +38,7 @@
uint32_t version; /* File format version */
uint64_t changeset; /* Version of Xen that saved this file */
uint32_t cpuid; /* CPUID[0x01][%eax] on the saving machine */
- uint32_t pad0;
+ uint32_t ext_id; /* extension ID */
};
DECLARE_HVM_SAVE_TYPE(HEADER, 1, struct hvm_save_header);
@@ -422,9 +422,30 @@
DECLARE_HVM_SAVE_TYPE(MTRR, 14, struct hvm_hw_mtrr);
+struct hvm_ns_veridian_dom {
+ uint64_t guestid_msr;
+ uint64_t hypercall_msr;
+ uint32_t long_mode;
+ uint32_t pad0;
+};
+DECLARE_HVM_SAVE_TYPE(NS_VERIDIAN_DOM, 15, struct hvm_ns_veridian_dom);
+
+struct hvm_ns_veridian_cpu {
+ uint64_t control_msr;
+ uint64_t version_msr;
+ uint64_t sief_msr;
+ uint64_t simp_msr;
+ uint64_t eom_msr;
+ uint64_t int_msr[16];
+ struct {
+ uint64_t config;
+ uint64_t count;
+ } timers[4];
+};
+DECLARE_HVM_SAVE_TYPE(NS_VERIDIAN_CPU, 16, struct hvm_ns_veridian_cpu);
/*
* Largest type-code in use
*/
-#define HVM_SAVE_CODE_MAX 14
+#define HVM_SAVE_CODE_MAX 16
#endif /* __XEN_PUBLIC_HVM_SAVE_X86_H__ */
[-- Attachment #4: ns_xen_extension.patch --]
[-- Type: text/plain, Size: 125710 bytes --]
%patch
Index: xen-3.2-testing/xen/arch/x86/hvm/hvm_ext/Makefile
===================================================================
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
+++ xen-3.2-testing/xen/arch/x86/hvm/hvm_ext/Makefile 2008-02-15 18:28:11.000000000 -0500
@@ -0,0 +1,3 @@
+subdir-y += novell
+
+obj-y += hvm_ext.o
Index: xen-3.2-testing/xen/include/asm-x86/hvm/hvm_extensions.h
===================================================================
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
+++ xen-3.2-testing/xen/include/asm-x86/hvm/hvm_extensions.h 2008-02-15 18:28:11.000000000 -0500
@@ -0,0 +1,239 @@
+/****************************************************************************
+ |
+ | Copyright (c) [2007, 2008] Novell, Inc.
+ | All Rights Reserved.
+ |
+ | This program is free software; you can redistribute it and/or
+ | modify it under the terms of version 2 of the GNU General Public License as
+ | published by the Free Software Foundation.
+ |
+ | This program is distributed in the hope that it will be useful,
+ | but WITHOUT ANY WARRANTY; without even the implied warranty of
+ | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ | GNU General Public License for more details.
+ |
+ | You should have received a copy of the GNU General Public License
+ | along with this program; if not, contact Novell, Inc.
+ |
+ | To contact Novell about this file by physical or electronic mail,
+ | you may find current contact information at www.novell.com
+ |
+ |***************************************************************************
+*/
+
+/*
+ * hvm_extensions.h
+ * This file implements a framework for extending the hypervisor
+ * functionality in a modular fashion. The framework is comprised of
+ * two components: A) A set of intercepts that will allow the extension
+ * module to implement its functionality by intercepting the corresponding
+ * code paths in Xen and B) A controlled runtime for the extension module.
+ * Initially the goal was to package the extension module as a boot-time
+ * loadable module. This may not be the way we end up packaging it.
+ *
+ * Engineering Contact: K. Y. Srinivasan
+ */
+
+#ifndef HVM_EXTENSION_H
+#define HVM_EXTENSION_H
+
+#include <xen/sched.h>
+#include <asm/domain.h>
+#include <xen/timer.h>
+#include <xen/time.h>
+#include <asm/regs.h>
+#include <asm/types.h>
+#include <asm/hvm/io.h>
+#include <asm/hvm/hvm.h>
+#include <asm/hvm/domain.h>
+
+
+
+/*
+ * Hypervisor extension hooks.
+ */
+typedef struct extension_intercept_vector {
+ /* Do not move the first field (do_continuation). Offset
+ * hardcoded in assembly files exits.S (VMX and SVM).
+ */
+ void (*do_continuation)(void);
+ int (*domain_create)(struct domain *d);
+ void (*domain_destroy)(struct domain *d);
+ int (*vcpu_initialize)(struct vcpu *v);
+ void (*vcpu_destroy)(struct vcpu *v);
+ int (*do_cpuid)(uint32_t idx, struct cpu_user_regs *regs);
+ int (*do_msr_read)(uint32_t idx, struct cpu_user_regs *regs);
+ int (*do_msr_write)(uint32_t idx, struct cpu_user_regs *regs);
+ int (*do_hypercall)(struct cpu_user_regs *pregs);
+ void (*do_migrate_timers)(struct vcpu *v);
+} extension_intercept_vector_t;
+
+static inline int
+ext_intercept_domain_create(struct domain *d)
+{
+ if (d->arch.hvm_domain.ext_vector) {
+ return(d->arch.hvm_domain.ext_vector->domain_create(d));
+ }
+ return (0);
+}
+
+static inline void
+ext_intercept_domain_destroy(struct domain *d)
+{
+ if (d->arch.hvm_domain.ext_vector) {
+ d->arch.hvm_domain.ext_vector->domain_destroy(d);
+ }
+}
+
+static inline int
+ext_intercept_vcpu_initialize(struct vcpu *v)
+{
+ struct domain *d = v->domain;
+ if (d->arch.hvm_domain.ext_vector) {
+ return(d->arch.hvm_domain.ext_vector->vcpu_initialize(v));
+ }
+ return (0);
+}
+
+static inline void
+ext_intercept_vcpu_destroy(struct vcpu *v)
+{
+ struct domain *d = v->domain;
+ if (d->arch.hvm_domain.ext_vector) {
+ d->arch.hvm_domain.ext_vector->vcpu_destroy(v);
+ }
+}
+
+static inline int
+ext_intercept_do_cpuid(uint32_t idx, struct cpu_user_regs *regs)
+{
+ struct domain *d = current->domain;
+ if (d->arch.hvm_domain.ext_vector) {
+ return(d->arch.hvm_domain.ext_vector->do_cpuid(
+ idx, regs));
+ }
+ return (0);
+}
+
+static inline int
+ext_intercept_do_msr_read(uint32_t idx, struct cpu_user_regs *regs)
+{
+ struct domain *d = current->domain;
+ if (d->arch.hvm_domain.ext_vector) {
+ return(d->arch.hvm_domain.
+ ext_vector->do_msr_read(idx, regs));
+ }
+ return (0);
+}
+static inline int
+ext_intercept_do_msr_write(uint32_t idx, struct cpu_user_regs *regs)
+{
+ struct domain *d = current->domain;
+ if (d->arch.hvm_domain.ext_vector) {
+ return(d->arch.hvm_domain.
+ ext_vector->do_msr_write(idx, regs));
+ }
+ return (0);
+}
+
+static inline int
+ext_intercept_do_hypercall(struct cpu_user_regs *regs)
+{
+ struct domain *d = current->domain;
+ if (d->arch.hvm_domain.ext_vector) {
+ return(d->arch.hvm_domain.
+ ext_vector->do_hypercall(regs));
+ }
+ return (0);
+}
+
+/* Let the domain's bound extension (if any) migrate its timers for vcpu v. */
+static inline void
+ext_intercept_do_migrate_timers(struct vcpu *v)
+{
+    struct domain *d = v->domain; /* v's own domain, like the other vcpu hooks */
+    if (d->arch.hvm_domain.ext_vector) {
+        d->arch.hvm_domain.ext_vector->do_migrate_timers(v); /* void hook */
+    }
+}
+static inline void
+ext_intercept_do_continuation(void)
+{
+ struct domain *d = current->domain;
+ if (d->arch.hvm_domain.ext_vector) {
+ d->arch.hvm_domain.
+ ext_vector->do_continuation();
+ }
+}
+
+/*
+ * Base hypervisor support available to extension modules.
+ * We may choose to do away with this level of indirection!
+ * It may still be useful to have a controlled environment for the
+ * extension modules.
+ */
+typedef struct xen_call_vector {
+ /*
+ * We may want to embed version/compiler info here to avoid mismatches
+ */
+ struct hvm_function_table *hvmFuncTable;
+ struct hvm_mmio_handler *mmIoHandler;
+ void (*extPanic)(const char *s, ...);
+ void (*extPrintk)(const char *format, ...);
+ void (*extPostInterrupt)(struct vcpu *v, int vector, int type);
+ void (*extSetTimer)(struct timer *timer, s_time_t expires);
+ s_time_t (*extGetTimeSinceBoot)(void);
+ void * (*extGetVirtFromGmfn)(struct domain *d, unsigned long gmfn);
+ unsigned long (*extGetMfnFromGmfn)(struct domain *d, unsigned long gmfn);
+ unsigned long (*extGetMfnFromGva)(unsigned long va);
+ void (*extUnmapDomainPage)(void *p);
+ void *(*extAllocMem)(size_t size);
+ void (*extFreeMem)(void *ptr);
+ enum hvm_copy_result (*extCopyToGuestPhysical)(paddr_t paddr, void *buf, int size);
+ enum hvm_copy_result (*extCopyFromGuestPhysical)(void *buf, paddr_t paddr, int size);
+ void *(*extAllocDomHeapPage)(void);
+ void (*extFreeDomHeapPage)(void *);
+ void * (*extGetVirtFromPagePtr)(void *);
+ void (*extVcpuPause)(struct vcpu *v);
+ void (*extVcpuUnPause)(struct vcpu *v);
+ void (*extArchGetDomainInfoCtxt)(struct vcpu *v,
+ struct vcpu_guest_context *);
+ int (*extArchSetDomainInfoCtxt)(struct vcpu *v,
+ struct vcpu_guest_context *);
+ int (*extCpuIsIntel)(void );
+ int (*extWrmsrHypervisorRegs)(uint32_t idx, uint32_t eax,
+ uint32_t edx);
+ void (*extKillTimer)(struct timer *timer);
+ void (*extMigrateTimer)(struct timer *timer, unsigned int new_cpu);
+} xen_call_vector_t;
+
+#define MAX_EXTENSION_ID 1
+
+/*
+ * int hvm_ext_bind(struct domain *d, int ext_id)
+ * Bind the specified domain to the specified extension module.
+ *
+ * Calling/Exit State:
+ * None.
+ *
+ * Remarks:
+ * The goal is to support per-domain extension modules. Domain
+ * creating tools will have to specify the needed extension
+ * module ID. For now it is hard coded.
+ */
+int hvm_ext_bind(struct domain *d, int ext_id);
+
+/*
+ * int hvm_ext_register(int ext_id,
+ * struct extension_intercept_vector *ext_vector,
+ * struct xen_call_vector *xen_vector)
+ * Register the extension module with the hypervisor
+ * Calling/Exit State:
+ * None.
+ */
+
+int hvm_ext_register(int ext_id, struct extension_intercept_vector *ext_vector,
+ struct xen_call_vector *xen_vector);
+
+
+#endif
Index: xen-3.2-testing/xen/arch/x86/hvm/hvm_ext/hvm_ext.c
===================================================================
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
+++ xen-3.2-testing/xen/arch/x86/hvm/hvm_ext/hvm_ext.c 2008-02-15 18:28:11.000000000 -0500
@@ -0,0 +1,350 @@
+/****************************************************************************
+ |
+ | Copyright (c) [2007, 2008] Novell, Inc.
+ | All Rights Reserved.
+ |
+ | This program is free software; you can redistribute it and/or
+ | modify it under the terms of version 2 of the GNU General Public License as
+ | published by the Free Software Foundation.
+ |
+ | This program is distributed in the hope that it will be useful,
+ | but WITHOUT ANY WARRANTY; without even the implied warranty of
+ | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ | GNU General Public License for more details.
+ |
+ | You should have received a copy of the GNU General Public License
+ | along with this program; if not, contact Novell, Inc.
+ |
+ | To contact Novell about this file by physical or electronic mail,
+ | you may find current contact information at www.novell.com
+ |
+ |***************************************************************************
+*/
+
+/*
+ * hvm_ext.c
+ * Glue code for implementing the extension module.
+ *
+ * Engineering Contact: K. Y. Srinivasan
+ */
+
+#include <asm/hvm/hvm_extensions.h>
+#include <xen/lib.h>
+#include <asm/event.h>
+#include <asm/shadow.h>
+#include <asm/hvm/support.h>
+#include <xen/domain_page.h>
+#include <xen/domain.h>
+#include <xen/mm.h>
+#include <xen/event.h>
+#include <xen/sched.h>
+
+
+struct extension_intercept_vector *intercept_vector;
+
+/*
+ * static void
+ * hvm_ext_inject_interrupt(struct vcpu *v, int vector, int type)
+ * Inject the specified exception to the specified virtual cpu.
+ *
+ * Calling/Exit State:
+ * None.
+ */
+static void
+hvm_ext_inject_interrupt(struct vcpu *v, int vector, int type)
+{
+ struct vlapic *vlapic = vcpu_vlapic(v);
+
+ /*
+ * XXXKYS: Check the trigger mode.
+ */
+ if (vlapic_set_irq(vlapic, vector, 1)) {
+ vcpu_kick(v);
+ }
+}
+
+/*
+ * static void
+ * hvm_ext_set_timer(struct timer *timer, s_time_t expires)
+ * Set a timeout.
+ *
+ * Calling/Exit State:
+ * None.
+ */
+static void
+hvm_ext_set_timer(struct timer *timer, s_time_t expires)
+{
+ set_timer(timer, expires);
+}
+
+/*
+ * static void
+ * hvm_ext_kill_timer(struct timer *timer)
+ * Kill the specified timer.
+ *
+ * Calling/Exit State:
+ * None.
+ */
+static void
+hvm_ext_kill_timer(struct timer *timer)
+{
+ kill_timer(timer);
+}
+
+/*
+ * static void
+ * hvm_ext_migrate_timer(struct timer *timer, unsigned int new_cpu)
+ * Migrate the timer to the new cpu.
+ *
+ * Calling/Exit State:
+ * None.
+ */
+static void
+hvm_ext_migrate_timer(struct timer *timer, unsigned int new_cpu)
+{
+ migrate_timer(timer, new_cpu);
+}
+
+
+/*
+ * static void *
+ * hvm_ext_get_virt_from_gmfn(struct domain *d, unsigned long gmfn)
+ * Given a guest frame number return a virtual address at which
+ * the specified page can be accessed in the hypervisor.
+ *
+ * Calling/Exit State:
+ * None.
+ */
+static void *
+hvm_ext_get_virt_from_gmfn(struct domain *d, unsigned long gmfn)
+{
+ unsigned long mfn = gmfn_to_mfn(d, gmfn);
+ if (mfn == INVALID_MFN) {
+ return (NULL);
+ }
+ return (map_domain_page_global(mfn));
+}
+
+/*
+ * static unsigned long
+ * hvm_ext_get_mfn_from_gmfn(struct domain *d, unsigned long gmfn)
+ * Get the machine frame number given the guest frame number.
+ *
+ * Calling/Exit State:
+ * None.
+ */
+static unsigned long
+hvm_ext_get_mfn_from_gmfn(struct domain *d, unsigned long gmfn)
+{
+ return (gmfn_to_mfn(d, gmfn));
+}
+
+/*
+ * static unsigned long
+ * hvm_ext_get_mfn_from_gva(unsigned long va)
+ * Given the guest virtual address return the machine frame number backing the
+ * address.
+ *
+ * Calling/Exit State:
+ * None.
+ */
+static unsigned long
+hvm_ext_get_mfn_from_gva(unsigned long va)
+{
+ uint32_t pfec = PFEC_page_present;
+ unsigned long gfn;
+ gfn = paging_gva_to_gfn(current, va, &pfec);
+ return (gmfn_to_mfn((current->domain), gfn));
+}
+
+/*
+ * static void *
+ * hvm_ext_alloc_mem(size_t size)
+ * Allocate specified bytes of memory.
+ *
+ * Calling/Exit State:
+ * None.
+ */
+static void *
+hvm_ext_alloc_mem(size_t size)
+{
+ return (xmalloc_bytes(size));
+}
+
+/*
+ * static void *
+ * hvm_ext_alloc_domheap_page(void)
+ * Allocate a page from the per-domain heap.
+ *
+ * Calling/Exit State:
+ * None.
+ */
+static void *
+hvm_ext_alloc_domheap_page(void)
+{
+ return (alloc_domheap_page(NULL));
+}
+
+/*
+ * static void
+ * hvm_ext_free_domheap_page(void *p)
+ * Free a dom heap page.
+ *
+ * Calling/Exit State:
+ * None.
+ */
+static void
+hvm_ext_free_domheap_page(void *p)
+{
+ free_domheap_pages(p, 0);
+}
+
+/*
+ * static void *
+ * hvm_ext_get_virt_from_page_ptr(void *page)
+ * Map the specified page and return a hypervisor VA.
+ *
+ *
+ * Calling/Exit State:
+ * None.
+ */
+static void *
+hvm_ext_get_virt_from_page_ptr(void *page)
+{
+ struct page_info *pg = page;
+ unsigned long mfn = page_to_mfn(pg);
+ return (map_domain_page_global(mfn));
+}
+
+extern struct cpuinfo_x86 boot_cpu_data;
+
+/*
+ * static int
+ * hvm_ext_cpu_is_intel(void)
+ * Check if the CPU vendor is Intel.
+ * Returns 1 when boot_cpu_data reports an Intel vendor, 0 otherwise.
+ *
+ * Calling/Exit State:
+ * None.
+ */
+static int
+hvm_ext_cpu_is_intel(void)
+{
+    if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) { /* named, not a bare 0 */
+        return (1);
+    }
+    return (0);
+}
+
+/*
+ * int
+ * hvm_ext_bind(struct domain *d, int ext_id)
+ * Bind the specified domain with the specified extension module.
+ * Returns 0 on success or when ext_id is 0; 1 on failure (domain left unbound).
+ *
+ * Calling/Exit State:
+ * None.
+ */
+int
+hvm_ext_bind(struct domain *d, int ext_id)
+{
+    struct extension_intercept_vector *vec = intercept_vector;
+    int i, j;
+    /*
+     * XXXKYS: Assuming that this function will be called before the
+     * new domain begins to run. It is critical that this be the case.
+     */
+    if (ext_id == 0) {
+        return (0); /* default value for this parameter: nothing to bind */
+    }
+    if (ext_id < 0 || ext_id > MAX_EXTENSION_ID || vec == NULL) {
+        return (1); /* unknown id, or no extension module has registered */
+    }
+    d->arch.hvm_domain.ext_vector = vec;
+    if (vec->domain_create(d)) { /* let the extension initialize its state */
+        goto unbind;
+    }
+    for (i = 0; i < MAX_VIRT_CPUS; i++) {
+        if (d->vcpu[i] != NULL && vec->vcpu_initialize(d->vcpu[i])) {
+            for (j = i - 1; j >= 0; j--) { /* undo the vcpus already set up */
+                if (d->vcpu[j] != NULL) {
+                    vec->vcpu_destroy(d->vcpu[j]);
+                }
+            }
+            vec->domain_destroy(d);
+            goto unbind;
+        }
+    }
+    return (0);
+unbind:
+    d->arch.hvm_domain.ext_vector = NULL; /* never leave a failed bind live */
+    return (1);
+}
+
+
+void extPanic(const char *fmt, ...)
+{
+ domain_crash_synchronous();
+}
+
+/*
+ * For now we will support only one extension; id==1!
+ */
+
+extern struct hvm_function_table hvm_funcs;
+extern struct hvm_mmio_handler vlapic_mmio_handler;
+
+/*
+ * int
+ * hvm_ext_register(int ext_id, struct extension_intercept_vector *ext_vector,
+ *
+ * Register the invoking extension module with the hypervisor.
+ *
+ *
+ * Calling/Exit State:
+ * None.
+ */
+int
+hvm_ext_register(int ext_id, struct extension_intercept_vector *ext_vector,
+ struct xen_call_vector *xen_vector)
+{
+ ASSERT(ext_id == 1);
+ intercept_vector = ext_vector;
+ /*
+ * Populate the vector of services from the xen side; ultimately
+ * we may decide to get rid of this level of indirection; it may
+ * still be useful to limit the breadth of xen dependency here.
+ */
+ xen_vector->hvmFuncTable = &hvm_funcs;
+ xen_vector->mmIoHandler = &vlapic_mmio_handler;
+ xen_vector->extPanic = extPanic;
+ xen_vector->extPrintk = printk;
+ xen_vector->extPostInterrupt = hvm_ext_inject_interrupt;
+ xen_vector->extSetTimer = hvm_ext_set_timer;
+ xen_vector->extKillTimer = hvm_ext_kill_timer;
+ xen_vector->extMigrateTimer = hvm_ext_migrate_timer;
+ xen_vector->extGetTimeSinceBoot = get_s_time;
+ xen_vector->extGetVirtFromGmfn = hvm_ext_get_virt_from_gmfn;
+ xen_vector->extGetMfnFromGmfn = hvm_ext_get_mfn_from_gmfn;
+
+ xen_vector->extGetMfnFromGva = hvm_ext_get_mfn_from_gva;
+#ifdef CONFIG_DOMAIN_PAGE
+ xen_vector->extUnmapDomainPage = unmap_domain_page_global;
+#endif
+ xen_vector->extAllocMem = hvm_ext_alloc_mem;
+ xen_vector->extFreeMem = xfree;
+ xen_vector->extCopyToGuestPhysical = hvm_copy_to_guest_phys;
+ xen_vector->extCopyFromGuestPhysical = hvm_copy_from_guest_phys;
+ xen_vector->extAllocDomHeapPage = hvm_ext_alloc_domheap_page;
+ xen_vector->extFreeDomHeapPage = hvm_ext_free_domheap_page;
+ xen_vector->extGetVirtFromPagePtr = hvm_ext_get_virt_from_page_ptr;
+ xen_vector->extVcpuPause = vcpu_pause;
+ xen_vector->extVcpuUnPause = vcpu_unpause;
+ xen_vector->extArchGetDomainInfoCtxt = arch_get_info_guest;
+ xen_vector->extArchSetDomainInfoCtxt = arch_set_info_guest;
+ xen_vector->extCpuIsIntel = hvm_ext_cpu_is_intel;
+ xen_vector->extWrmsrHypervisorRegs = wrmsr_hypervisor_regs;
+
+ return 0;
+}
Index: xen-3.2-testing/xen/arch/x86/hvm/hvm_ext/novell/Makefile
===================================================================
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
+++ xen-3.2-testing/xen/arch/x86/hvm/hvm_ext/novell/Makefile 2008-02-15 18:28:11.000000000 -0500
@@ -0,0 +1,2 @@
+obj-y += nsintercept.o
+obj-y += nshypercall.o
Index: xen-3.2-testing/xen/arch/x86/hvm/hvm_ext/novell/ns_errno.h
===================================================================
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
+++ xen-3.2-testing/xen/arch/x86/hvm/hvm_ext/novell/ns_errno.h 2008-02-15 18:28:11.000000000 -0500
@@ -0,0 +1,62 @@
+/****************************************************************************
+ |
+ | Copyright (c) [2007, 2008] Novell, Inc.
+ | All Rights Reserved.
+ |
+ | This program is free software; you can redistribute it and/or
+ | modify it under the terms of version 2 of the GNU General Public License as
+ | published by the Free Software Foundation.
+ |
+ | This program is distributed in the hope that it will be useful,
+ | but WITHOUT ANY WARRANTY; without even the implied warranty of
+ | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ | GNU General Public License for more details.
+ |
+ | You should have received a copy of the GNU General Public License
+ | along with this program; if not, contact Novell, Inc.
+ |
+ | To contact Novell about this file by physical or electronic mail,
+ | you may find current contact information at www.novell.com
+ |
+ |***************************************************************************
+*/
+
+/*
+ * ns_errno.h
+ * Error codes for the Novell Shim.
+ *
+ * Engineering Contact: K. Y. Srinivasan
+ */
+
+#ifndef NS_ERRNO_H
+#define NS_ERRNO_H
+
+#define NS_STATUS_SUCCESS 0x0000
+#define NS_STATUS_INVALID_HYPERCALL_CODE 0x0002
+#define NS_STATUS_INVALID_HYPERCALL_INPUT 0x0003
+#define NS_STATUS_INVALID_ALIGNMENT 0x0004
+#define NS_STATUS_INVALID_PARAMETER 0x0005
+#define NS_STATUS_ACCESS_DENIED 0x0006
+#define NS_STATUS_INVALID_PARTITION_STATE 0x0007
+#define NS_STATUS_OPERATION_DENIED 0x0008
+#define NS_STATUS_UNKNOWN_PROPERTY 0x0009
+#define NS_STATUS_PROPERTY_VALUE_OUT_OF_RANGE 0x000A
+#define NS_STATUS_INSUFFICIENT_MEMORY 0x000B
+#define NS_STATUS_PARTITION_TOO_DEEP 0x000C
+#define NS_STATUS_INVALID_PARTITION_ID 0x000D
+#define NS_STATUS_INVALID_VP_INDEX 0x000E
+#define NS_STATUS_UNABLE_TO_RESTORE_STATE 0x000F
+#define NS_STATUS_NOT_FOUND 0x0010
+#define NS_STATUS_INVALID_PORT_ID 0x0011
+#define NS_STATUS_INVALID_CONNECTION_ID 0x0012
+#define NS_STATUS_INSUFFICIENT_BUFFERS 0x0013
+#define NS_STATUS_NOT_ACKNOWLEDGED 0x0014
+#define NS_STATUS_INVALID_VP_STATE 0x0015
+#define NS_STATUS_ACKNOWLEDGED 0x0016
+#define NS_STATUS_INVALID_SAVE_RESTORE_STATE 0x0017
+#define NS_STATUS_NO_MEMORY_4PAGES 0x0100
+#define NS_STATUS_NO_MEMORY_16PAGES 0x0101
+#define NS_STATUS_NO_MEMORY_64PAGES 0x0102
+#define NS_STATUS_NO_MEMORY_256PAGES 0x0103
+#define NS_STATUS_NO_MEMORY_1024PAGES 0x0104
+#endif
Index: xen-3.2-testing/xen/arch/x86/hvm/hvm_ext/novell/ns_shim.h
===================================================================
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
+++ xen-3.2-testing/xen/arch/x86/hvm/hvm_ext/novell/ns_shim.h 2008-02-15 18:28:11.000000000 -0500
@@ -0,0 +1,480 @@
+/****************************************************************************
+ |
+ | Copyright (c) [2007, 2008] Novell, Inc.
+ | All Rights Reserved.
+ |
+ | This program is free software; you can redistribute it and/or
+ | modify it under the terms of version 2 of the GNU General Public License as
+ | published by the Free Software Foundation.
+ |
+ | This program is distributed in the hope that it will be useful,
+ | but WITHOUT ANY WARRANTY; without even the implied warranty of
+ | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ | GNU General Public License for more details.
+ |
+ | You should have received a copy of the GNU General Public License
+ | along with this program; if not, contact Novell, Inc.
+ |
+ | To contact Novell about this file by physical or electronic mail,
+ | you may find current contact information at www.novell.com
+ |
+ |***************************************************************************
+*/
+
+/*
+ * Novell Shim Implementation.
+ *
+ * Engineering Contact: K. Y. Srinivasan
+ */
+
+#ifndef NS_SHIM_H
+#define NS_SHIM_H
+
+#include <xen/sched.h>
+#include <xen/types.h>
+#include <xen/timer.h>
+#include <asm/current.h>
+#include <asm/domain.h>
+#include <asm/shadow.h>
+#include <public/xen.h>
+
+#include "nshypercall.h"
+
+/*
+ * Synthetic MSR addresses
+ */
+#define NS_MSR_GUEST_OS_ID 0x40000000
+#define NS_MSR_HYPERCALL 0x40000001
+#define NS_MSR_VP_INDEX 0x40000002
+#define NS_MSR_SYSTEM_RESET 0x40000003
+#define NS_MSR_TIME_REF_COUNT 0x40000020
+#define NS_MSR_EOI 0x40000070
+#define NS_MSR_ICR 0x40000071
+#define NS_MSR_TPR 0x40000072
+
+#define NS_MSR_SCONTROL 0x40000080
+#define NS_MSR_SVERSION 0x40000081
+#define NS_MSR_SIEFP 0x40000082
+#define NS_MSR_SIMP 0x40000083
+#define NS_MSR_SEOM 0x40000084
+#define NS_MSR_SINT0 0x40000090
+#define NS_MSR_SINT1 0x40000091
+#define NS_MSR_SINT2 0x40000092
+#define NS_MSR_SINT3 0x40000093
+#define NS_MSR_SINT4 0x40000094
+#define NS_MSR_SINT5 0x40000095
+#define NS_MSR_SINT6 0x40000096
+#define NS_MSR_SINT7 0x40000097
+#define NS_MSR_SINT8 0x40000098
+#define NS_MSR_SINT9 0x40000099
+#define NS_MSR_SINT10 0x4000009A
+#define NS_MSR_SINT11 0x4000009B
+#define NS_MSR_SINT12 0x4000009C
+#define NS_MSR_SINT13 0x4000009D
+#define NS_MSR_SINT14 0x4000009E
+#define NS_MSR_SINT15 0x4000009F
+
+#define NS_MSR_TIMER0_CONFIG 0x400000B0
+#define NS_MSR_TIMER0_COUNT 0x400000B1
+#define NS_MSR_TIMER1_CONFIG 0x400000B2
+#define NS_MSR_TIMER1_COUNT 0x400000B3
+#define NS_MSR_TIMER2_CONFIG 0x400000B4
+#define NS_MSR_TIMER2_COUNT 0x400000B5
+#define NS_MSR_TIMER3_CONFIG 0x400000B6
+#define NS_MSR_TIMER3_COUNT 0x400000B7
+
+/*
+ * MSR for supporting PV drivers on longhorn.
+ */
+#define NS_MSR_PVDRV_HCALL 0x40001000
+
+/*
+ * MSR for supporting other enlightened oses.
+ */
+#define NS_MSR_NONLH_GUEST_OS_ID 0x40001000
+
+/*
+ * Novell Shim VCPU flags.
+ * A VCPU is considered up when it is capable of invoking hypercalls.
+ */
+#define NS_VCPU_BOOT_CPU 0x00000001
+#define NS_VCPU_UP 0x00000002
+
+/*
+ * Novell shim flush flags.
+ */
+
+#define NS_FLUSH_TLB 0X01
+#define NS_FLUSH_INVLPG 0X02
+
+/*
+ * We use the following global state to manage TLB flush requests from the
+ * guest. At most one flush can be active in the guest at a time; we may
+ * have to revisit this if this is a bottleneck.
+ */
+typedef struct nsGlobalFlushState {
+ int cpuCount; //0 unused; else #cpus participating
+ cpumask_t waiters; //Cpus waiting for the flush block
+ struct vcpu *currentOwner; //vcpu owning this state; NULL when it is free
+ u64 retVal; //result written to the owner's registers on release
+ flushVa_t *flushParam; //owner's input buffer, recorded on acquire
+ unsigned short repCount; //owner's nsVcpRepCount, recorded on acquire
+} nsGlobalFlushState_t;
+
+typedef struct nsSpinLock {
+ unsigned long flags;
+ spinlock_t spinLock;
+ struct nsVcpu *owner;
+ void *retAddr;
+} nsSpinLock_t;
+
+/*
+ * Novell shim message structure.
+ */
+typedef enum {
+ /*
+ * For now we only support timer messages
+ */
+ nsMessageTypeNone = 0x00000000,
+ nsMessageTimerExpired = 0x80000010
+} nsMessageType;
+
+typedef struct nsTimerMessage {
+ nsMessageType messageType;
+ u8 pad1[3];
+ u8 messageSize;
+ u32 timerIndex;
+ u32 pad2;
+ u64 expirationTime;
+} nsTimerMessage_t;
+
+typedef struct nsMessage {
+ nsMessageType messageType;
+ uint8_t messageSize;
+ uint8_t flags;
+ uint8_t reserved[2];
+ uint32_t reserved1;
+ uint64_t payLoad[30];
+} nsMessage_t;
+
+
+typedef struct nsVcpTimerState {
+ u64 config;
+ u64 count; /*expiration time in 100ns units*/
+ int timerIndex;
+ struct nsVcpu *thisCpu;
+ struct timer vcpuTimer;
+} nsVcpTimerState_t;
+
+/*
+ * Stats structure.
+ */
+
+typedef struct {
+ u64 numSwitches;
+ u64 numFlushes;
+ u64 numFlushesPosted;
+ u64 numFlushRanges;
+ u64 numFlushRangesPosted;
+
+ u64 numTprReads;
+ u64 numIcrReads;
+ u64 numEoiWrites;
+ u64 numTprWrites;
+ u64 numIcrWrites;
+
+ u64 numGFSAcquires;
+ u64 numGFSReleases;
+ u64 numTlbFlushes;
+ u64 numInvlPages;
+ u64 numTimeOuts;
+} nsVcpuStats_t;
+
+typedef struct nsVcpu {
+ /*
+ * Per-vcpu state to support the Novell shim;
+ */
+ int nsVcplockDepth;
+ unsigned long nsVcpuFlags;
+ unsigned char nsVcpFlushRequest;
+ unsigned char nsVcpWaitingOnGFS;
+ unsigned char nsVcpFlushPending;
+ unsigned char nsVcpWaitingForCleanup;
+ unsigned short nsVcpRepCount;
+ /*
+ * Synthetic msrs.
+ */
+ u64 nsVcpSControlMsr;
+ u64 nsVcpSVersionMsr;
+ u64 nsVcpSIefpMsr;
+ u64 nsVcpSimpMsr;
+ u64 nsVcpEomMsr;
+
+ u64 nsVcpSIntMsr[16];
+ /*
+ * Timer MSRs.
+ */
+ nsVcpTimerState_t nsVcpTimers[4];
+ void *nsVcpSiefPage;
+ void *nsVcpSimPage;
+ /*
+ * Hypercall input/output processing.
+ * We keep these pages mapped in the hypervisor space.
+ */
+ void *nsVcpInputBuffer; /*input buffer virt address*/
+ void *nsVcpInputBufferPage; /*input buffer struct page */
+ void *nsVcpOutputBuffer; /*output buffer virt address*/
+ void *nsVcpOutputBufferPage; /*output buffer struct page */
+ struct vcpu *nsVcpXenVcpu; /*corresponding xen vcpu*/
+ nsVcpuStats_t nsVcpStats;
+} nsVcpu_t;
+
+/*
+ * Events of interest for gathering stats.
+ */
+#define NS_CSWITCH 1
+#define NS_FLUSH_VA_STAT 2
+#define NS_FLUSH_RANGE 3
+#define NS_FLUSH_VA_POSTED 4
+#define NS_FLUSH_RANGE_POSTED 5
+#define NS_TPR_READ 6
+#define NS_ICR_READ 7
+#define NS_TPR_WRITE 8
+#define NS_ICR_WRITE 9
+#define NS_EOI_WRITE 10
+
+#define NS_GFS_ACQUIRE 11
+#define NS_GFS_RELEASE 12
+#define NS_TLB_FLUSH 13
+#define NS_INVL_PG 14
+#define NS_TIMEOUTS 15
+
+void nsCollectStats(int event, nsVcpuStats_t *ststp);
+
+#define NS_STATS //KYS: Temporary
+
+#ifdef NS_STATS /* stats enabled: forward every event to nsCollectStats() */
+#define NS_STATS_COLLECT(event, statp) nsCollectStats(event, statp)
+#else /* stats disabled: the hook expands to a harmless no-op expression */
+#define NS_STATS_COLLECT(event, statp) ((void)0)
+#endif
+
+typedef struct nsPartition {
+ /*
+ * State maintained on a per guest basis to implement
+ * the Novell shim.
+ */
+ nsSpinLock_t nsLock;
+ atomic_t nsNumVcpusActive;
+ u64 nsGuestIdMsr;
+ u64 nsHypercallMsr;
+ u64 nsPrivileges;
+ u64 nsSupportedFeatures;
+ unsigned long nsHypercallMfn;
+ int nsLongModeGuest;
+ /*
+ * Each VCPU here corresponds to the vcpu in the underlying hypervisor;
+ * they share the same ID.
+ */
+ nsVcpu_t nsVcpuState[MAX_VIRT_CPUS];
+ nsGlobalFlushState_t nsFlushState;
+} nsPartition_t;
+
+/*
+ * Max CPUID leaves supported.
+ */
+
+#define NX_MAX_CPUID_LEAVES 5
+
+/*
+ * We don't want to intercept instructions coming from the hvm bootstrap code.
+ *
+ */
+#define NS_BIOS_HIGH_ADDR
+/*
+ * Privilege flags.
+ */
+
+#define NS_ACCESS_VP_RUNTIME (1ULL << 0)
+#define NS_ACCESS_TIME_REF_CNT (1ULL << 1)
+#define NS_ACCESS_SYNC_MSRS (1ULL << 2)
+#define NS_ACCESS_SYNC_TIMERS (1ULL << 3)
+#define NS_ACCESS_APIC_MSRS (1ULL << 4)
+#define NS_ACCESS_PARTITION_ID (1ULL << 33)
+
+#define nsGetCurrentPartition() \
+((current)->domain->arch.hvm_domain.ext_handle)
+
+#define nsGetCurrentVcpuIndex() (current)->vcpu_id
+
+#define NS_PANIC(x) \
+do {\
+ nsXenVector.extPrintk("File is: %s\n", __FILE__);\
+ nsXenVector.extPrintk("Line is: %d\n", __LINE__);\
+ nsXenVector.extPanic((x));\
+} while (0);
+
+#define NS_ASSERT(x) \
+do {\
+ if (!(x)) \
+ NS_PANIC("ASSERTION FAILED\n")\
+} while (0);
+
+#define nsDebugPrint(x) \
+do { \
+    nsXenVector.extPrintk("File is: %s\n", __FILE__);\
+    nsXenVector.extPrintk("Line is: %d\n", __LINE__);\
+    nsXenVector.extPrintk("%s", (x)); /* x is data, never the format */ \
+} while (0);
+
+/* Hooks into Xen */
+extern xen_call_vector_t nsXenVector;
+
+/*
+ * static inline int
+ * nsInvalidCpuState(void)
+ *   Report whether the calling CPU is in a state in which the services
+ *   implemented in the Novell Shim (Adaptor) must not be invoked.
+ *
+ * Calling/Exit State:
+ *   Returns 0 when guest_x86_mode() yields 4 or 8, and 1 otherwise.
+ */
+
+static inline int
+nsInvalidCpuState(void)
+{
+    const int mode = nsXenVector.hvmFuncTable->guest_x86_mode(current);
+
+    if (mode == 4) {
+        return (0);
+    }
+    return (mode != 8);
+}
+
+/*
+ * static inline u64
+ * nsBuildHcallRetVal(int code, int reps)
+ *
+ * Compose a hypercall result value: the status code occupies bits 15:0
+ * and the number of successfully completed reps occupies bits 43:32.
+ *
+ * Calling/Exit State:
+ *   None.
+ */
+
+static inline u64
+nsBuildHcallRetVal(int code, int reps)
+{
+    /* 16-bit mask: a status such as 0x0100 must not truncate to success. */
+    u64 retVal = (u64)(code & 0xffff);
+    retVal |= ((u64)(reps & 0xfff)) << 32;
+    return (retVal);
+}
+
+
+/*
+ * static inline void nsSetSysCallRetVal(struct cpu_user_regs *pregs,
+ *                                       int longModeGuest, u64 retVal)
+ *   Store a hypercall result in the saved guest registers: eax alone for
+ *   a long mode guest, otherwise edx (high 32 bits) and eax (low 32 bits).
+ * Calling/Exit State:
+ *   None.
+ */
+
+static inline void nsSetSysCallRetVal(struct cpu_user_regs *pregs,
+                                      int longModeGuest, u64 retVal)
+{
+    if (!longModeGuest) {
+        pregs->edx = (u32)(retVal >> 32);
+        pregs->eax = (u32)(retVal);
+        return;
+    }
+    pregs->eax = retVal;
+}
+
+/*
+ * static inline int
+ * nsPrivilegeCheck(nsPartition_t *curp, u64 flags)
+ *   Test the partition's nsPrivileges mask against flags; the result is
+ *   1 when at least one of the requested bits is set and 0 otherwise.
+ *
+ * Calling/Exit State:
+ *   None.
+ */
+
+static inline int
+nsPrivilegeCheck(nsPartition_t *curp, u64 flags)
+{
+    return ((curp->nsPrivileges & flags) != 0);
+}
+
+/* void
+ * nsHandleHyperCall(u64 opcode, u64 input, u64 output,
+ * u64 *retVal);
+ * Common entry point for handling all the extension hypercalls.
+ *
+ * Calling/Exit State:
+ * Based on the hypercall; the caller may give up the CPU while
+ * processing the hypercall. No locks should be held on entry and
+ * no locks will be held on return.
+ *
+ */
+void
+nsHandleHyperCall(u64 opcode, u64 input, u64 output,
+ u64 *retVal);
+
+/*
+ * void nsDoTlbFlush(void);
+ * Perform TLB flush on the invoking virtual CPU.
+ *
+ * Calling/Exit State:
+ * None.
+ */
+void nsDoTlbFlush(void);
+
+/*
+ * void
+ * nsLockAcquire(nsVcpu_t *vcpup, nsSpinLock_t *nsLock)
+ * Acquire the specified lock.
+ *
+ * Calling/Exit State:
+ * None.
+ */
+
+void nsLockAcquire(nsVcpu_t *vcpup, nsSpinLock_t *lock);
+
+/*
+ * void
+ * nsLockRelease(nsVcpu_t *vcpup, nsSpinLock_t *nsLock)
+ * Release the specified spin lock.
+ *
+ * Calling/Exit State:
+ * None.
+ */
+
+void nsLockRelease(nsVcpu_t *vcpup, nsSpinLock_t *lock);
+
+/*
+ * void
+ * nsLockInit(nsSpinLock_t *nsLock)
+ * Initialize the specified spin lock.
+ *
+ * Calling/Exit State:
+ * None.
+ */
+
+void nsLockInit(nsSpinLock_t *lock);
+
+/*
+ * void nsPrintStats(nsPartition_t *curp, int i)
+ * Print the per-vcpu stats for the specified partition.
+ *
+ * Calling/Exit State:
+ * None.
+ */
+
+void nsPrintStats(nsPartition_t *curp, int i);
+
+#define NS_LOCK_OWNED(v, l) \
+((l)->owner == (v))
+#endif /*NS_SHIM_H */
Index: xen-3.2-testing/xen/arch/x86/hvm/hvm_ext/novell/nshypercall.c
===================================================================
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
+++ xen-3.2-testing/xen/arch/x86/hvm/hvm_ext/novell/nshypercall.c 2008-02-15 18:28:11.000000000 -0500
@@ -0,0 +1,1220 @@
+/****************************************************************************
+ |
+ | Copyright (c) [2007, 2008] Novell, Inc.
+ | All Rights Reserved.
+ |
+ | This program is free software; you can redistribute it and/or
+ | modify it under the terms of version 2 of the GNU General Public License as
+ | published by the Free Software Foundation.
+ |
+ | This program is distributed in the hope that it will be useful,
+ | but WITHOUT ANY WARRANTY; without even the implied warranty of
+ | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ | GNU General Public License for more details.
+ |
+ | You should have received a copy of the GNU General Public License
+ | along with this program; if not, contact Novell, Inc.
+ |
+ | To contact Novell about this file by physical or electronic mail,
+ | you may find current contact information at www.novell.com
+ |
+ |***************************************************************************
+*/
+
+/*
+ * nshypercall.c.
+ * This file implements the hypercall component of the Novell Shim. Hopefully
+ * we can host this component either as a driver in the guest or an extension
+ * to the Xen hypervisor.
+ *
+ * Engineering Contact: K. Y. Srinivasan
+ */
+
+#include <asm/page.h>
+#include <asm/processor.h>
+#include <asm/hvm/support.h>
+#include <xen/cpumask.h>
+#include <xen/event.h>
+
+#include <asm/hvm/hvm_extensions.h>
+#include "ns_shim.h"
+#include "ns_errno.h"
+#include "nshypercall.h"
+
+
+
+void nsDoTlbFlush(void);
+static void
+nsFlushPostProcess(nsPartition_t *curp, nsVcpu_t *curVcpup);
+
+
+
+/*
+ * void nsCollectStats(int event, nsVcpuStats_t *statsp)
+ *   Bump the *statsp counter selected by event; unknown events are ignored.
+ * Calling/Exit State:
+ *   None.
+ */
+void nsCollectStats(int event, nsVcpuStats_t *statsp)
+{
+    switch (event) {
+    case NS_CSWITCH:
+        statsp->numSwitches++;
+        return;
+    case NS_FLUSH_VA_STAT: /* stat event code declared in ns_shim.h */
+        statsp->numFlushes++;
+        return;
+    case NS_FLUSH_RANGE:
+        statsp->numFlushRanges++;
+        return;
+    case NS_FLUSH_VA_POSTED:
+        statsp->numFlushesPosted++;
+        return;
+    case NS_FLUSH_RANGE_POSTED:
+        statsp->numFlushRangesPosted++;
+        return;
+    case NS_TPR_READ:
+        statsp->numTprReads++;
+        return;
+    case NS_ICR_READ:
+        statsp->numIcrReads++;
+        return;
+    case NS_TPR_WRITE:
+        statsp->numTprWrites++;
+        return;
+    case NS_ICR_WRITE:
+        statsp->numIcrWrites++;
+        return;
+    case NS_EOI_WRITE:
+        statsp->numEoiWrites++;
+        return;
+    case NS_GFS_ACQUIRE:
+        statsp->numGFSAcquires++;
+        return;
+    case NS_GFS_RELEASE:
+        statsp->numGFSReleases++;
+        return;
+    case NS_TLB_FLUSH:
+        statsp->numTlbFlushes++;
+        return;
+    case NS_INVL_PG:
+        statsp->numInvlPages++;
+        return;
+    case NS_TIMEOUTS: /* feeds the numTimeOuts value nsPrintStats reports */
+        statsp->numTimeOuts++;
+        return;
+    }
+}
+
+/*
+ * void
+ * nsPrintStats(nsPartition_t *curp, int i)
+ *   Print the flush flags and the statistics counters of vcpu i.
+ *
+ * Calling/Exit State:
+ *   None. The u64 counters are cast so the format is right on 32-bit too.
+ */
+void
+nsPrintStats(nsPartition_t *curp, int i)
+{
+    nsVcpu_t *v = &curp->nsVcpuState[i];
+    nsVcpuStats_t *s = &v->nsVcpStats;
+
+    printk("Printing stats for vcpu ID: %d\n", i);
+    printk("Flush pending: %d\n", (int)v->nsVcpFlushPending);
+    printk("Flush Request: %d\n", (int)v->nsVcpFlushRequest);
+    printk("Waiting on GFS: %d\n", (int)v->nsVcpWaitingOnGFS);
+    printk("Waiting for cleanup: %d\n", (int)v->nsVcpWaitingForCleanup);
+    printk("Number of context switches: %llu\n", (unsigned long long)s->numSwitches);
+    printk("Number of flushes: %llu\n", (unsigned long long)s->numFlushes);
+    printk("Number of flushes posted: %llu\n", (unsigned long long)s->numFlushesPosted);
+    printk("Number of flush ranges: %llu\n", (unsigned long long)s->numFlushRanges);
+    printk("Number of flush ranges posted: %llu\n", (unsigned long long)s->numFlushRangesPosted);
+    printk("Number of TPR reads: %llu\n", (unsigned long long)s->numTprReads);
+    printk("Number of ICR reads: %llu\n", (unsigned long long)s->numIcrReads);
+    printk("Number of Eoi writes: %llu\n", (unsigned long long)s->numEoiWrites);
+    printk("Number of Tpr writes: %llu\n", (unsigned long long)s->numTprWrites);
+    printk("Number of Icr writes: %llu\n", (unsigned long long)s->numIcrWrites);
+    printk("Number of GFS acuires: %llu\n", (unsigned long long)s->numGFSAcquires);
+    printk("Number of GFS releases: %llu\n", (unsigned long long)s->numGFSReleases);
+    printk("Number of TLB flushes: %llu\n", (unsigned long long)s->numTlbFlushes);
+    printk("Number of INVLPG flushes: %llu\n", (unsigned long long)s->numInvlPages);
+    printk("Number of TIMEOUTS: %llu\n", (unsigned long long)s->numTimeOuts);
+
+}
+
+/*
+ * static inline void nsWakeupWaiters(nsPartition_t *curp)
+ *   Wake the VCPUs recorded in the waiter mask of the global flush
+ *   state, i.e. those waiting to own that state exclusively, and then
+ *   clear the mask.
+ *
+ * Calling/Exit State:
+ *   The partition-wide spin lock nsLock is held on entry and
+ *   this lock is held on exit.
+ */
+static inline void nsWakeupWaiters(nsPartition_t *curp)
+{
+    int vcpuId;
+
+    if (cpus_empty(curp->nsFlushState.waiters)) {
+        return;
+    }
+    /*
+     * Some vcpus are waiting for the flush block to become available;
+     * wake each one that is blocked in xen, then empty the waiter mask.
+     */
+    for (vcpuId = 0; vcpuId < MAX_VIRT_CPUS; vcpuId++) {
+        struct vcpu *waiter;
+        if (cpu_isset(vcpuId, curp->nsFlushState.waiters)) {
+            waiter = curp->nsVcpuState[vcpuId].nsVcpXenVcpu;
+            NS_ASSERT(waiter != NULL);
+            if (test_and_clear_bit(_VPF_blocked_in_xen,
+                                   &waiter->pause_flags)) {
+                vcpu_wake(waiter);
+            }
+        }
+    }
+    cpus_clear(curp->nsFlushState.waiters);
+}
+
+/*
+ * static void nsAcquireGlobalFlushState(nsPartition_t *curp, nsVcpu_t *vcpup)
+ *   Wait until no VCPU owns the global flush state (GFS), then record the
+ *   caller's rep count and input buffer in it for the caller's flush.
+ *
+ * Calling/Exit State:
+ *   On entry nsLock is held and this lock is held on exit. The lock is
+ *   dropped while the caller waits or services a flush posted to it.
+ */
+static void nsAcquireGlobalFlushState(nsPartition_t *curp, nsVcpu_t *vcpup)
+{
+acquireGFSAgain:
+ NS_ASSERT(vcpup->nsVcpWaitingOnGFS == 0);
+ NS_ASSERT(vcpup->nsVcpWaitingForCleanup == 0);
+ NS_ASSERT(NS_LOCK_OWNED(vcpup, &curp->nsLock));
+ if (curp->nsFlushState.currentOwner != NULL) {
+ /*
+ * Another VCPU currently owns the flush state, so we have to
+ * wait for that flush sequence to end and then retry.
+ */
+ /*
+ * Before and after waiting, service any flush posted to us
+ * (nsVcpFlushPending); nsLock is dropped around nsDoTlbFlush().
+ */
+ vcpup->nsVcpWaitingOnGFS = 0;
+ NS_ASSERT(curp->nsFlushState.currentOwner != current);
+ NS_ASSERT(vcpup->nsVcpWaitingForCleanup == 0);
+ if (vcpup->nsVcpFlushPending) {
+ nsLockRelease(vcpup, &curp->nsLock);
+ nsDoTlbFlush();
+ nsLockAcquire(vcpup, &curp->nsLock);
+ }
+ vcpup->nsVcpWaitingOnGFS = 1;
+ cpu_set(current->vcpu_id, curp->nsFlushState.waiters);
+ nsLockRelease(vcpup, &curp->nsLock);
+ wait_on_xen_event_channel(0, (curp->nsFlushState.currentOwner == NULL));
+ nsLockAcquire(vcpup, &curp->nsLock);
+ vcpup->nsVcpWaitingOnGFS = 0;
+ NS_ASSERT(curp->nsFlushState.currentOwner != current);
+ NS_ASSERT(vcpup->nsVcpWaitingForCleanup == 0);
+ if (vcpup->nsVcpFlushPending) {
+ nsLockRelease(vcpup, &curp->nsLock);
+ nsDoTlbFlush();
+ nsLockAcquire(vcpup, &curp->nsLock);
+ }
+ goto acquireGFSAgain;
+ }
+ vcpup->nsVcpWaitingOnGFS = 0;
+ curp->nsFlushState.repCount = vcpup->nsVcpRepCount;
+ curp->nsFlushState.flushParam =
+ vcpup->nsVcpInputBuffer;
+ NS_STATS_COLLECT(NS_GFS_ACQUIRE, &vcpup->nsVcpStats);
+}
+
+/*
+ * static void nsReleaseGlobalFlushState(nsPartition_t *curp, nsVcpu_t *vcpup,
+ * int lockOwned)
+ *   At most one TLB flush event can be active in the system. Every VCPU
+ *   taking part in the flush must release its hold on the global flush
+ *   object before the object can be freed; this function manages that.
+ *   If "lockOwned" is zero nsLock is acquired here; otherwise the caller
+ *   must already hold it.
+ *
+ * Calling/Exit State:
+ *   The current owner of GFS may be forced to give up the CPU.
+ *   On exit nsLock is held.
+ */
+static void nsReleaseGlobalFlushState(nsPartition_t *curp, nsVcpu_t *vcpup,
+ int lockOwned)
+{
+ if (!lockOwned) {
+ nsLockAcquire(vcpup, &curp->nsLock);
+ }
+ NS_ASSERT(curp->nsFlushState.cpuCount >= 0);
+ NS_ASSERT(curp->nsFlushState.currentOwner != NULL);
+
+ if (vcpup->nsVcpFlushPending) {
+ curp->nsFlushState.cpuCount--;
+ NS_ASSERT(curp->nsFlushState.cpuCount >= 0);
+ vcpup->nsVcpFlushPending = 0;
+ }
+
+nsReleaseGFS:
+ if (curp->nsFlushState.cpuCount > 0) {
+ if (curp->nsFlushState.currentOwner == current) {
+ /*
+ * We are the initiator: wake any GFS waiters, then wait
+ * (with nsLock dropped) for cpuCount to reach zero.
+ */
+ nsWakeupWaiters(curp);
+ vcpup->nsVcpWaitingForCleanup = 1;
+ nsLockRelease(vcpup, &curp->nsLock);
+ wait_on_xen_event_channel(0,(curp->nsFlushState.cpuCount == 0));
+ nsLockAcquire(vcpup, &curp->nsLock);
+ vcpup->nsVcpWaitingForCleanup = 0;
+ goto nsReleaseGFS;
+ } else {
+ return;
+ }
+ }
+ NS_ASSERT(curp->nsFlushState.cpuCount == 0);
+ if (curp->nsFlushState.currentOwner == current) {
+ /* We are the current owner; do the final cleanup.
+ * But first set the guest-visible return value, which was
+ * stashed in nsFlushState.retVal.
+ */
+ NS_STATS_COLLECT(NS_GFS_RELEASE, &vcpup->nsVcpStats);
+ vcpup->nsVcpFlushRequest = 0;
+ vcpup->nsVcpFlushPending = 0;
+ vcpup->nsVcpWaitingForCleanup = 0;
+ nsSetSysCallRetVal(guest_cpu_user_regs(),
+ curp->nsLongModeGuest,
+ curp->nsFlushState.retVal);
+ curp->nsFlushState.cpuCount = 0;
+ curp->nsFlushState.currentOwner = NULL;
+ curp->nsFlushState.retVal = 0;
+ curp->nsFlushState.flushParam = NULL;
+ curp->nsFlushState.repCount = 0;
+ nsWakeupWaiters(curp);
+ } else {
+ /*
+ * Not the owner: wake the owner if it is blocked in xen.
+ */
+ if ( test_and_clear_bit(_VPF_blocked_in_xen,
+ &(curp->nsFlushState.currentOwner->pause_flags))){
+ vcpu_wake(curp->nsFlushState.currentOwner);
+ }
+ }
+}
+
+
+/*
+ * static inline int nsFlushPermitted(nsVcpu_t *vcpup)
+ *   Tell whether a TLB flush may be executed on the calling vcpu.
+ *
+ * Calling/Exit State:
+ *   None.
+ */
+static inline int nsFlushPermitted(nsVcpu_t *vcpup)
+{
+    /*
+     * A flush is refused when guest paging is off, while vmxassist is
+     * enabled, or when the cpu state check fails; vcpup is not used.
+     */
+    if (!hvm_paging_enabled(current) ||
+        current->arch.hvm_vmx.vmxassist_enabled ||
+        nsInvalidCpuState()) {
+        return (0);
+    }
+
+    return (1);
+}
+
+/*
+ * void
+ * nsDoTlbFlush(void)
+ *   Perform flush operations based on the state of GFS. VCPUs may be
+ *   forced to relinquish the physical CPU while attempting to flush; in
+ *   those events, this is also the continuation point for execution.
+ *
+ * Calling/Exit State:
+ *   Asserts that irqs are enabled and nsVcplockDepth is 0 on entry.
+ */
+void
+nsDoTlbFlush(void)
+{
+ nsPartition_t *curp = nsGetCurrentPartition();
+ nsVcpu_t *vcpup = &curp->nsVcpuState[nsGetCurrentVcpuIndex()];
+ flushVa_t *flushArgp;
+ int i,j, numPages;
+ u64 *pgList;
+ long baseVa;
+ unsigned short repCount;
+
+ NS_ASSERT(local_irq_is_enabled());
+
+ NS_ASSERT(vcpup->nsVcplockDepth == 0);
+
+ nsLockAcquire(vcpup, &curp->nsLock);
+ if (vcpup->nsVcpWaitingForCleanup) {
+ /*
+ * This is the continuation point for the flush owner that was
+ * waiting for the other participants; clean up the GFS.
+ */
+ vcpup->nsVcpWaitingForCleanup =0;
+ NS_ASSERT(curp->nsFlushState.currentOwner == current);
+ nsReleaseGlobalFlushState(curp, vcpup, 1);
+ } else if (vcpup->nsVcpWaitingOnGFS) {
+ /*
+ * This is the continuation point for a vcpu that was waiting
+ * for the GFS; acquire it and proceed with our flush operation.
+ */
+ vcpup->nsVcpWaitingOnGFS =0;
+ nsAcquireGlobalFlushState(curp, vcpup);
+ /*
+ * Now do the rest of the syscall processing
+ */
+ nsFlushPostProcess(curp, vcpup);
+ }
+ if (!vcpup->nsVcpFlushPending) {
+ nsLockRelease(vcpup, &curp->nsLock);
+ return;
+ }
+ flushArgp = curp->nsFlushState.flushParam;
+ repCount = curp->nsFlushState.repCount;
+ /*
+ * A flush has been posted to us; if our state does not permit a
+ * flush, just give up our part in the global flush state.
+ */
+ if (!nsFlushPermitted(vcpup)) {
+ nsReleaseGlobalFlushState(curp, vcpup, 1);
+ nsLockRelease(vcpup, &curp->nsLock);
+ NS_ASSERT(vcpup->nsVcplockDepth == 0);
+ return;
+ }
+ nsLockRelease(vcpup, &curp->nsLock);
+ if (vcpup->nsVcpFlushPending & NS_FLUSH_TLB) {
+ NS_STATS_COLLECT(NS_TLB_FLUSH, &vcpup->nsVcpStats);
+ paging_update_cr3(current);
+ } else {
+ pgList = &flushArgp->gva;
+ NS_ASSERT(vcpup->nsVcpFlushPending == NS_FLUSH_INVLPG);
+ NS_ASSERT(pgList != NULL);
+ NS_ASSERT(repCount >=1);
+ NS_STATS_COLLECT(NS_INVL_PG, &vcpup->nsVcpStats);
+ for (i = 0; i < repCount; i++) {
+ baseVa = (long)(pgList[i] & PAGE_MASK);
+ numPages = (int)(~baseVa & pgList[i]);
+ for (j = 0; j <= numPages; j++) {
+ if (paging_invlpg(current,
+ (baseVa + (j << PAGE_SHIFT)))) {
+ flush_tlb_one_local((baseVa +
+ (j<< PAGE_SHIFT)));
+ }
+ //KYS: need to deal with ASIDS
+ }
+ }
+ }
+ /*
+ * Post-process the GFS; the callee takes nsLock (lockOwned == 0).
+ */
+ nsReleaseGlobalFlushState(curp, vcpup, 0);
+ nsLockRelease(vcpup, &curp->nsLock);
+ NS_ASSERT(vcpup->nsVcplockDepth == 0);
+}
+
+/*
+ * static int
+ * nsGetVpRegisters(paddr_t input, paddr_t output)
+ *   Get the register state of a VCPU (register mapping not implemented yet).
+ *
+ * Calling/Exit State:
+ *   None. Returns an NS_STATUS_* code.
+ */
+
+static int
+nsGetVpRegisters(paddr_t input, paddr_t output)
+{
+    nsVcpu_t *vcpup, *targetp;
+    nsPartition_t *curp = nsGetCurrentPartition();
+    getVpRegistersInput_t *inBuf;
+    getVpRegistersOutput_t *outBuf;
+    struct vcpu_guest_context *vcpuCtx;
+    u32 *regIndexp;
+    getVpRegistersOutput_t *outRegp;
+    u32 numOutputBytes = 0;
+
+    vcpup = &curp->nsVcpuState[nsGetCurrentVcpuIndex()];
+    inBuf = vcpup->nsVcpInputBuffer;
+    outBuf = vcpup->nsVcpOutputBuffer;
+    outRegp = outBuf;
+    /*
+     * Copy the whole input page to the per-cpu input buffer.
+     * This may be an overkill; obviously it is better to only
+     * copy what we need. XXXKYS: Check with Mike.
+     */
+    if (nsXenVector.extCopyFromGuestPhysical(inBuf, input, PAGE_SIZE)) {
+        return (NS_STATUS_INVALID_ALIGNMENT);
+    }
+    /*
+     * If the partition ID specified does not match with the current
+     * domain return appropriate error.
+     */
+    if ((u64)current->domain->domain_id != inBuf-> partitionId) {
+        return (NS_STATUS_ACCESS_DENIED);
+    }
+    if (inBuf->vpIndex >= MAX_VIRT_CPUS) { /* nsVcpuState[] has MAX slots */
+        return (NS_STATUS_INVALID_VP_INDEX);
+    }
+    targetp = &curp->nsVcpuState[inBuf->vpIndex];
+    if (!(targetp->nsVcpuFlags & NS_VCPU_UP)) {
+        return (NS_STATUS_INVALID_VP_STATE);
+    }
+    if ((vcpuCtx =
+        nsXenVector.extAllocMem(sizeof(struct vcpu_guest_context)))
+        == NULL) {
+        return (NS_STATUS_INSUFFICIENT_MEMORY);
+    }
+
+    /*
+     * Get the register state of the specified vcp.
+     */
+    if (current->vcpu_id != inBuf->vpIndex) {
+        nsXenVector.extVcpuPause(targetp->nsVcpXenVcpu);
+    }
+    nsXenVector.extArchGetDomainInfoCtxt(targetp->nsVcpXenVcpu, vcpuCtx);
+    if (current->vcpu_id != inBuf->vpIndex) {
+        nsXenVector.extVcpuUnPause(targetp->nsVcpXenVcpu);
+    }
+    /*
+     * Now that we have the register state; select what we want and
+     * populate the output buffer.
+     */
+    regIndexp = &inBuf->regIndex;
+    while (*regIndexp != 0) {
+        switch (*regIndexp) {
+        /*
+         * XXXKYS: need mapping code here; populate outBuf.
+         * NOTE(review): no case label, so the panic below never executes.
+         */
+        NS_PANIC("nsGetVpRegisters not supported\n");
+        }
+        regIndexp++;
+        outRegp++ ; /*128 bit registers */
+        numOutputBytes +=16;
+        if ((char *)(regIndexp + 1) > ((char *)inBuf + PAGE_SIZE)) {
+            /*
+             * Next entry lies outside the input page: list unterminated.
+             */
+            NS_PANIC("nsGetVpRegisters:input list not terminated\n");
+            break;
+        }
+    }
+    if (nsXenVector.extCopyToGuestPhysical(output, outBuf,
+        numOutputBytes)) {
+        /* Some problem copying data out*/
+        NS_PANIC("nsGetVpRegisters:copyout problem\n");
+    }
+    nsXenVector.extFreeMem(vcpuCtx);
+    return (NS_STATUS_SUCCESS);
+}
+
+/*
+ * static int
+ * nsSetVpRegisters(paddr_t input, paddr_t output)
+ *   Set the register state of a VCPU (register mapping not implemented yet).
+ *
+ * Calling/Exit State:
+ *   None. Returns an NS_STATUS_* code.
+ */
+
+static int
+nsSetVpRegisters(paddr_t input, paddr_t output)
+{
+    nsVcpu_t *vcpup, *targetp;
+    nsPartition_t *curp = nsGetCurrentPartition();
+    setVpRegistersInput_t *inBuf;
+    struct vcpu_guest_context *vcpuCtx;
+    setVpRegisterSpec_t *regIndexp;
+    int retVal = NS_STATUS_SUCCESS;
+
+    vcpup = &curp->nsVcpuState[nsGetCurrentVcpuIndex()];
+    inBuf = vcpup->nsVcpInputBuffer;
+    /*
+     * Copy the whole input page to the per-cpu input buffer.
+     * This may be an overkill; obviously it is better to only
+     * copy what we need. XXXKYS: Check with Mike.
+     */
+    if (nsXenVector.extCopyFromGuestPhysical(inBuf, input, PAGE_SIZE)) {
+        return (NS_STATUS_INVALID_ALIGNMENT);
+    }
+    /*
+     * If the partition ID specified does not match with the current
+     * domain return appropriate error.
+     */
+    if ((u64)current->domain->domain_id != inBuf-> partitionId) {
+        return (NS_STATUS_ACCESS_DENIED);
+    }
+    if (inBuf->vpIndex >= MAX_VIRT_CPUS) { /* nsVcpuState[] has MAX slots */
+        return (NS_STATUS_INVALID_VP_INDEX);
+    }
+    targetp = &curp->nsVcpuState[inBuf->vpIndex];
+    if (!(targetp->nsVcpuFlags & NS_VCPU_UP)) {
+        return (NS_STATUS_INVALID_VP_STATE);
+    }
+    if ((vcpuCtx =
+        nsXenVector.extAllocMem(sizeof(struct vcpu_guest_context)))
+        == NULL) {
+        return (NS_STATUS_INSUFFICIENT_MEMORY);
+    }
+    /*
+     * XXXKYS: Is it sufficient to just pause the target vcpu; on the
+     * xen side domain is paused for this call. CHECK.
+     */
+    if (current->vcpu_id != inBuf->vpIndex) {
+        nsXenVector.extVcpuPause(targetp->nsVcpXenVcpu);
+    }
+
+    nsXenVector.extArchGetDomainInfoCtxt(targetp->nsVcpXenVcpu, vcpuCtx);
+    /*
+     * Now that we have the register state; update the register state
+     * based on what we are given.
+     */
+    regIndexp = &inBuf->regSpec;
+    /*
+     * XXXKYS: Assuming the list is terminated by a regName that is 0.
+     * Check with Mike.
+     */
+    while (regIndexp->regName != 0) {
+        switch (regIndexp->regName) {
+        /*
+         * XXXKYS: need mapping code here; populate vcpuCtx.
+         * NOTE(review): no case label, so the panic below never executes.
+         */
+        NS_PANIC("nsSetVpRegisters not supported\n");
+        }
+        regIndexp++;
+        if ((char *)(regIndexp + 1) > ((char *)inBuf + PAGE_SIZE)) {
+            /*
+             * Next entry lies outside the input page: list unterminated.
+             */
+            NS_PANIC("nsSetVpRegisters:input list not terminated\n");
+            break;
+        }
+    }
+    /*
+     * Now set register state.
+     *
+     * XXXKYS: Is it sufficient to just pause the target vcpu; on the
+     * xen side domain is paused for this call. CHECK.
+     */
+
+    if (nsXenVector.extArchSetDomainInfoCtxt(targetp->nsVcpXenVcpu, vcpuCtx)) {
+        retVal = NS_STATUS_INVALID_PARAMETER;
+    }
+    if (current->vcpu_id != inBuf->vpIndex) {
+        nsXenVector.extVcpuUnPause(targetp->nsVcpXenVcpu);
+    }
+    nsXenVector.extFreeMem(vcpuCtx);
+    return (retVal);
+}
+
+/*
+ * static int
+ * nsSwitchVa(paddr_t input)
+ *
+ * Load 'input' as the page table base (CR3) of the calling vcpu.
+ *
+ * Calling/Exit State:
+ * None. Always returns NS_STATUS_SUCCESS.
+ *
+ * Remarks:
+ * The spec specifies that the input register points to a guest
+ * physical page that holds the new page table base. However the
+ * guest appears to pass the page table base in the register itself.
+ */
+static int
+nsSwitchVa(paddr_t input)
+{
+    nsPartition_t *partition;
+    nsVcpu_t *self;
+
+    /*
+     * XXXKYS: the current build of longhorn
+     * (longhorn-2007-02-06-x86_64-fv-02) passes the asID in the input
+     * register, not in memory. Need to check if future builds do this.
+     */
+    partition = nsGetCurrentPartition();
+    self = &partition->nsVcpuState[nsGetCurrentVcpuIndex()];
+    hvm_set_cr3(input);
+    NS_STATS_COLLECT(NS_CSWITCH, &self->nsVcpStats);
+    return (NS_STATUS_SUCCESS);
+}
+
+/*
+ * static void
+ * nsFlushPostProcess(nsPartition_t *curp, nsVcpu_t *curVcpup)
+ *
+ * Post the calling vcpu's flush request (nsVcpFlushRequest) to the vcpus
+ * named in the vMask stashed in its input buffer, once GFS is acquired.
+ * Targets that are up and permit flushing are marked pending and kicked;
+ * nsFlushState.cpuCount counts the vcpus the request was posted to.
+ *
+ * Calling/Exit State:
+ * On entry nsLock is held; on exit this lock continues to be held.
+ */
+
+static void
+nsFlushPostProcess(nsPartition_t *curp, nsVcpu_t *curVcpup)
+{
+    int target;
+    nsVcpu_t *vcpup;
+    cpumask_t vcpuMask;
+    struct flushVa *flushArgp;
+
+    flushArgp = curVcpup->nsVcpInputBuffer;
+    vcpuMask = flushArgp->vMask;
+    /*
+     * On entry we must own the global flush state.
+     */
+    NS_ASSERT(NS_LOCK_OWNED(curVcpup, &curp->nsLock));
+    NS_ASSERT(curp->nsFlushState.cpuCount == 0);
+    NS_ASSERT(curp->nsFlushState.currentOwner == NULL);
+
+    curp->nsFlushState.retVal =
+        nsBuildHcallRetVal(NS_STATUS_SUCCESS, curVcpup->nsVcpRepCount);
+    curp->nsFlushState.currentOwner = current;
+    if (cpu_isset(current->vcpu_id, vcpuMask)) {
+        /*
+         * The caller is itself a target; post to it first.
+         */
+        curp->nsFlushState.cpuCount = 1;
+        curVcpup->nsVcpFlushPending = curVcpup->nsVcpFlushRequest;
+#ifdef NS_STATS
+        if (curVcpup->nsVcpFlushRequest == NS_FLUSH_TLB) {
+            NS_STATS_COLLECT(NS_FLUSH_VA_POSTED, &curVcpup->nsVcpStats);
+        } else {
+            NS_STATS_COLLECT(NS_FLUSH_RANGE_POSTED, &curVcpup->nsVcpStats);
+        }
+#endif
+        cpu_clear(current->vcpu_id, vcpuMask);
+    }
+    /*
+     * Post to the remaining targets (none if the mask is now empty).
+     */
+    while (!cpus_empty(vcpuMask)) {
+        target = first_cpu(vcpuMask);
+        cpu_clear(target, vcpuMask);
+        /*
+         * The mask is derived from guest input and may name vcpus at or
+         * beyond MAX_VIRT_CPUS, the bound nsVcpuState is indexed under
+         * elsewhere in this file; never index past it.
+         */
+        if (target >= MAX_VIRT_CPUS) {
+            continue;
+        }
+        vcpup = &curp->nsVcpuState[target];
+        if (!(vcpup->nsVcpuFlags & NS_VCPU_UP)) {
+            continue;
+        }
+        if (!nsFlushPermitted(vcpup)) {
+            continue;
+        }
+        curp->nsFlushState.cpuCount++;
+        vcpup->nsVcpFlushPending = curVcpup->nsVcpFlushRequest;
+#ifdef NS_STATS
+        if (curVcpup->nsVcpFlushRequest == NS_FLUSH_TLB) {
+            NS_STATS_COLLECT(NS_FLUSH_VA_POSTED, &vcpup->nsVcpStats);
+        } else {
+            NS_STATS_COLLECT(NS_FLUSH_RANGE_POSTED, &vcpup->nsVcpStats);
+        }
+#endif
+        /*
+         * We need to force these VCPUs into the hypervisor for
+         * them to act on the pending request.
+         */
+        vcpu_kick(vcpup->nsVcpXenVcpu);
+    }
+    /*
+     * If we are included in this round of tlb flush; we will wait for
+     * other CPUs in the tlb flush function; else we wait right here.
+     */
+    if (!curVcpup->nsVcpFlushPending) {
+        nsReleaseGlobalFlushState(curp, curVcpup, 1);
+    }
+    return;
+}
+
+/*
+ * static int
+ * nsFlushVa(paddr_t input)
+ * Perform a TLB flush on the specified set of VCPUs.
+ *
+ * 'input' is the guest physical address of a flushVa_t. Returns an
+ * NS_STATUS_* code: INVALID_ALIGNMENT if the copy-in fails,
+ * INVALID_PARAMETER for bad flags or an empty set, else SUCCESS.
+ *
+ * Calling/Exit State:
+ * No locks can be held on entry and no locks will be held on return.
+ * The calling VCPU may relinquish the physical CPU.
+ */
+static int
+nsFlushVa(paddr_t input)
+{
+    nsPartition_t *curp = nsGetCurrentPartition();
+    int i;
+    nsVcpu_t *curVcpup;
+    flushVa_t *flushArgp;
+    cpumask_t vcpuMask;
+    u64 asId, inputMask, retVal;
+    int flushGlobal = 1;
+
+    curVcpup = &curp->nsVcpuState[nsGetCurrentVcpuIndex()];
+    flushArgp = curVcpup->nsVcpInputBuffer;
+    NS_ASSERT(curVcpup->nsVcplockDepth == 0);
+    NS_ASSERT(curVcpup->nsVcpFlushRequest == 0);
+    NS_ASSERT(curVcpup->nsVcpWaitingForCleanup == 0);
+    NS_ASSERT(curVcpup->nsVcpWaitingOnGFS == 0);
+
+    if (nsXenVector.extCopyFromGuestPhysical(flushArgp, input,
+        sizeof(*flushArgp))) {
+        return (NS_STATUS_INVALID_ALIGNMENT);
+    }
+    inputMask = flushArgp->pMask;
+    asId = flushArgp->asHandle;
+    cpus_clear(vcpuMask);
+    /*
+     * Deal with all trivial error conditions.
+     */
+    if (flushArgp->flags != 0 && (!(flushArgp->flags &
+        (NS_FLUSH_ALL_PROCESSORS |
+        NS_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES |
+        NS_FLUSH_NON_GLOBAL_MAPPINGS_ONLY)))) {
+        return (NS_STATUS_INVALID_PARAMETER);
+    }
+    if (((flushArgp->pMask) == 0) &&
+        !(flushArgp->flags & NS_FLUSH_ALL_PROCESSORS)) {
+        return (NS_STATUS_INVALID_PARAMETER);
+    }
+
+    if (flushArgp->flags & NS_FLUSH_ALL_PROCESSORS) {
+        for (i = 0; i < MAX_VIRT_CPUS; i++) {
+            if (current->domain->vcpu[i] != NULL) {
+                cpu_set(i, vcpuMask);
+            }
+        }
+    } else {
+        /*
+         * Ignore guest mask bits at or above MAX_VIRT_CPUS so vcpuMask
+         * and the later per-vcpu state lookup stay in range.
+         */
+        for (i = 0; inputMask != 0 && i < MAX_VIRT_CPUS; i++) {
+            if (inputMask & 0x1) {
+                cpu_set(i, vcpuMask);
+            }
+            inputMask >>= 1;
+        }
+    }
+
+    if (flushArgp->flags & NS_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES) {
+        asId = NS_ALL_AS;
+    }
+    if (flushArgp->flags & NS_FLUSH_NON_GLOBAL_MAPPINGS_ONLY) {
+        flushGlobal = 0;
+    }
+    /*
+     * Now operate on what we are given
+     * XXXKYS: For now we are ignoring asId and flushGlobal flag.
+     * May have to revisit this. But first stash away the processed
+     * parameters for subsequent use.
+     */
+    flushArgp->asHandle = asId;
+    flushArgp->flags = flushGlobal;
+    flushArgp->vMask = vcpuMask;
+    curVcpup->nsVcpRepCount = 0;
+    curVcpup->nsVcpFlushRequest = NS_FLUSH_TLB;
+    retVal = nsBuildHcallRetVal(NS_STATUS_SUCCESS, 0);
+    nsSetSysCallRetVal(guest_cpu_user_regs(), curp->nsLongModeGuest, retVal);
+    NS_STATS_COLLECT(NS_FLUSH_VA_STAT, &curVcpup->nsVcpStats);
+    nsLockAcquire(curVcpup, &curp->nsLock);
+    nsAcquireGlobalFlushState(curp, curVcpup);
+    nsFlushPostProcess(curp, curVcpup);
+    nsLockRelease(curVcpup, &curp->nsLock);
+    return (NS_STATUS_SUCCESS);
+}
+
+/*
+ * static int
+ * nsFlushVaRange(paddr_t input, unsigned short startIndex,
+ * unsigned short repCount, unsigned short *repsDone)
+ * Perform a INVLPG flush on the specified set of VCPUs.
+ *
+ * 'input' is the guest physical address of a flushVa_t whose trailing
+ * list holds repCount 8-byte entries; startIndex is not used. *repsDone
+ * is set to repCount once the input is copied in. Returns NS_STATUS_*.
+ *
+ * Calling/Exit State:
+ * No locks can be held on entry and no locks will be held on return.
+ * The calling VCPU may relinquish the physical CPU.
+ */
+static int
+nsFlushVaRange(paddr_t input, unsigned short startIndex,
+unsigned short repCount, unsigned short *repsDone)
+{
+    nsVcpu_t *curVcpup;
+    nsPartition_t *curp = nsGetCurrentPartition();
+    flushVa_t *flushArgp;
+    cpumask_t vcpuMask;
+    u64 asId, inputMask, retVal;
+    int flushGlobal = 1;
+    int i;
+
+    curVcpup = &curp->nsVcpuState[nsGetCurrentVcpuIndex()];
+    flushArgp = curVcpup->nsVcpInputBuffer;
+    NS_ASSERT(curVcpup->nsVcplockDepth == 0);
+    NS_ASSERT(curVcpup->nsVcpFlushRequest == 0);
+    NS_ASSERT(curVcpup->nsVcpWaitingForCleanup == 0);
+    NS_ASSERT(curVcpup->nsVcpWaitingOnGFS == 0);
+    /*
+     * repCount comes straight from the guest; reject counts whose input
+     * exceeds PAGE_SIZE instead of asserting on guest-controlled data.
+     */
+    if ((repCount < 1) ||
+        (((sizeof(*flushArgp)) + 8*(repCount -1)) > PAGE_SIZE)) {
+        return (NS_STATUS_INVALID_PARAMETER);
+    }
+    if (nsXenVector.extCopyFromGuestPhysical(flushArgp, input,
+        ((sizeof(*flushArgp)) + 8*(repCount -1)))) {
+        return (NS_STATUS_INVALID_ALIGNMENT);
+    }
+    *repsDone = repCount;
+    inputMask = flushArgp->pMask;
+    asId = flushArgp->asHandle;
+    cpus_clear(vcpuMask);
+    /*
+     * Deal with all trivial error conditions.
+     */
+    if (flushArgp->flags != 0 && (!(flushArgp->flags &
+        (NS_FLUSH_ALL_PROCESSORS |
+        NS_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES |
+        NS_FLUSH_NON_GLOBAL_MAPPINGS_ONLY)))) {
+        return (NS_STATUS_INVALID_PARAMETER);
+    }
+    if ((flushArgp->pMask == 0) &&
+        !(flushArgp->flags & NS_FLUSH_ALL_PROCESSORS)) {
+        return (NS_STATUS_INVALID_PARAMETER);
+    }
+
+    if (flushArgp->flags & NS_FLUSH_ALL_PROCESSORS) {
+        for (i = 0; i < MAX_VIRT_CPUS; i++) {
+            if (current->domain->vcpu[i] != NULL) {
+                cpu_set(i, vcpuMask);
+            }
+        }
+    } else {
+        /*
+         * Populate the vcpu mask from the input; bits at or above
+         * MAX_VIRT_CPUS are ignored so later indexing stays in range.
+         */
+        for (i = 0; inputMask != 0 && i < MAX_VIRT_CPUS; i++) {
+            if (inputMask & 0x1) {
+                cpu_set(i, vcpuMask);
+            }
+            inputMask >>= 1;
+        }
+    }
+    if (flushArgp->flags & NS_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES) {
+        asId = NS_ALL_AS;
+    }
+    if (flushArgp->flags & NS_FLUSH_NON_GLOBAL_MAPPINGS_ONLY) {
+        flushGlobal = 0;
+    }
+    /*
+     * Now operate on what we are given
+     * XXXKYS: For now we are ignoring asId and flushGlobal flag.
+     * May have to revisit this. But first stash away the processed
+     * parameters for subsequent use.
+     */
+    flushArgp->asHandle = asId;
+    flushArgp->flags = flushGlobal;
+    flushArgp->vMask = vcpuMask;
+    curVcpup->nsVcpRepCount = repCount;
+    curVcpup->nsVcpFlushRequest = NS_FLUSH_INVLPG;
+    retVal = nsBuildHcallRetVal(NS_STATUS_SUCCESS, repCount);
+    nsSetSysCallRetVal(guest_cpu_user_regs(), curp->nsLongModeGuest, retVal);
+    NS_STATS_COLLECT(NS_FLUSH_RANGE, &curVcpup->nsVcpStats);
+    nsLockAcquire(curVcpup, &curp->nsLock);
+    nsAcquireGlobalFlushState(curp, curVcpup);
+    nsFlushPostProcess(curp, curVcpup);
+    nsLockRelease(curVcpup, &curp->nsLock);
+    return (NS_STATUS_SUCCESS);
+}
+
+/* void
+ * nsHandleHyperCall(u64 opcode, u64 input, u64 output,
+ * u64 *retVal);
+ * Common entry point for handling all the extension hypercalls.
+ * opcode carries the verb in bits 0-15, the rep count in bits 32-43 and
+ * the rep start index in bits 48-59; the result is stored in *retVal.
+ *
+ * Calling/Exit State:
+ * Based on the hypercall; the caller may give up the CPU while
+ * processing the hypercall. No locks are held on entry or on return.
+ */
+
+void
+nsHandleHyperCall(u64 opcode, u64 input, u64 output,
+    u64 *retVal)
+{
+    unsigned short verb;
+    unsigned short repCount;
+    unsigned short repsDone =0;
+    unsigned short startIndex;
+    nsPartition_t *curp = nsGetCurrentPartition();
+    u64 partitionId;
+    int value;
+
+    /* Every case stores its result in *retVal and returns. */
+    verb = (short)(opcode & 0xffff);
+    repCount = (short)((opcode >>32) & 0xfff);
+    startIndex = (short)((opcode >> 48) & 0xfff);
+    switch (verb) {
+    case NS_CREATE_PARTITION:
+        /*
+         * Xen only allows dom0 to create domains.
+         */
+        *retVal = nsBuildHcallRetVal(NS_STATUS_ACCESS_DENIED, 0);
+        return;
+    case NS_INITIALIZE_PARTITION:
+        /*
+         * We don't support this.
+         */
+        *retVal = nsBuildHcallRetVal(NS_STATUS_ACCESS_DENIED, 0);
+        return;
+    case NS_DELETE_PARTITION:
+        /*
+         * We don't support this.
+         */
+        *retVal = nsBuildHcallRetVal(NS_STATUS_ACCESS_DENIED, 0);
+        return;
+    case NS_GET_PARTITION_PROPERTY:
+        /*
+         * We don't support this.
+         */
+        *retVal = nsBuildHcallRetVal(NS_STATUS_ACCESS_DENIED, 0);
+        return;
+    case NS_SET_PARTITION_PROPERTY:
+        /*
+         * We don't support this.
+         */
+        *retVal = nsBuildHcallRetVal(NS_STATUS_ACCESS_DENIED, 0);
+        return;
+    case NS_GET_PARTITION_ID:
+        if (!nsPrivilegeCheck(curp, NS_ACCESS_PARTITION_ID)) {
+            *retVal =
+                nsBuildHcallRetVal(NS_STATUS_ACCESS_DENIED, 0);
+            return;
+        }
+        partitionId = (u64)current->domain->domain_id;
+        if (nsXenVector.extCopyToGuestPhysical(output,
+            &partitionId, 8)) {
+            /*
+             * Invalid output area.
+             */
+            *retVal =
+                nsBuildHcallRetVal(NS_STATUS_ACCESS_DENIED, 0);
+            return;
+        }
+        *retVal = nsBuildHcallRetVal(NS_STATUS_SUCCESS, 0);
+        return;
+    case NS_GET_NEXT_CHILD_PARTITION:
+        /*
+         * We don't support this.
+         */
+        *retVal = nsBuildHcallRetVal(NS_STATUS_ACCESS_DENIED, 0);
+        return;
+    case NS_SET_LOGICAL_PROCESSOR_RUN_TIME_GROUP:
+        /*
+         * We don't support this.
+         */
+        *retVal = nsBuildHcallRetVal(NS_STATUS_ACCESS_DENIED, 0);
+        return;
+    case NS_CLEAR_LOGICAL_PROCESSOR_RUN_TIME_GROUP:
+        /*
+         * We don't support this.
+         */
+        *retVal = nsBuildHcallRetVal(NS_STATUS_ACCESS_DENIED, 0);
+        return;
+    case NS_NOTIFY_LOGICAL_PROCESSOR_POWER_STATE:
+        /*
+         * We don't support this.
+         */
+        *retVal = nsBuildHcallRetVal(NS_STATUS_ACCESS_DENIED, 0);
+        return;
+    case NS_GET_LOGICAL_PROCESSOR_RUN_TIME:
+        /*
+         * We don't support this.
+         */
+        *retVal = nsBuildHcallRetVal(NS_STATUS_ACCESS_DENIED, 0);
+        return;
+    case NS_DEPOSIT_MEMORY:
+        /*
+         * We don't support this.
+         */
+        *retVal = nsBuildHcallRetVal(NS_STATUS_ACCESS_DENIED, 0);
+        return;
+    case NS_WITHDRAW_MEMORY:
+        /*
+         * We don't support this.
+         */
+        *retVal = nsBuildHcallRetVal(NS_STATUS_ACCESS_DENIED, 0);
+        return;
+    case NS_GET_MEMORY_BALANCE:
+        /*
+         * We don't support this.
+         */
+        *retVal = nsBuildHcallRetVal(NS_STATUS_ACCESS_DENIED, 0);
+        return;
+    case NS_MAP_GPA_PAGES:
+        /*
+         * We don't support this.
+         */
+        *retVal = nsBuildHcallRetVal(NS_STATUS_ACCESS_DENIED, 0);
+        return;
+    case NS_UNMAP_GPA_PAGES:
+        /*
+         * We don't support this.
+         */
+        *retVal = nsBuildHcallRetVal(NS_STATUS_ACCESS_DENIED, 0);
+        return;
+    case NS_INSTALL_INTERCEPT:
+        /*
+         * We don't support this.
+         */
+        *retVal = nsBuildHcallRetVal(NS_STATUS_ACCESS_DENIED, 0);
+        return;
+    case NS_CREATE_VP:
+        /*
+         * We don't support this.
+         */
+        *retVal = nsBuildHcallRetVal(NS_STATUS_ACCESS_DENIED, 0);
+        return;
+    case NS_TERMINATE_VP:
+        /*
+         * We don't support this.
+         */
+        *retVal = nsBuildHcallRetVal(NS_STATUS_ACCESS_DENIED, 0);
+        return;
+    case NS_DELETE_VP:
+        /*
+         * We don't support this.
+         */
+        *retVal = nsBuildHcallRetVal(NS_STATUS_ACCESS_DENIED, 0);
+        return;
+    case NS_GET_NEXT_VP:
+        /*
+         * We don't support this.
+         */
+        *retVal = nsBuildHcallRetVal(NS_STATUS_ACCESS_DENIED, 0);
+        return;
+    case NS_GET_VP_REGISTERS:
+        *retVal = nsBuildHcallRetVal(
+            nsGetVpRegisters(input, output), 0);
+        return;
+    case NS_SET_VP_REGISTERS:
+        *retVal = nsBuildHcallRetVal(
+            nsSetVpRegisters(input, output), 0);
+        return;
+    case NS_SWITCH_VA:
+        *retVal =
+            nsBuildHcallRetVal(nsSwitchVa(input), 0);
+        return;
+    case NS_FLUSH_VA:
+        *retVal =
+            nsBuildHcallRetVal(nsFlushVa(input), 0);
+        return;
+    case NS_FLUSH_VA_LIST:
+        value = nsFlushVaRange(input, startIndex,
+            repCount, &repsDone);
+        *retVal = nsBuildHcallRetVal(value, repsDone);
+        return;
+    case NS_TRASLATE_VA:
+        /*
+         * We don't support this.
+         */
+        *retVal = nsBuildHcallRetVal(NS_STATUS_ACCESS_DENIED, 0);
+        return;
+    case NS_READ_GPA:
+        /*
+         * We don't support this.
+         */
+        *retVal = nsBuildHcallRetVal(NS_STATUS_ACCESS_DENIED, 0);
+        return;
+    case NS_WRITE_GPA:
+        /*
+         * We don't support this.
+         */
+        *retVal = nsBuildHcallRetVal(NS_STATUS_ACCESS_DENIED, 0);
+        return;
+    case NS_ASSERT_VIRTUAL_INTERRUPT:
+        /*
+         * We don't support this.
+         */
+        *retVal = nsBuildHcallRetVal(NS_STATUS_ACCESS_DENIED, 0);
+        return;
+    case NS_CLEAR_VIRTUAL_INTERRUPT:
+        /*
+         * We don't support this.
+         */
+        *retVal = nsBuildHcallRetVal(NS_STATUS_ACCESS_DENIED, 0);
+        return;
+    case NS_CREATE_PORT:
+        /*
+         * We don't support this.
+         */
+        *retVal = nsBuildHcallRetVal(NS_STATUS_ACCESS_DENIED, 0);
+        return;
+    case NS_DELETE_PORT:
+        /*
+         * We don't support this.
+         */
+        *retVal = nsBuildHcallRetVal(NS_STATUS_ACCESS_DENIED, 0);
+        return;
+    case NS_CONNECT_PORT:
+        /*
+         * We don't support this.
+         */
+        *retVal = nsBuildHcallRetVal(NS_STATUS_ACCESS_DENIED, 0);
+        return;
+    case NS_GET_PORT_PROPERTY:
+        /*
+         * We don't support this.
+         */
+        *retVal = nsBuildHcallRetVal(NS_STATUS_ACCESS_DENIED, 0);
+        return;
+    case NS_DISCONNECT_PORT:
+        /*
+         * We don't support this.
+         */
+        *retVal = nsBuildHcallRetVal(NS_STATUS_ACCESS_DENIED, 0);
+        return;
+    case NS_POST_MESSAGE:
+        /*
+         * We don't support this.
+         */
+        *retVal = nsBuildHcallRetVal(NS_STATUS_ACCESS_DENIED, 0);
+        return;
+    case NS_POST_EVENT:
+        /*
+         * We don't support this.
+         */
+        *retVal = nsBuildHcallRetVal(NS_STATUS_ACCESS_DENIED, 0);
+        return;
+    default:
+        nsXenVector.extPrintk("Unkown hypercall: verb is: %d\n", verb);
+        *retVal =
+            nsBuildHcallRetVal(NS_STATUS_INVALID_HYPERCALL_CODE, 0);
+        return;
+    }
+}
Index: xen-3.2-testing/xen/arch/x86/hvm/hvm_ext/novell/nshypercall.h
===================================================================
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
+++ xen-3.2-testing/xen/arch/x86/hvm/hvm_ext/novell/nshypercall.h 2008-02-15 18:28:11.000000000 -0500
@@ -0,0 +1,125 @@
+/****************************************************************************
+ |
+ | Copyright (c) [2007, 2008] Novell, Inc.
+ | All Rights Reserved.
+ |
+ | This program is free software; you can redistribute it and/or
+ | modify it under the terms of version 2 of the GNU General Public License as
+ | published by the Free Software Foundation.
+ |
+ | This program is distributed in the hope that it will be useful,
+ | but WITHOUT ANY WARRANTY; without even the implied warranty of
+ | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ | GNU General Public License for more details.
+ |
+ | You should have received a copy of the GNU General Public License
+ | along with this program; if not, contact Novell, Inc.
+ |
+ | To contact Novell about this file by physical or electronic mail,
+ | you may find current contact information at www.novell.com
+ |
+ |***************************************************************************
+*/
+
+/*
+ * nshypercall.h
+ * Memory layouts for the various hypercalls supported.
+ *
+ * Engineering Contact: K. Y. Srinivasan
+ */
+
+#ifndef NS_HYPERCALL_H
+#define NS_HYPERCALL_H
+
+#include <xen/cpumask.h>
+
+/* Input block, presumably for NS_GET_VP_REGISTERS (its consumer is not shown here). */
+typedef struct getVpRegistersInput {
+ u64 partitionId;
+ u64 vpIndex;
+ u32 regIndex;
+} getVpRegistersInput_t;
+/* Output block, presumably for NS_GET_VP_REGISTERS: a value as low and high 64-bit halves. */
+typedef struct getVpRegistersOutput {
+ u64 lowValue;
+ u64 highValue;
+} getVpRegistersOutput_t;
+
+
+/* One entry of the nsSetVpRegisters() register list; a regName of 0 is taken as the terminator. */
+typedef struct setVpRegisterSpec {
+ u32 regName;
+ u32 pad;
+ u64 pad1;
+ u64 lowValue;
+ u64 highValue;
+} setVpRegisterSpec_t;
+typedef struct setVpRegistersInput {
+ u64 partitionId; /* must equal the calling domain's id */
+ u64 vpIndex; /* index of the target vcpu */
+ setVpRegisterSpec_t regSpec; /* first entry of the register list */
+} setVpRegistersInput_t;
+
+/* Input block read by nsFlushVa() and nsFlushVaRange(); reused to stash the processed parameters. */
+typedef struct flushVa {
+ u64 asHandle; /* address space id; rewritten to NS_ALL_AS when all spaces are flushed */
+ u64 flags; /* NS_FLUSH_* bits from the guest; later overwritten with the flushGlobal value */
+ union {
+ u64 processorMask; /* guest-supplied mask, bit i names vcpu i */
+ cpumask_t vcpuMask; /* processed mask stored over it by the flush handlers */
+ } procMask; /* NOTE(review): a cpumask_t wider than 64 bits would shift gva - confirm */
+#define pMask procMask.processorMask
+#define vMask procMask.vcpuMask
+ u64 gva; /* presumably the first flush-list entry; further 8-byte entries follow */
+} flushVa_t;
+/* Bits of flushVa.flags as supplied by the guest. */
+#define NS_FLUSH_ALL_PROCESSORS 0x00000001
+#define NS_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES 0x00000002
+#define NS_FLUSH_NON_GLOBAL_MAPPINGS_ONLY 0x00000004
+/* asHandle value stored when NS_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES is set. */
+#define NS_ALL_AS (-1)
+
+/*
+ * Hypercall verbs: the low 16 bits of the opcode, as switched on by nsHandleHyperCall().
+ */
+
+#define NS_CREATE_PARTITION 0x0010
+#define NS_INITIALIZE_PARTITION 0x0011
+#define NS_DELETE_PARTITION 0x0014
+#define NS_GET_PARTITION_PROPERTY 0x0017
+#define NS_SET_PARTITION_PROPERTY 0x0018
+#define NS_GET_PARTITION_ID 0x0015
+#define NS_GET_NEXT_CHILD_PARTITION 0x0016
+#define NS_SET_LOGICAL_PROCESSOR_RUN_TIME_GROUP 0x0005
+#define NS_CLEAR_LOGICAL_PROCESSOR_RUN_TIME_GROUP 0x0006
+#define NS_NOTIFY_LOGICAL_PROCESSOR_POWER_STATE 0x0007
+#define NS_GET_LOGICAL_PROCESSOR_RUN_TIME 0x0004
+#define NS_DEPOSIT_MEMORY 0x001C
+#define NS_WITHDRAW_MEMORY 0x001D
+#define NS_GET_MEMORY_BALANCE 0x001E
+#define NS_MAP_GPA_PAGES 0x001A
+#define NS_UNMAP_GPA_PAGES 0x001B
+#define NS_INSTALL_INTERCEPT 0x0019
+#define NS_CREATE_VP 0x001F
+#define NS_TERMINATE_VP 0x0020
+#define NS_DELETE_VP 0x0021
+#define NS_GET_NEXT_VP 0x0027
+#define NS_GET_VP_REGISTERS 0x0022
+#define NS_SET_VP_REGISTERS 0x0023
+#define NS_SWITCH_VA 0x0001
+#define NS_FLUSH_VA 0x0002
+#define NS_FLUSH_VA_LIST 0x0003
+#define NS_TRASLATE_VA 0x0024 /* sic: spelling kept, nsHandleHyperCall() uses this name */
+#define NS_READ_GPA 0x0025
+#define NS_WRITE_GPA 0x0026
+#define NS_ASSERT_VIRTUAL_INTERRUPT 0x002A
+#define NS_CLEAR_VIRTUAL_INTERRUPT 0x002C
+#define NS_CREATE_PORT 0x002D
+#define NS_DELETE_PORT 0x002E
+#define NS_CONNECT_PORT 0x002F
+#define NS_GET_PORT_PROPERTY 0x0031
+#define NS_DISCONNECT_PORT 0x0030
+#define NS_POST_MESSAGE 0x0032
+#define NS_POST_EVENT 0x0034
+
+#endif /* NS_HYPERCALL_H */
Index: xen-3.2-testing/xen/arch/x86/hvm/hvm_ext/novell/nsintercept.c
===================================================================
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
+++ xen-3.2-testing/xen/arch/x86/hvm/hvm_ext/novell/nsintercept.c 2008-02-15 18:28:34.000000000 -0500
@@ -0,0 +1,2077 @@
+/****************************************************************************
+ |
+ | Copyright (c) [2007, 2008] Novell, Inc.
+ | All Rights Reserved.
+ |
+ | This program is free software; you can redistribute it and/or
+ | modify it under the terms of version 2 of the GNU General Public License as
+ | published by the Free Software Foundation.
+ |
+ | This program is distributed in the hope that it will be useful,
+ | but WITHOUT ANY WARRANTY; without even the implied warranty of
+ | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ | GNU General Public License for more details.
+ |
+ | You should have received a copy of the GNU General Public License
+ | along with this program; if not, contact Novell, Inc.
+ |
+ | To contact Novell about this file by physical or electronic mail,
+ | you may find current contact information at www.novell.com
+ |
+ |***************************************************************************
+*/
+
+/*
+ * nsintercept.c.
+ * This file implements the intercepts to support the Novell Shim.
+ *
+ * Engineering Contact: K. Y. Srinivasan
+ */
+
+#include <asm/hvm/hvm_extensions.h>
+
+
+#include <asm/config.h>
+#include <asm/hvm/io.h>
+#include <asm/processor.h>
+#include <asm/page.h>
+#include <asm/apicdef.h>
+#include <asm/regs.h>
+#include <asm/msr.h>
+
+#include <xen/string.h>
+#include <xen/init.h>
+#include <xen/compile.h>
+#include <xen/hvm/save.h>
+#include <public/sched.h>
+
+
+/*
+ * Local includes; extension specific.
+ */
+#include "ns_errno.h"
+#include "ns_shim.h"
+
+
+/*
+ * Implement Novell Shim.
+ */
+
+
+/*
+ * Forward declarations of the intercept handlers installed in nsExtensionVector.
+ */
+static int
+nsDomainCreate(struct domain *d);
+static void
+nsDomainDestroy(struct domain *d);
+static int
+nsVcpuInitialize(struct vcpu *v);
+static void
+nsVcpuDestroy(struct vcpu *v);
+static int
+nsDoCpuId(uint32_t input, struct cpu_user_regs *regs);
+static int
+nsDoRdMsr(uint32_t idx, struct cpu_user_regs *regs);
+static int
+nsDoWrMsr(uint32_t idx, struct cpu_user_regs *regs);
+static int
+nsDoHyperCall(struct cpu_user_regs *pregs);
+static void
+nsDoMigrateTimers(struct vcpu *v);
+/* Intercept vector handed to hvm_ext_register() by nsExtensionInit(). */
+extension_intercept_vector_t nsExtensionVector = {
+ .domain_create = nsDomainCreate,
+ .domain_destroy = nsDomainDestroy,
+ .vcpu_initialize = nsVcpuInitialize,
+ .vcpu_destroy = nsVcpuDestroy,
+ .do_cpuid = nsDoCpuId,
+ .do_msr_read = nsDoRdMsr,
+ .do_msr_write = nsDoWrMsr,
+ .do_hypercall = nsDoHyperCall,
+ .do_continuation = nsDoTlbFlush,
+ .do_migrate_timers = nsDoMigrateTimers
+};
+
+/*
+ * Hooks into xen services; to be populated by our proxy in xen.
+ * A pointer to it is handed to hvm_ext_register() by nsExtensionInit().
+ */
+xen_call_vector_t nsXenVector;
+/* Prototypes for static inline helpers called by the MSR write handlers below. */
+static inline void
+nsInjectException(int trap);
+
+static inline void
+nsHypercallPageInitialize(void *hypercallPage, nsPartition_t *curp);
+
+static inline void
+nsInitEventPage(void *siefPage);
+
+static inline void
+nsInitMessagePage(void *simPage);
+
+/*
+ * static int __init nsExtensionInit(void)
+ * Register the extension module and its intercept vector with xen.
+ * On failure the error is returned and nothing is printed, since the
+ * nsXenVector hooks (extPrintk included) may not have been filled in.
+ */
+static int __init nsExtensionInit(void)
+{
+    int retVal = hvm_ext_register(1, &nsExtensionVector, &nsXenVector);
+    if (retVal != 0) {
+        return retVal;
+    }
+    nsXenVector.extPrintk("NS Extension Initialized\n");
+    return 0;
+}
+__initcall(nsExtensionInit);
+
+/*
+ * Our lock primitives.
+ */
+/*
+ * void
+ * nsLockAcquire(nsVcpu_t *vcpup, nsSpinLock_t *nsLock)
+ * Acquire the lock with irqs off. The irq flags are published to the lock
+ * only once it is held, so a contender cannot clobber the owner's copy.
+ */
+void
+nsLockAcquire(nsVcpu_t *vcpup, nsSpinLock_t *nsLock)
+{
+    unsigned long irqFlags;
+    NS_ASSERT(nsLock->owner != vcpup);
+    spin_lock_irqsave(&nsLock->spinLock, irqFlags);
+    nsLock->flags = irqFlags;
+    nsLock->owner = vcpup;
+    nsLock->retAddr = __builtin_return_address(0);
+    vcpup->nsVcplockDepth++;
+}
+
+/*
+ * void
+ * nsLockRelease(nsVcpu_t *vcpup, nsSpinLock_t *nsLock)
+ * Release the specified spin lock. The saved irq flags are read while the
+ * lock is still held; after the unlock the next owner may overwrite them.
+ */
+void
+nsLockRelease(nsVcpu_t *vcpup, nsSpinLock_t *nsLock)
+{
+    unsigned long irqFlags;
+    NS_ASSERT((nsLock->owner == vcpup));
+    irqFlags = nsLock->flags;
+    nsLock->owner = NULL;
+    vcpup->nsVcplockDepth--;
+    NS_ASSERT(vcpup->nsVcplockDepth >= 0);
+    spin_unlock_irqrestore(&nsLock->spinLock, irqFlags);
+}
+
+/*
+ * void
+ * nsLockInit(nsSpinLock_t *nsLock)
+ * Put the lock in its unowned state: no owner, no recorded acquirer
+ * return address, and an initialized underlying spin lock.
+ *
+ * Calling/Exit State: None.
+ */
+void
+nsLockInit(nsSpinLock_t *nsLock)
+{
+    nsLock->retAddr = NULL;
+    nsLock->owner = NULL;
+    spin_lock_init(&nsLock->spinLock);
+}
+
+/*
+ * static inline void nsWriteGuestIdMsr(nsPartition_t *curp,
+ * nsVcpu_t *curVcpu,
+ * u64 msrContent)
+ * Record the guest ID written by the guest in the partition state.
+ *
+ * Calling/Exit State:
+ * None.
+ */
+static inline void
+nsWriteGuestIdMsr(nsPartition_t *curp, nsVcpu_t *curVcpu, u64 msrContent)
+{
+    curp->nsGuestIdMsr = msrContent;
+
+    /*
+     * Only a cleared guest ID needs more work: if a hypercall msr
+     * value had been recorded, the calling vcpu is marked not up.
+     */
+    if (curp->nsGuestIdMsr != 0 || !curp->nsHypercallMsr) {
+        return;
+    }
+    curVcpu->nsVcpuFlags &= ~NS_VCPU_UP;
+}
+
+/*
+ * static inline void nsWriteHypercallMsr(nsPartition_t *curp,
+ * nsVcpu_t *curVcpu,
+ * u64 msrContent)
+ * Handle a guest write to the hypercall msr.
+ * Bit 0 of msrContent is the enable bit and bits 12 and up give the
+ * guest frame that is to hold the hypercall page. When the guest is
+ * registered and sets the enable bit, the page is initialized, its mfn
+ * and the msr value are recorded, and the calling vcpu is marked up.
+ * A frame that cannot be mapped gets the guest a GP fault instead.
+ *
+ * Calling/Exit State:
+ * nsLock is acquired and released internally.
+ */
+
+static inline void
+nsWriteHypercallMsr(nsPartition_t *curp,
+    nsVcpu_t *curVcpu,
+    u64 msrContent)
+{
+    struct domain *d = curVcpu->nsVcpXenVcpu->domain;
+    unsigned long gmfn;
+    void *hypercallPage;
+    int enabled = 0;
+
+    nsLockAcquire(curVcpu, &curp->nsLock);
+    gmfn = (msrContent >> 12);
+    /*
+     * Nothing to do if the guest is not registered, or if this write
+     * does not set the enable bit.
+     * XXXKYS: Can the guest write the GPA in one call and
+     * subsequently enable it? Check. For now assume that all the
+     * info is specified in one call.
+     */
+    if (curp->nsGuestIdMsr == 0 || ((u32)msrContent & (0x00000001)) == 0) {
+        goto unlock;
+    }
+    hypercallPage = nsXenVector.extGetVirtFromGmfn(d,gmfn);
+    if (hypercallPage == NULL) {
+        /*
+         * The guest specified a bogus GPA; inject a GP fault
+         * into the guest.
+         */
+        nsInjectException(TRAP_gp_fault);
+        goto unlock;
+    }
+    nsHypercallPageInitialize(hypercallPage, curp);
+    curp->nsHypercallMfn = nsXenVector.extGetMfnFromGmfn(d, gmfn);
+#ifdef CONFIG_DOMAIN_PAGE
+    nsXenVector.extUnmapDomainPage(hypercallPage);
+#endif
+    curp->nsHypercallMsr = msrContent;
+    enabled = 1;
+unlock:
+    nsLockRelease(curVcpu, &curp->nsLock);
+    if (enabled) {
+        /* The vcpu is marked up only after the lock has been dropped. */
+        curVcpu->nsVcpuFlags |= NS_VCPU_UP;
+    }
+}
+
+/*
+ * static inline void nsWriteSxMsr(uint32_t idx, nsPartition_t *curp,
+ * nsVcpu_t *curVcpu,
+ * u64 msrContent)
+ * Handle a guest write to the SIEFP or SIMP msr, selected by idx.
+ *
+ * Bit 0 of msrContent is the enable bit; bits 12 and up give the guest
+ * frame of the page. curp is not used.
+ *
+ * Calling/Exit State:
+ * None.
+ */
+
+static inline void nsWriteSxMsr(uint32_t idx, nsPartition_t *curp,
+    nsVcpu_t *curVcpu,
+    u64 msrContent)
+{
+    struct domain *d = curVcpu->nsVcpXenVcpu->domain;
+    unsigned long gmfn = (msrContent >> 12);
+    void *sxPage;
+
+    /*
+     * Can the client enable the page and specify
+     * the base address in two different calls?
+     * XXXKYS: For now assume that it is done in one call.
+     */
+    if (((u32)msrContent & (0x00000001)) == 0) {
+        /*
+         * The client is not enabling the sx page; just ignore everything.
+         */
+        return;
+    }
+    sxPage = nsXenVector.extGetVirtFromGmfn(d, gmfn);
+    if (sxPage == NULL) {
+        /*
+         * The guest specified a bogus GPA; inject a GP fault
+         * into the guest.
+         */
+        nsInjectException(TRAP_gp_fault);
+        return;
+    }
+    /*
+     * Initialize the page, then remember the msr value and mapping.
+     */
+    if (idx == NS_MSR_SIEFP) {
+        nsInitEventPage(sxPage);
+        curVcpu->nsVcpSIefpMsr = msrContent;
+        curVcpu->nsVcpSiefPage = sxPage;
+    } else if (idx == NS_MSR_SIMP) {
+        nsInitMessagePage(sxPage);
+        curVcpu->nsVcpSimpMsr = msrContent;
+        curVcpu->nsVcpSimPage = sxPage;
+    }
+}
+
+/*
+ * Time this domain booted (a single global, not per-domain state).
+ */
+s_time_t nsDomainBootTime;
+
+/*
+ * static inline u64
+ * nsGetTimeSinceDomainBoot(void)
+ * Retrieve the time since nsDomainBootTime in 100ns units, assuming
+ * extGetTimeSinceBoot() reports nanoseconds.
+ * Calling/Exit State:
+ * None.
+ */
+static inline u64
+nsGetTimeSinceDomainBoot(void)
+{
+    u64 elapsed = nsXenVector.extGetTimeSinceBoot();
+    elapsed -= nsDomainBootTime;
+    return (elapsed / 100);
+}
+
+/*
+ * static inline int
+ * nsCallFromBios(struct cpu_user_regs *regs)
+ * Report whether the calling vcpu is running with paging disabled, which
+ * is taken to mean the caller is the BIOS and not a guest OS that may
+ * consume the services of the extension module. regs is not used.
+ *
+ * Calling/Exit State:
+ * Returns 1 when paging is disabled, 0 otherwise.
+ */
+
+static inline int
+nsCallFromBios(struct cpu_user_regs *regs)
+{
+    /*
+     * hvm_paging_enabled() is only tested for truth.
+     */
+    return (hvm_paging_enabled(current) ? 0 : 1);
+}
+
+/*
+ * static inline void
+ * nsInjectException(int trap)
+ * Inject the specified exception into the invoking virtual CPU through
+ * the hvm function table; the two remaining arguments are passed as 0.
+ * Calling/Exit State:
+ * None.
+ */
+
+static inline void
+nsInjectException(int trap)
+{
+ nsXenVector.hvmFuncTable->inject_exception(trap, 0, 0);
+}
+
+
+/*
+ * static inline int
+ * nsOsRegistered(void)
+ * Report whether the current partition has a non-zero guest ID msr, i.e.
+ * whether the guest has registered itself with the Novell Shim.
+ *
+ * Calling/Exit State: returns 1 if registered, 0 if not.
+ */
+
+static inline int
+nsOsRegistered(void)
+{
+    nsPartition_t *partition = nsGetCurrentPartition();
+    return (partition->nsGuestIdMsr != 0);
+}
+
+
+/*
+ * static inline void
+ * nsSetPartitionPrivileges(nsPartition_t *nspp)
+ * Set the partition-wide privileges. Currently they are hardcoded.
+ * We could perhaps make this an attribute of the domain and have the
+ * configuration tools manage it.
+ *
+ * Calling/Exit State:
+ * None.
+ */
+
+static inline void
+nsSetPartitionPrivileges(nsPartition_t *nspp)
+{
+ /*
+ * This is based on the hypervisor spec under section 5.2.3.
+ */
+ nspp->nsPrivileges = 0x000000020000007f;
+}
+
+/*
+ * static inline u32
+ * nsGetRecommendations(void)
+ * Return the recommendation bits to report for the current domain.
+ *
+ * Calling/Exit State:
+ * None.
+ */
+static inline u32
+nsGetRecommendations(void)
+{
+    u32 recommended = 0x1f;
+
+    /*
+     * For now we recommend all the features. Need to validate.
+     * If HAP is enabled; the guest should not use TLB flush
+     * related enlightenments, so 0x19 is reported in place of
+     * the full 0x1f set.
+     */
+    if (paging_mode_hap(current->domain)) {
+        recommended = 0x19;
+    }
+    return (recommended);
+}
+
+/*
+ * static inline void
+ * nsSetPartitionFeatures(nsPartition_t *nspp)
+ * Set the partition-wide supported features. Currently hardcoded to 0x1f.
+ * We could perhaps make this an attribute of the domain and have the
+ * configuration tools manage it.
+ *
+ * Calling/Exit State:
+ * None.
+ */
+
+static inline void
+nsSetPartitionFeatures(nsPartition_t *nspp)
+{
+ nspp->nsSupportedFeatures = 0x1f;
+}
+/* Guest version, service pack and service branch accessors; each one currently returns 0. */
+static inline u16
+nsGetGuestMajor(void)
+{
+ return (0);
+}
+static inline u16
+nsGetGuestMinor(void)
+{
+ return (0);
+}
+static inline u32
+nsGetGuestServicePack(void)
+{
+ return (0);
+}
+
+static inline u8
+nsGetGuestServiceBranchInfo(void)
+{
+ return (0);
+}
+static inline u32
+nsGetGuestServiceNumber(void)
+{
+ return (0);
+}
+
+/*
+ * static inline u32
+ * nsGetSupportedSyntheticMsrs(void)
+ * Get the mask of synthetic MSRs supported by the Novell Shim. Currently
+ * it is hardcoded to 0xff.
+ *
+ * Calling/Exit State:
+ * None.
+ */
+static inline u32
+nsGetSupportedSyntheticMsrs(void)
+{
+ /*
+ * All MSRs in the spec version 0.83 including RESET MSR.
+ */
+ return (0xff);
+}
+
+
+/*
+ * static inline u32
+ * nsGetMaxVcpusSupported(void)
+ * Report MAX_VIRT_CPUS as the maximum number of vcpus supported.
+ *
+ * Calling/Exit State:
+ * None.
+ */
+
+static inline u32
+nsGetMaxVcpusSupported(void)
+{
+ return MAX_VIRT_CPUS;
+}
+
+/*
+ * static inline u32
+ * nsGetMaxLcpusSupported(void)
+ * Report NR_CPUS as the maximum number of physical cpus supported.
+ *
+ * Calling/Exit State:
+ * None.
+ */
+static inline u32
+nsGetMaxLcpusSupported(void)
+{
+ return NR_CPUS;
+}
+
+
+/*
+ * static inline void
+ * nsReadIcr(u64 *icrContent)
+ * Read both 32-bit ICR words of the calling VCPU's local APIC (offsets
+ * 0x300 and 0x310) and return them in *icrContent as one 64-bit value.
+ *
+ * Calling/Exit State: None.
+ */
+static inline void
+nsReadIcr(u64 *icrContent)
+{
+    u64 icr;
+    u32 lowWord, highWord;
+
+    lowWord = nsXenVector.mmIoHandler->read_handler(current,
+        (vlapic_base_address(vcpu_vlapic(current)) + 0x300), 4);
+    highWord = nsXenVector.mmIoHandler->read_handler(current,
+        (vlapic_base_address(vcpu_vlapic(current)) + 0x310), 4);
+    icr = highWord;
+    icr <<= 32;
+    icr |= lowWord;
+    *icrContent = icr;
+}
+
+/*
+ * static inline void
+ * nsReadTpr(u64 *tprContent)
+ * Read the TPR (offset 0x80) of the calling VCPU's local APIC through
+ * the MMIO read handler and store it, zero-extended, in *tprContent.
+ *
+ * Calling/Exit State:
+ * None.
+ */
+static inline void
+nsReadTpr(u64 *tprContent)
+{
+    struct vcpu *self = current;
+    u32 tpr;
+
+    tpr = nsXenVector.mmIoHandler->read_handler(self,
+        (vlapic_base_address(vcpu_vlapic(self)) + 0x80), 4);
+    *tprContent = tpr;
+}
+
+/*
+ * static inline void
+ * nsWriteEoi(u64 msrContent)
+ * Write the low 32 bits of msrContent to the EOI register (offset 0xb0)
+ * of the calling VCPU's local APIC through the MMIO write handler.
+ *
+ * Calling/Exit State:
+ * None.
+ */
+static inline void
+nsWriteEoi(u64 msrContent)
+{
+    struct vcpu *self = current;
+
+    nsXenVector.mmIoHandler->write_handler(self,
+        (vlapic_base_address(vcpu_vlapic(self)) + 0xb0), 4,
+        (u32)msrContent);
+}
+
+/*
+ * static inline void
+ * nsWriteIcr(u64 msrContent)
+ * Write the ICR of the calling VCPU's local APIC through the MMIO
+ * write handler. The high dword (offset 0x310, destination) is written
+ * first, then the low dword (offset 0x300).
+ *
+ * The high dword is written unconditionally. Skipping it when it is
+ * zero would leave the destination programmed by an earlier IPI in
+ * place, so an IPI addressed to destination 0 would be sent to the
+ * wrong processor.
+ *
+ * Calling/Exit State:
+ * None.
+ */
+static inline void
+nsWriteIcr(u64 msrContent)
+{
+    u32 icrLow = (u32)msrContent;
+    u32 icrHigh = (u32)(msrContent >> 32);
+
+    nsXenVector.mmIoHandler->write_handler(current,
+        (vlapic_base_address(vcpu_vlapic(current)) + 0x310), 4,
+        icrHigh);
+    /*
+     * An all-zero low dword carries no usable vector; as before, it is
+     * not forwarded to the local APIC.
+     */
+    if (icrLow != 0) {
+        nsXenVector.mmIoHandler->write_handler(current,
+            (vlapic_base_address(vcpu_vlapic(current)) + 0x300), 4,
+            icrLow);
+    }
+}
+
+/*
+ * static inline void
+ * nsWriteTpr(u64 msrContent)
+ * Write the low 32 bits of msrContent to the TPR (offset 0x80) of the
+ * calling VCPU's local APIC through the MMIO write handler.
+ *
+ * Calling/Exit State:
+ * None.
+ */
+static inline void
+nsWriteTpr(u64 msrContent)
+{
+    struct vcpu *self = current;
+
+    nsXenVector.mmIoHandler->write_handler(self,
+        (vlapic_base_address(vcpu_vlapic(self)) + 0x80), 4,
+        (u32)msrContent);
+}
+
+/*
+ * static inline void
+ * nsHypercallPageInitialize(void *hypercallPage, nsPartition_t *curp)
+ * Initialize the hypercall page to support the Novell Shim hypercalls.
+ * Records in curp whether the calling VCPU is in 64-bit mode, clears
+ * the page and places a 4-byte stub at its start:
+ *     0f 01 c1   (on Intel)  or  0f 01 d9  (otherwise)
+ *     c3         ret
+ *
+ * Calling/Exit State:
+ * None.
+ */
+static inline void
+nsHypercallPageInitialize(void *hypercallPage, nsPartition_t *curp)
+{
+    u8 *stub = (u8 *)hypercallPage;
+
+    /* guest_x86_mode() reporting 8 is taken to mean a long mode guest. */
+    curp->nsLongModeGuest =
+        (nsXenVector.hvmFuncTable->guest_x86_mode(current) == 8) ? 1 : 0;
+
+    memset(hypercallPage, 0, PAGE_SIZE);
+
+    stub[0] = 0x0f; /* vmcall */
+    stub[1] = 0x01;
+    stub[2] = nsXenVector.extCpuIsIntel() ? 0xc1 : 0xd9;
+    stub[3] = 0xc3; /* ret */
+}
+
+/*
+ * static inline void
+ * nsInitEventPage(void *siefPage)
+ * Clear the per-vcpu event page (PAGE_SIZE bytes at siefPage).
+ *
+ * Calling/Exit State:
+ * None.
+ */
+static inline void
+nsInitEventPage(void *siefPage)
+{
+    (void)memset(siefPage, 0, PAGE_SIZE);
+}
+
+/*
+ * static inline void
+ * nsInitMessagePage(void *simPage)
+ * Clear the per-vcpu message page (PAGE_SIZE bytes at simPage).
+ *
+ * Calling/Exit State:
+ * None.
+ */
+static inline void
+nsInitMessagePage(void *simPage)
+{
+    (void)memset(simPage, 0, PAGE_SIZE);
+}
+
+
+/*
+ * static inline void
+ * nsProcessMessageQ(nsPartition_t *curp, nsVcpu_t *curVcpu)
+ * Process the message queue. Queued messages are not supported yet,
+ * so this is intentionally a no-op placeholder.
+ *
+ * Calling/Exit State:
+ * None.
+ */
+static inline void
+nsProcessMessageQ(nsPartition_t *curp, nsVcpu_t *curVcpu)
+{
+    /* XXXKYS: we currently do not support queued messages. */
+}
+
+/*
+ * static inline void
+ * nsScheduleTimeOut(nsVcpTimerState_t *timer)
+ * Arm timer->vcpuTimer to expire at timer->count.
+ *
+ * timer->count is kept in units of 100ns and is an absolute time, not
+ * an offset from NOW(); it is multiplied by 100 before being handed to
+ * the timer layer.
+ *
+ * Calling/Exit State:
+ * None.
+ */
+static inline void
+nsScheduleTimeOut(nsVcpTimerState_t *timer)
+{
+    nsXenVector.extSetTimer(&timer->vcpuTimer, timer->count * 100);
+}
+
+/*
+ * static void
+ * nsTimeOutHandler(void *arg)
+ * The timeout handler for Novell Shim/Adaptor synthetic timers.
+ * arg is the nsVcpTimerState_t of the timer that expired (this is the
+ * callback nsTimerInit registers with init_timer).
+ *
+ * If the SynIC control and message-page MSRs allow it and the message
+ * slot selected by bits 19:16 of the timer config is empty, a
+ * timer-expired message is written to the slot. Unless the matching
+ * SINTx is masked (bit 16) its vector (bits 7:0) is then posted to the
+ * VCPU that owns the timer; if AutoEOI (bit 17) is set an EOI follows.
+ * Timers whose config has bit 1 set are re-armed on every exit path.
+ *
+ * Calling/Exit State:
+ * Runs as a timer callback, so "current" need not be the VCPU that
+ * owns the timer; all APIC accesses use the owning VCPU recorded in
+ * nsVcpXenVcpu instead.
+ */
+
+static void
+nsTimeOutHandler(void *arg)
+{
+    nsVcpTimerState_t *timerData = arg;
+    nsVcpu_t *curVcpu = timerData->thisCpu;
+    struct vcpu *target = curVcpu->nsVcpXenVcpu;
+    nsTimerMessage_t *slot;
+    u64 sint;
+    int sIntNum;
+    int vector;
+
+    if (!(curVcpu->nsVcpSControlMsr & 0x9)) {
+        goto nsToPostProcess;
+    }
+    /*
+     * SynIC is enabled; do further processing. Timeouts are posted as
+     * messages; verify that the message page is enabled.
+     */
+    if (!(curVcpu->nsVcpSimpMsr & 0x1)) {
+        goto nsToPostProcess;
+    }
+    sIntNum = (((u32)(timerData->config >> 16)) & 0x0000000f);
+    /*
+     * First post the message and then optionally deal with the
+     * interrupt notification.
+     */
+    if (curVcpu->nsVcpSimPage == NULL) {
+        NS_PANIC("Novell Shim: Sim page not setup\n");
+    }
+    /*
+     * NOTE(review): the slot is probed as nsMessage_t[] but filled as
+     * nsTimerMessage_t[]; this only addresses the same slot if the two
+     * types have the same size -- confirm against their definitions.
+     */
+    if ((((nsMessage_t *)curVcpu->nsVcpSimPage)[sIntNum]).messageType !=
+        nsMessageTypeNone) {
+        /*
+         * The message slot is not empty; silently drop this expiry.
+         */
+        goto nsToPostProcess;
+    }
+    /*
+     * The slot is available; post the message.
+     */
+    slot = &((nsTimerMessage_t *)curVcpu->nsVcpSimPage)[sIntNum];
+    slot->messageType = nsMessageTimerExpired;
+    slot->messageSize = sizeof(nsTimerMessage_t);
+    slot->timerIndex = timerData->timerIndex;
+    slot->expirationTime = timerData->count;
+
+    sint = curVcpu->nsVcpSIntMsr[sIntNum];
+    if ((sint >> 16) & 0x1) {
+        /*
+         * The designated sintx register is masked; just return.
+         */
+        goto nsToPostProcess;
+    }
+    vector = ((u32)sint & 0xff);
+
+    /*
+     * Now post the interrupt to the VCPU that owns the timer.
+     * XXXKYS: What is the delivery mode for interrupts delivered here.
+     * Check with Mike?
+     */
+    nsXenVector.extPostInterrupt(target, vector, APIC_DM_FIXED);
+
+    /*
+     * If auto eoi is set, deal with that. AutoEOI is bit 17 of SINTx;
+     * bit 16 is the mask bit already tested above, so testing it again
+     * here could never succeed.
+     */
+    if ((sint >> 17) & 0x1) {
+        nsXenVector.mmIoHandler->write_handler(target,
+            (vlapic_base_address(vcpu_vlapic(target)) + 0xb0), 4, 0);
+    }
+
+nsToPostProcess:
+    /*
+     * Prior to returning, deal with all the post timeout issues.
+     * NOTE(review): the timer is re-armed with an unchanged absolute
+     * count; confirm the count is advanced elsewhere for periodic timers.
+     */
+    if (((u32)(timerData->config)) & 0x00000002) {
+        NS_STATS_COLLECT(NS_TIMEOUTS, &curVcpu->nsVcpStats);
+        nsScheduleTimeOut(timerData);
+    }
+}
+
+/*
+ * static inline void
+ * nsTimerInit(nsVcpu_t *vcpup, int timer)
+ * Reset synthetic timer number "timer" of vcpup (config and count
+ * cleared, back-pointer and index recorded) and register
+ * nsTimeOutHandler as its expiry callback, bound to the processor of
+ * the VCPU that is current at the time of the call.
+ *
+ * Calling/Exit State:
+ * None.
+ */
+
+static inline void
+nsTimerInit(nsVcpu_t *vcpup, int timer)
+{
+    nsVcpTimerState_t *state = &vcpup->nsVcpTimers[timer];
+
+    state->config = 0;
+    state->count = 0;
+    state->thisCpu = vcpup;
+    state->timerIndex = timer;
+    /*
+     * XXXKYS: if the binding between vcpu and physical processor
+     * changes, what is done about pending timeouts?
+     * KYS: Need to migrate timers when the vcpu->physical CPU binding
+     * changes.
+     */
+    init_timer(&state->vcpuTimer, nsTimeOutHandler, state,
+        current->processor);
+}
+
+/*
+ * static inline int
+ * nsAccessTimeRefCnt(nsPartition_t *curp, u64 *msrContent)
+ * Read the per-partition time base into *msrContent.
+ * Returns 1 on success, or 0 (leaving *msrContent untouched) when the
+ * partition lacks the NS_ACCESS_TIME_REF_CNT privilege.
+ *
+ * Calling/Exit State:
+ * None.
+ */
+
+static inline int
+nsAccessTimeRefCnt(nsPartition_t *curp, u64 *msrContent)
+{
+    if (nsPrivilegeCheck(curp, NS_ACCESS_TIME_REF_CNT)) {
+        *msrContent = nsGetTimeSinceDomainBoot();
+        return 1;
+    }
+    /* The partition does not have the privilege to read this. */
+    return 0;
+}
+
+/*
+ * static void
+ * nsDoMigrateTimers(struct vcpu *v)
+ * The binding between vcpu v and its physical cpu has changed; move
+ * the four synthetic timers of v to v->processor.
+ *
+ * The shim state is looked up through v itself (as nsVcpuInitialize
+ * and nsVcpuDestroy do) rather than through "current", so the right
+ * timers are migrated even if the caller is not running on v.
+ * Nothing is done if the domain has no shim state attached.
+ *
+ * Calling/Exit State:
+ * The new binding is already in place.
+ */
+
+static void
+nsDoMigrateTimers(struct vcpu *v)
+{
+    nsPartition_t *curp = v->domain->arch.hvm_domain.ext_handle;
+    nsVcpu_t *vcpup;
+    int i;
+
+    if (curp == NULL) {
+        return;
+    }
+    vcpup = &curp->nsVcpuState[v->vcpu_id];
+
+    for (i = 0; i < 4; i++) {
+        nsXenVector.extMigrateTimer(&vcpup->nsVcpTimers[i].vcpuTimer,
+            v->processor);
+    }
+}
+
+/*
+ * static int
+ * nsDoHyperCall(struct cpu_user_regs *pregs)
+ * Intercept for implementing Extension hypercalls.
+ *
+ * The call is treated as an extension hypercall only when the
+ * instruction pointer lies in the shim's hypercall page (MFN compare).
+ * Arguments are taken from ecx/edx/r8 for a long mode guest and from
+ * the edx:eax, ebx:ecx and edi:esi pairs otherwise.
+ *
+ * Returns 1 if the hypercall was consumed here (including the case
+ * where #GP is injected because the CPU state is invalid), 0 if the
+ * call did not come from the extension hypercall page.
+ *
+ * Calling/Exit State:
+ * Based on the hypercall; the caller may give up the CPU while
+ * processing the hypercall. No locks should be held on entry and
+ * no locks will be held on return.
+ */
+
+static int
+nsDoHyperCall(struct cpu_user_regs *pregs)
+{
+    nsPartition_t *curp = nsGetCurrentPartition();
+    nsVcpu_t *vcpup;
+    int longModeGuest = curp->nsLongModeGuest;
+    unsigned long hypercallMfn;
+    u64 opcode, input, output, retVal;
+
+    hypercallMfn = nsXenVector.extGetMfnFromGva(pregs->eip);
+    if (hypercallMfn != curp->nsHypercallMfn) {
+        /*
+         * This hypercall page is not the page for extension.
+         */
+        return (0);
+    }
+
+    vcpup = &curp->nsVcpuState[nsGetCurrentVcpuIndex()];
+
+    /*
+     * This is an extension hypercall; process it, but first make
+     * sure that the CPU is in the right state for invoking
+     * the hypercall - protected mode at CPL 0.
+     */
+    if (nsInvalidCpuState()) {
+        nsInjectException(TRAP_gp_fault);
+        retVal = nsBuildHcallRetVal(NS_STATUS_INVALID_VP_STATE, 0);
+        nsSetSysCallRetVal(pregs, longModeGuest, retVal);
+        return (1);
+    }
+    if (longModeGuest) {
+        opcode = pregs->ecx;
+        input = pregs->edx;
+        output = pregs->r8;
+    } else {
+        opcode = ((((u64)pregs->edx) << 32) | ((u64)pregs->eax));
+        input = ((((u64)pregs->ebx) << 32) | ((u64)pregs->ecx));
+        output = ((((u64)pregs->edi) << 32) | ((u64)pregs->esi));
+    }
+    NS_ASSERT(vcpup->nsVcplockDepth == 0);
+    nsHandleHyperCall(opcode, input, output, &retVal);
+    nsSetSysCallRetVal(pregs, longModeGuest, retVal);
+    NS_ASSERT(vcpup->nsVcplockDepth == 0);
+    return (1);
+}
+
+/*
+ * static int
+ * nsDomainCreate(struct domain *d)
+ * NS intercept for domain creation. Allocates and zeroes the
+ * per-partition shim state, initializes its lock, privileges and
+ * features, attaches it to d and records the domain boot time.
+ * Returns 0 on success and 1 if the allocation fails.
+ *
+ * Calling/Exit State:
+ * None.
+ */
+
+
+static int
+nsDomainCreate(struct domain *d)
+{
+    nsPartition_t *partition = nsXenVector.extAllocMem(sizeof(nsPartition_t));
+
+    if (!partition) {
+        nsDebugPrint("Memory allocation failed\n");
+        return 1;
+    }
+    memset(partition, 0, sizeof(nsPartition_t));
+    nsLockInit(&partition->nsLock);
+
+    /*
+     * Set the partition wide privileges and features. We can start
+     * with no privileges and progressively turn on fancier hypervisor
+     * features.
+     */
+    nsSetPartitionPrivileges(partition);
+    nsSetPartitionFeatures(partition);
+
+    /* Stash away pointer to our state in the hvm domain structure. */
+    d->arch.hvm_domain.ext_handle = partition;
+    nsDomainBootTime = nsXenVector.extGetTimeSinceBoot();
+    return 0;
+}
+
+
+
+/*
+ * static void
+ * nsDomainDestroy(struct domain *d)
+ * NS intercept for the domain destruction. Prints the flush state
+ * and the per-vcpu statistics of every vcpu that exists in d, then
+ * frees the per-partition shim state and detaches it from d.
+ *
+ * Calling/Exit State:
+ * None.
+ */
+static void
+nsDomainDestroy(struct domain *d)
+{
+    nsPartition_t *curp = d->arch.hvm_domain.ext_handle;
+    int vcpuIdx;
+
+    nsXenVector.extPrintk("NS Domain Being Destroyed\n");
+    NS_ASSERT(curp != NULL);
+    nsXenVector.extPrintk("DUMP STATS\n");
+    nsXenVector.extPrintk("GFS cpucount is %d\n", curp->nsFlushState.cpuCount);
+    if (curp->nsFlushState.currentOwner == NULL) {
+        nsXenVector.extPrintk("GFS is free\n");
+    } else {
+        nsXenVector.extPrintk("GFS owner is %d\n",
+            curp->nsFlushState.currentOwner->vcpu_id);
+    }
+    if (cpus_empty(curp->nsFlushState.waiters)) {
+        nsXenVector.extPrintk("GFS: waiters empty\n");
+    } else {
+        nsXenVector.extPrintk("GFS: waiters not empty\n");
+    }
+    for (vcpuIdx = 0; vcpuIdx < MAX_VIRT_CPUS; vcpuIdx++) {
+        if (d->vcpu[vcpuIdx] == NULL) {
+            continue;
+        }
+        nsPrintStats(curp, vcpuIdx);
+    }
+
+    nsXenVector.extFreeMem(d->arch.hvm_domain.ext_handle);
+    d->arch.hvm_domain.ext_handle = NULL;
+}
+
+/*
+ * static int
+ * nsVcpuInitialize(struct vcpu *v)
+ * NS intercept for vcpu creation. Sets up the per-vcpu shim state for
+ * v: synthetic MSR defaults, the four synthetic timers and one page
+ * each for the hypercall input and output buffers (both zeroed).
+ *
+ * Returns 0 on success and 1 on failure. On failure everything that
+ * was allocated here is released, the page and buffer pointers are
+ * reset to NULL and the active-vcpu count is rolled back.
+ *
+ * Calling/Exit State:
+ * None.
+ */
+
+static int
+nsVcpuInitialize(struct vcpu *v)
+{
+    nsPartition_t *curp = v->domain->arch.hvm_domain.ext_handle;
+    nsVcpu_t *vcpup = &curp->nsVcpuState[v->vcpu_id];
+    int i;
+
+    atomic_inc(&curp->nsNumVcpusActive);
+    if (v->vcpu_id == 0) {
+        vcpup->nsVcpuFlags |= NS_VCPU_BOOT_CPU;
+    }
+    /*
+     * Initialize all the synthetic MSRs corresponding to this VCPU.
+     * Note that all state is set to 0 to begin with.
+     */
+    vcpup->nsVcpSVersionMsr = 0x00000001;
+    /*
+     * Initialize the synthetic timer structures.
+     */
+    for (i = 0; i < 4; i++) {
+        nsTimerInit(vcpup, i);
+    }
+    /*
+     * Setup the input and output pages for handling hypercalls.
+     */
+    vcpup->nsVcpInputBufferPage = nsXenVector.extAllocDomHeapPage();
+    if (vcpup->nsVcpInputBufferPage == NULL) {
+        nsDebugPrint("Memory allocation failed\n");
+        goto failCount;
+    }
+    vcpup->nsVcpInputBuffer =
+        nsXenVector.extGetVirtFromPagePtr(vcpup->nsVcpInputBufferPage);
+    if (vcpup->nsVcpInputBuffer == NULL) {
+        nsDebugPrint("Coud not get VA\n");
+        goto failFreeInput;
+    }
+    memset(vcpup->nsVcpInputBuffer, 0, PAGE_SIZE);
+
+    vcpup->nsVcpOutputBufferPage = nsXenVector.extAllocDomHeapPage();
+    if (vcpup->nsVcpOutputBufferPage == NULL) {
+        nsDebugPrint("Memory allocation failed\n");
+        goto failUnmapInput;
+    }
+    vcpup->nsVcpOutputBuffer =
+        nsXenVector.extGetVirtFromPagePtr(vcpup->nsVcpOutputBufferPage);
+    if (vcpup->nsVcpOutputBuffer == NULL) {
+        nsDebugPrint("Coud not get VA\n");
+        goto failFreeOutput;
+    }
+    /* Do not expose stale heap contents through the output buffer. */
+    memset(vcpup->nsVcpOutputBuffer, 0, PAGE_SIZE);
+
+    vcpup->nsVcpXenVcpu = v;
+    vcpup->nsVcpFlushRequest = 0;
+
+    return (0);
+
+failFreeOutput:
+    nsXenVector.extFreeDomHeapPage(vcpup->nsVcpOutputBufferPage);
+    vcpup->nsVcpOutputBufferPage = NULL;
+failUnmapInput:
+#ifdef CONFIG_DOMAIN_PAGE
+    nsXenVector.extUnmapDomainPage(vcpup->nsVcpInputBuffer);
+#endif
+    vcpup->nsVcpInputBuffer = NULL;
+failFreeInput:
+    nsXenVector.extFreeDomHeapPage(vcpup->nsVcpInputBufferPage);
+    vcpup->nsVcpInputBufferPage = NULL;
+failCount:
+    /* Undo the accounting done on entry. */
+    atomic_dec(&curp->nsNumVcpusActive);
+    return (1);
+}
+
+/*
+ * static void
+ * nsVcpuDestroy(struct vcpu *v)
+ * NS intercept for vcpu destruction. Marks the vcpu down, stops its
+ * synthetic timers and releases the pages held for it.
+ *
+ * The timers are killed before any page is unmapped or freed because
+ * nsTimeOutHandler writes into the message page. Pages that were
+ * never set up (NULL) are skipped, and the buffer pointers are
+ * cleared once released.
+ *
+ * Calling/Exit State:
+ * None.
+ */
+static void
+nsVcpuDestroy(struct vcpu *v)
+{
+    nsPartition_t *curp = v->domain->arch.hvm_domain.ext_handle;
+    nsVcpu_t *vcpup = &curp->nsVcpuState[v->vcpu_id];
+    int i;
+
+    atomic_dec(&curp->nsNumVcpusActive);
+    vcpup->nsVcpuFlags &= ~NS_VCPU_UP;
+    /*
+     * Kill the timers first so that no handler can touch the pages
+     * released below.
+     */
+    for (i = 0; i < 4; i++) {
+        nsXenVector.extKillTimer(&vcpup->nsVcpTimers[i].vcpuTimer);
+    }
+    /*
+     * Get rid of the pages we have allocated for this VCPU.
+     */
+#ifdef CONFIG_DOMAIN_PAGE
+    if (vcpup->nsVcpSiefPage != NULL) {
+        nsXenVector.extUnmapDomainPage(vcpup->nsVcpSiefPage);
+    }
+    if (vcpup->nsVcpSimPage != NULL) {
+        nsXenVector.extUnmapDomainPage(vcpup->nsVcpSimPage);
+    }
+    if (vcpup->nsVcpInputBuffer != NULL) {
+        nsXenVector.extUnmapDomainPage(vcpup->nsVcpInputBuffer);
+    }
+    if (vcpup->nsVcpOutputBuffer != NULL) {
+        nsXenVector.extUnmapDomainPage(vcpup->nsVcpOutputBuffer);
+    }
+#endif
+    vcpup->nsVcpInputBuffer = NULL;
+    vcpup->nsVcpOutputBuffer = NULL;
+
+    if (vcpup->nsVcpInputBufferPage != NULL) {
+        nsXenVector.extFreeDomHeapPage(vcpup->nsVcpInputBufferPage);
+        vcpup->nsVcpInputBufferPage = NULL;
+    }
+    if (vcpup->nsVcpOutputBufferPage != NULL) {
+        nsXenVector.extFreeDomHeapPage(vcpup->nsVcpOutputBufferPage);
+        vcpup->nsVcpOutputBufferPage = NULL;
+    }
+}
+
+/*
+ * static int nsVcpuSave(struct domain *d, hvm_domain_context_t *h)
+ * Save per-cpu shim state to support either migration or domain save.
+ * One NS_VERIDIAN_CPU record is written for every vcpu of d that is
+ * not down. Timer counts are saved in units of 100ns relative to
+ * NOW() (0 if already expired); the restore side adds NOW() back.
+ *
+ * Returns 0 on success (or when d has no shim state), 1 if writing a
+ * record fails.
+ *
+ * Calling exit state:
+ * None.
+ */
+static int
+nsVcpuSave(struct domain *d, hvm_domain_context_t *h)
+{
+    struct vcpu *v;
+    struct hvm_ns_veridian_cpu ctxt;
+    nsVcpu_t *vcpup;
+    nsPartition_t *curp = d->arch.hvm_domain.ext_handle;
+    s_time_t now;
+    int i;
+
+    if (curp == NULL) {
+        return 0;
+    }
+    for_each_vcpu(d, v) {
+        vcpup = &curp->nsVcpuState[v->vcpu_id];
+
+        NS_ASSERT(vcpup->nsVcplockDepth == 0);
+        NS_ASSERT(vcpup->nsVcpFlushRequest == 0);
+        NS_ASSERT(vcpup->nsVcpWaitingOnGFS == 0);
+        NS_ASSERT(vcpup->nsVcpFlushPending == 0);
+        NS_ASSERT(vcpup->nsVcpWaitingForCleanup == 0);
+        /*
+         * We don't need to save state for a vcpu that is down; the
+         * restore code will leave it down if there is nothing saved.
+         */
+        if ( test_bit(_VPF_down, &v->pause_flags) )
+            continue;
+        /*
+         * Start from an all-zero record so that padding and any field
+         * not set below never carry stack contents into the image.
+         */
+        memset(&ctxt, 0, sizeof(ctxt));
+        ctxt.control_msr = vcpup->nsVcpSControlMsr;
+        ctxt.version_msr = vcpup->nsVcpSVersionMsr;
+        ctxt.sief_msr = vcpup->nsVcpSIefpMsr;
+        ctxt.simp_msr = vcpup->nsVcpSimpMsr;
+        ctxt.eom_msr = vcpup->nsVcpEomMsr;
+        for (i = 0; i < 16; i++)
+            ctxt.int_msr[i] = vcpup->nsVcpSIntMsr[i];
+        /*
+         * Sample the clock once: reading it separately for the compare
+         * and the subtract lets time advance in between and the
+         * difference wrap around.
+         */
+        now = NOW();
+        for (i = 0; i < 4; i++) {
+            ctxt.timers[i].config = vcpup->nsVcpTimers[i].config;
+            if (vcpup->nsVcpTimers[i].count > (now / 100)) {
+                ctxt.timers[i].count =
+                    (vcpup->nsVcpTimers[i].count - (now / 100));
+            } else {
+                ctxt.timers[i].count = 0;
+            }
+        }
+        if ( hvm_save_entry(NS_VERIDIAN_CPU,
+            v->vcpu_id, h, &ctxt) != 0 )
+            return 1;
+    }
+
+    return 0;
+}
+
+/*
+ * static int nsVcpuRestore(struct domain *d, hvm_domain_context_t *h)
+ * Restore per-cpu shim state to support either migration or domain
+ * restore. Loads one NS_VERIDIAN_CPU record, reinstates the synthetic
+ * MSRs of the vcpu it belongs to and re-arms the timers whose config
+ * has an enable bit (mask 0x9) set.
+ *
+ * Returns 0 on success (or when d has no shim state) and -22 if the
+ * record names a vcpu that does not exist or cannot be loaded.
+ *
+ * Calling exit state:
+ * None.
+ */
+static int
+nsVcpuRestore(struct domain *d, hvm_domain_context_t *h)
+{
+    int vcpuid, i;
+    struct hvm_ns_veridian_cpu ctxt;
+    nsVcpu_t *vcpup;
+    nsPartition_t *curp = d->arch.hvm_domain.ext_handle;
+    s_time_t now;
+
+    if (curp == NULL) {
+        return 0;
+    }
+    /*
+     * Which vcpu is this? The instance comes from the save image, so
+     * validate it before using it as an index.
+     */
+    vcpuid = hvm_load_instance(h);
+    if ( (vcpuid < 0) || (vcpuid >= MAX_VIRT_CPUS) ||
+         (d->vcpu[vcpuid] == NULL) )
+        return -22;
+    vcpup = &curp->nsVcpuState[vcpuid];
+    if ( hvm_load_entry(NS_VERIDIAN_CPU, h, &ctxt) != 0 )
+        return -22;
+
+    vcpup->nsVcpSControlMsr = ctxt.control_msr;
+    vcpup->nsVcpSVersionMsr = ctxt.version_msr;
+
+    nsWriteSxMsr(NS_MSR_SIEFP, curp, vcpup, ctxt.sief_msr);
+    nsWriteSxMsr(NS_MSR_SIMP, curp, vcpup, ctxt.simp_msr);
+
+    vcpup->nsVcpEomMsr = ctxt.eom_msr;
+    for (i = 0; i < 16; i++)
+        vcpup->nsVcpSIntMsr[i] = ctxt.int_msr[i];
+
+    /* Saved counts are relative to NOW(); make them absolute again. */
+    now = NOW();
+    for (i = 0; i < 4; i++) {
+        vcpup->nsVcpTimers[i].config = ctxt.timers[i].config;
+        vcpup->nsVcpTimers[i].count =
+            (ctxt.timers[i].count + (now / 100));
+        /*
+         * Test the bits with '&': "config | 0x9" is non-zero for every
+         * config value and would arm timers the guest never enabled.
+         */
+        if ((vcpup->nsVcpTimers[i].config & 0x9)) {
+            /*
+             * XXXKYS: Some issues with regards to time
+             * management here:
+             * 1) We will ignore the elapsed wall clock time
+             * when the domain was not running.
+             * 2) Clearly we should account for the time that
+             * has elapsed when the domain was running with
+             * respect to the timeouts that were scheduled
+             * prior to saving the domain.
+             * We will deal with this on the save side.
+             */
+            nsScheduleTimeOut(&vcpup->nsVcpTimers[i]);
+            NS_STATS_COLLECT(NS_TIMEOUTS, &vcpup->nsVcpStats);
+        }
+    }
+
+    vcpup->nsVcpuFlags |= NS_VCPU_UP;
+    return 0;
+}
+
+
+
+/*
+ * static int nsDomSave(struct domain *d, hvm_domain_context_t *h)
+ * Save per-domain shim state to support either migration or domain
+ * save: the guest id MSR, the hypercall MSR and the long mode flag are
+ * written as a single NS_VERIDIAN_DOM record.
+ *
+ * Returns 0 when d has no shim state, otherwise the result of
+ * hvm_save_entry().
+ *
+ * Calling exit state:
+ * None.
+ */
+
+static int
+nsDomSave(struct domain *d, hvm_domain_context_t *h)
+{
+    struct hvm_ns_veridian_dom ctxt;
+    nsPartition_t *curp = d->arch.hvm_domain.ext_handle;
+
+    if (curp == NULL) {
+        return 0;
+    }
+
+    /*
+     * Zero the whole record first so that padding never carries stack
+     * contents into the saved image.
+     */
+    memset(&ctxt, 0, sizeof(ctxt));
+    ctxt.guestid_msr = curp->nsGuestIdMsr;
+    ctxt.hypercall_msr = curp->nsHypercallMsr;
+    ctxt.long_mode = curp->nsLongModeGuest;
+    ctxt.pad0 = 0;
+    return (hvm_save_entry(NS_VERIDIAN_DOM, 0, h, &ctxt));
+}
+
+/*
+ * static int nsDomRestore(struct domain *d, hvm_domain_context_t *h)
+ * Restore per-domain shim state to support either migration or domain
+ * restore: loads one NS_VERIDIAN_DOM record, reinstates the guest id
+ * MSR, hypercall MSR and long mode flag, and recomputes the hypercall
+ * page MFN from the frame number held in the hypercall MSR.
+ *
+ * Returns 0 on success (or when d has no shim state) and -22 if the
+ * record cannot be loaded.
+ *
+ * Calling exit state:
+ * None.
+ */
+
+static int
+nsDomRestore(struct domain *d, hvm_domain_context_t *h)
+{
+    nsPartition_t *partition = d->arch.hvm_domain.ext_handle;
+    struct hvm_ns_veridian_dom saved;
+
+    if (!partition)
+        return 0;
+
+    if ( hvm_load_entry(NS_VERIDIAN_DOM, h, &saved) != 0 )
+        return -22;
+
+    partition->nsGuestIdMsr = saved.guestid_msr;
+    partition->nsHypercallMsr = saved.hypercall_msr;
+    partition->nsLongModeGuest = saved.long_mode;
+    /* Bits 12 and up of the hypercall MSR hold the guest frame number. */
+    partition->nsHypercallMfn =
+        nsXenVector.extGetMfnFromGmfn(d, (saved.hypercall_msr >> 12));
+
+    return 0;
+}
+
+/*
+ * Register the shim's save/restore handlers: one record type for the
+ * per-domain state (nsDomSave / nsDomRestore) and one for the per-vcpu
+ * state (nsVcpuSave / nsVcpuRestore).
+ * NOTE(review): the meaning of the literal 1 argument is defined by
+ * HVM_REGISTER_SAVE_RESTORE, which is not part of this section.
+ */
+HVM_REGISTER_SAVE_RESTORE(NS_VERIDIAN_DOM, nsDomSave, nsDomRestore,
+ 1, HVMSR_PER_DOM);
+
+
+HVM_REGISTER_SAVE_RESTORE(NS_VERIDIAN_CPU, nsVcpuSave , nsVcpuRestore,
+ 1, HVMSR_PER_VCPU);
+
+
+/*
+ * static int
+ * nsPreProcessCpuIdLeaves(unsigned int input, struct cpu_user_regs *regs)
+ *
+ * Preprocess cpuid leaves. Both xen and Veridian use identical cpuid
+ * leaves for getting info from the hypervisor, so for an enlightened
+ * Windows guest (extid 1) the leaves used by the PV front-end drivers
+ * are served from 0x40001000 - 0x40001002 instead.
+ *
+ * Returns 0 if nsDoCpuId should go on and process the leaf itself,
+ * 1 if there is nothing further for it to do.
+ *
+ * Calling exit state:
+ * None.
+ */
+static int
+nsPreProcessCpuIdLeaves(unsigned int input, struct cpu_user_regs *regs)
+{
+    struct domain *d = current->domain;
+    int extid = d->arch.hvm_domain.params[HVM_PARAM_EXTEND_HYPERVISOR];
+
+    if (extid != 1) {
+        /*
+         * For now this is all other "enlightened guests". Leaves
+         * 0x40000000 - 0x40000002 have already been correctly
+         * processed; anything else is left to the caller.
+         */
+        if ((input >= 0x40000000) && (input <= 0x40000002)) {
+            return 1;
+        }
+        return 0;
+    }
+
+    /*
+     * Enlightened Windows guest; the shim owns leaves
+     * 0x40000000 - 0x40000005.
+     */
+    if ((input >= 0x40000000) && (input <= 0x40000005)) {
+        return 0;
+    }
+    /*
+     * PV drivers use cpuid to query the hypervisor for details. On
+     * Windows we will use the following leaves for this:
+     *
+     * 0x40001000: VMM Signature (corresponds to 0x40000000 on Linux)
+     * 0x40001001: VMM Version (corresponds to 0x40000001 on Linux)
+     * 0x40001002: Hypercall details (corresponds to 0x40000002 on Linux)
+     */
+    if ((input >= 0x40001000) && (input <= 0x40001002)) {
+        switch (input - 0x40001000) {
+        case 0:
+            /*
+             * NOTE(review): the largest leaf is reported as 0x40000002
+             * although these leaves live at 0x40001000 - 0x40001002;
+             * confirm what the Windows PV drivers expect.
+             */
+            regs->eax = 0x40000002; /* Largest leaf */
+            regs->ebx = 0x566e6558; /* Signature 1: "XenV" */
+            regs->ecx = 0x65584d4d; /* Signature 2: "MMXe" */
+            regs->edx = 0x4d4d566e; /* Signature 3: "nVMM" */
+            break;
+        case 1:
+            regs->eax = (XEN_VERSION << 16) | XEN_SUBVERSION;
+            regs->ebx = 0; /* Reserved */
+            regs->ecx = 0; /* Reserved */
+            regs->edx = 0; /* Reserved */
+            break;
+        case 2:
+            regs->eax = 1; /* Number of hypercall-transfer pages */
+            /* In linux this is 0x40000000 */
+            regs->ebx = 0x40001000; /* MSR base address */
+            regs->ecx = 0; /* Features 1 */
+            regs->edx = 0; /* Features 2 */
+            break;
+        default:
+            break;
+        }
+    }
+    return 1;
+}
+
+/*
+ * static int
+ * nsDoCpuId(unsigned int input, struct cpu_user_regs *regs)
+ * NS intercept for the cpuid instruction. Sets bit 31 of ECX for
+ * leaf 1 and serves the hypervisor leaves 0x40000000 - 0x40000005.
+ *
+ * Returns 1 if regs was filled in here, 0 if the leaf is not
+ * intercepted (call from the BIOS, leaf already handled by
+ * nsPreProcessCpuIdLeaves, or an unknown leaf).
+ *
+ * Calling/Exit State:
+ * None.
+ */
+
+static int
+nsDoCpuId(unsigned int input, struct cpu_user_regs *regs)
+{
+    uint32_t idx;
+
+    /*
+     * hvmloader uses cpuid to set up a hypercall page; we don't want to
+     * intercept calls coming from the bootstrap (bios) code in the HVM
+     * guest; we discriminate based on the instruction pointer.
+     */
+    if (nsCallFromBios(regs)) {
+        /*
+         * We don't intercept this.
+         */
+        return (0);
+    }
+
+    if (input == 0x00000001) {
+        regs->ecx = (regs->ecx | 0x80000000);
+        return (1);
+    }
+
+    if (nsPreProcessCpuIdLeaves(input, regs)) {
+        return (0);
+    }
+    idx = (input - 0x40000000);
+
+    switch (idx) {
+    case 0:
+        /*
+         * 0x40000000: Hypervisor identification.
+         */
+        regs->eax = 0x40000005; /* For now clamp this */
+        regs->ebx = 0x65766f4e; /* "Nove" */
+        regs->ecx = 0x68536c6c; /* "llSh" */
+        regs->edx = 0x76486d69; /* "imHv" */
+        break;
+
+    case 1:
+        /*
+         * 0x40000001: Hypervisor interface identification.
+         */
+        regs->eax = 0x31237648; /* "Hv#1" */
+        regs->ebx = 0; /* Reserved */
+        regs->ecx = 0; /* Reserved */
+        regs->edx = 0; /* Reserved */
+        break;
+    case 2:
+        /*
+         * 0x40000002: Guest Info.
+         * The narrow helper results are widened to u32 before being
+         * shifted; otherwise they are promoted to signed int and a set
+         * top bit would overflow the shift.
+         */
+        if (nsOsRegistered()) {
+            regs->eax = nsGetGuestMajor();
+            regs->ebx =
+                ((u32)nsGetGuestMajor() << 16) | nsGetGuestMinor();
+            regs->ecx = nsGetGuestServicePack();
+            regs->edx =
+                ((u32)nsGetGuestServiceBranchInfo() << 24) |
+                nsGetGuestServiceNumber();
+        } else {
+            regs->eax = 0;
+            regs->ebx = 0;
+            regs->ecx = 0;
+            regs->edx = 0;
+        }
+        break;
+    case 3:
+        /*
+         * 0x40000003: Feature identification.
+         */
+        regs->eax = nsGetSupportedSyntheticMsrs();
+        /* We only support AccessSelfPartitionId bit 1 */
+        regs->ebx = 0x2;
+        regs->ecx = 0; /* Reserved */
+        regs->edx = 0; /* No MWAIT (bit 0), No debugging (bit 1) */
+        break;
+    case 4:
+        /*
+         * 0x40000004: Implementation recommendations.
+         */
+        regs->eax = nsGetRecommendations();
+        regs->ebx = 0; /* Reserved */
+        regs->ecx = 0; /* Reserved */
+        regs->edx = 0; /* Reserved */
+        break;
+    case 5:
+        /*
+         * 0x40000005: Implementation limits.
+         * Currently we retrieve maximum number of vcpus and
+         * logical processors (hardware threads) supported.
+         */
+        regs->eax = nsGetMaxVcpusSupported();
+        regs->ebx = nsGetMaxLcpusSupported();
+        regs->ecx = 0; /* Reserved */
+        regs->edx = 0; /* Reserved */
+        break;
+
+    default:
+        /*
+         * We don't handle this leaf.
+         */
+        return (0);
+
+    }
+    return (1);
+}
+
+/*
+ * static int
+ * nsDoRdMsr(uint32_t idx, struct cpu_user_regs *regs)
+ * NS intercept for reading MSRs. For every MSR handled here the
+ * 64-bit value is returned in regs->edx:regs->eax.
+ *
+ * Returns 0 if the MSR is not intercepted (call from the BIOS, the
+ * PV-driver MSR of a non-Longhorn enlightened guest, or an unknown
+ * index) so that the caller can deal with it, and 1 otherwise. When a
+ * privilege check fails a #GP is injected and 1 is returned.
+ *
+ * Calling/Exit State:
+ * None.
+ */
+
+static int
+nsDoRdMsr(uint32_t idx, struct cpu_user_regs *regs)
+{
+    nsPartition_t *curp = nsGetCurrentPartition();
+    unsigned int vcpuIndex = nsGetCurrentVcpuIndex();
+    u64 msrContent = 0;
+    nsVcpu_t *curVcpu = &curp->nsVcpuState[vcpuIndex];
+    int synInt, timer;
+    struct domain *d = current->domain;
+    int extid = d->arch.hvm_domain.params[HVM_PARAM_EXTEND_HYPERVISOR];
+    u64 now100ns;
+
+    /*
+     * hvmloader uses rdmsr; we don't want to
+     * intercept calls coming from the bootstrap (bios) code in the HVM
+     * guest; we discriminate based on the instruction pointer.
+     */
+    if (nsCallFromBios(regs)) {
+        /*
+         * We don't intercept this.
+         */
+        return (0);
+    }
+    if (extid > 1) {
+        /*
+         * For now this is all other "Enlightened" operating systems
+         * other than Longhorn.
+         */
+        if (idx == 0x40000000) {
+            /*
+             * PV driver hypercall setup. Let xen handle this.
+             */
+            return (0);
+        }
+        if (idx == 0x40001000) {
+            idx = 0x40000000;
+        }
+    }
+    /*
+     * Every case below leaves the value to return in msrContent; it is
+     * split into edx:eax once, after the switch.
+     */
+    switch (idx) {
+    case NS_MSR_GUEST_OS_ID:
+        nsLockAcquire(curVcpu, &curp->nsLock);
+        msrContent = curp->nsGuestIdMsr;
+        nsLockRelease(curVcpu, &curp->nsLock);
+        break;
+    case NS_MSR_HYPERCALL:
+        nsLockAcquire(curVcpu, &curp->nsLock);
+        msrContent = curp->nsHypercallMsr;
+        nsLockRelease(curVcpu, &curp->nsLock);
+        /*
+         * Test the value that was read under the lock rather than
+         * reading the MSR a second time without it.
+         */
+        if ((((u32)msrContent) & (0x00000001)) != 0) {
+            curVcpu->nsVcpuFlags |= NS_VCPU_UP;
+        }
+        break;
+    case NS_MSR_VP_INDEX:
+        msrContent = vcpuIndex;
+        break;
+    case NS_MSR_ICR:
+        if (!nsPrivilegeCheck(curp, NS_ACCESS_APIC_MSRS)) {
+            goto msrReadError;
+        }
+        nsReadIcr(&msrContent);
+        NS_STATS_COLLECT(NS_ICR_READ, &curVcpu->nsVcpStats);
+        break;
+    case NS_MSR_TPR:
+        if (!nsPrivilegeCheck(curp, NS_ACCESS_APIC_MSRS)) {
+            goto msrReadError;
+        }
+        nsReadTpr(&msrContent);
+        NS_STATS_COLLECT(NS_TPR_READ, &curVcpu->nsVcpStats);
+        break;
+    /*
+     * The following synthetic MSRs are implemented in the Novell Shim.
+     */
+    case NS_MSR_SCONTROL:
+        if (!nsPrivilegeCheck(curp, NS_ACCESS_SYNC_MSRS)) {
+            goto msrReadError;
+        }
+        msrContent = curVcpu->nsVcpSControlMsr;
+        break;
+    case NS_MSR_SVERSION:
+        if (!nsPrivilegeCheck(curp, NS_ACCESS_SYNC_MSRS)) {
+            goto msrReadError;
+        }
+        msrContent = curVcpu->nsVcpSVersionMsr;
+        break;
+    case NS_MSR_SIEFP:
+        if (!nsPrivilegeCheck(curp, NS_ACCESS_SYNC_MSRS)) {
+            goto msrReadError;
+        }
+        msrContent = curVcpu->nsVcpSIefpMsr;
+        break;
+    case NS_MSR_SIMP:
+        if (!nsPrivilegeCheck(curp, NS_ACCESS_SYNC_MSRS)) {
+            goto msrReadError;
+        }
+        msrContent = curVcpu->nsVcpSimpMsr;
+        break;
+    case NS_MSR_SINT0:  synInt = 0;  goto synIntReadProcess;
+    case NS_MSR_SINT1:  synInt = 1;  goto synIntReadProcess;
+    case NS_MSR_SINT2:  synInt = 2;  goto synIntReadProcess;
+    case NS_MSR_SINT3:  synInt = 3;  goto synIntReadProcess;
+    case NS_MSR_SINT4:  synInt = 4;  goto synIntReadProcess;
+    case NS_MSR_SINT5:  synInt = 5;  goto synIntReadProcess;
+    case NS_MSR_SINT6:  synInt = 6;  goto synIntReadProcess;
+    case NS_MSR_SINT7:  synInt = 7;  goto synIntReadProcess;
+    case NS_MSR_SINT8:  synInt = 8;  goto synIntReadProcess;
+    case NS_MSR_SINT9:  synInt = 9;  goto synIntReadProcess;
+    case NS_MSR_SINT10: synInt = 10; goto synIntReadProcess;
+    case NS_MSR_SINT11: synInt = 11; goto synIntReadProcess;
+    case NS_MSR_SINT12: synInt = 12; goto synIntReadProcess;
+    case NS_MSR_SINT13: synInt = 13; goto synIntReadProcess;
+    case NS_MSR_SINT14: synInt = 14; goto synIntReadProcess;
+    case NS_MSR_SINT15:
+        synInt = 15;
+synIntReadProcess:
+        if (!nsPrivilegeCheck(curp, NS_ACCESS_SYNC_MSRS)) {
+            goto msrReadError;
+        }
+        msrContent = curVcpu->nsVcpSIntMsr[synInt];
+        break;
+
+    case NS_MSR_SEOM:
+        /*
+         * This is a write only register; reads return 0.
+         */
+        msrContent = 0;
+        break;
+    case NS_MSR_TIME_REF_COUNT:
+        if (!nsAccessTimeRefCnt(curp, &msrContent)) {
+            goto msrReadError;
+        }
+        break;
+    /*
+     * Synthetic timer MSRs.
+     */
+    case NS_MSR_TIMER0_CONFIG: timer = 0; goto processTimerConfigRead;
+    case NS_MSR_TIMER1_CONFIG: timer = 1; goto processTimerConfigRead;
+    case NS_MSR_TIMER2_CONFIG: timer = 2; goto processTimerConfigRead;
+    case NS_MSR_TIMER3_CONFIG:
+        timer = 3;
+processTimerConfigRead:
+        if (!nsPrivilegeCheck(curp, NS_ACCESS_SYNC_TIMERS)) {
+            goto msrReadError;
+        }
+        msrContent = curVcpu->nsVcpTimers[timer].config;
+        break;
+    case NS_MSR_TIMER0_COUNT: timer = 0; goto processTimerCountRead;
+    case NS_MSR_TIMER1_COUNT: timer = 1; goto processTimerCountRead;
+    case NS_MSR_TIMER2_COUNT: timer = 2; goto processTimerCountRead;
+    case NS_MSR_TIMER3_COUNT:
+        timer = 3;
+processTimerCountRead:
+        if (!nsPrivilegeCheck(curp, NS_ACCESS_SYNC_TIMERS)) {
+            goto msrReadError;
+        }
+        /*
+         * The stored count is absolute; report the time remaining.
+         * The clock is sampled once so that the compare and the
+         * subtract see the same value and the result cannot wrap.
+         */
+        now100ns = ((NOW()) / 100);
+        msrContent = curVcpu->nsVcpTimers[timer].count;
+        if (msrContent > now100ns) {
+            msrContent -= now100ns;
+        } else {
+            msrContent = 0;
+        }
+        break;
+    case NS_MSR_PVDRV_HCALL:
+    case NS_MSR_SYSTEM_RESET:
+        msrContent = 0;
+        break;
+    default:
+        /*
+         * We did not handle the MSR address specified;
+         * let the caller figure out what to do.
+         */
+        return (0);
+    }
+    regs->eax = (u32)(msrContent & 0xFFFFFFFF);
+    regs->edx = (u32)(msrContent >> 32);
+    return (1);
+msrReadError:
+    /*
+     * Have to inject #GP fault.
+     */
+    nsInjectException(TRAP_gp_fault);
+    return (1);
+}
+
+/*
+ * static int
+ * nsDoWrMsr(uint32_t idx, struct cpu_user_regs *regs)
+ * NS intercept for writing MSRS.
+ *
+ * Calling/Exit State:
+ * None.
+ */
+
+static int
+nsDoWrMsr(uint32_t idx, struct cpu_user_regs *regs)
+{
+ nsPartition_t *curp = nsGetCurrentPartition();
+ unsigned int vcpuIndex = nsGetCurrentVcpuIndex();
+ u64 msrContent = 0;
+ nsVcpu_t *curVcpu = &curp->nsVcpuState[vcpuIndex];
+ int synInt, timer;
+ struct domain *d = current->domain;
+ int extid = d->arch.hvm_domain.params[HVM_PARAM_EXTEND_HYPERVISOR];
+
+ /*
+ * hvmloader uses wrmsr; we don't want to
+ * intercept calls coming from the bootstrap (bios) code in the HVM
+ * guest; we discriminate based on the instruction pointer.
+ */
+ if (nsCallFromBios(regs)) {
+ /*
+ * We don't intercept this.
+ */
+ return (0);
+ }
+ msrContent =
+ (u32)regs->eax | ((u64)regs->edx << 32);
+ if (extid > 1) {
+ /*
+ * For now this is all other "Enlightened" operating systems
+ * other than Longhorn.
+ */
+ if (idx == 0x40000000) {
+ /*
+ * PV driver hypercall setup. Let xen handle this.
+ */
+ return (0);
+ }
+ if (idx == 0x40001000) {
+ idx = 0x40000000;
+ }
+ }
+ switch (idx) {
+ case NS_MSR_GUEST_OS_ID:
+ nsWriteGuestIdMsr(curp, curVcpu, msrContent);
+ break;
+ case NS_MSR_HYPERCALL:
+ nsWriteHypercallMsr(curp, curVcpu, msrContent);
+ break;
+
+ case NS_MSR_VP_INDEX:
+ goto msrWriteError;
+
+ case NS_MSR_EOI:
+ if (!nsPrivilegeCheck(curp, NS_ACCESS_APIC_MSRS)) {
+ goto msrWriteError;
+ }
+ nsWriteEoi(msrContent);
+ NS_STATS_COLLECT(NS_EOI_WRITE, &curVcpu->nsVcpStats);
+ break;
+ case NS_MSR_ICR:
+ if (!nsPrivilegeCheck(curp, NS_ACCESS_APIC_MSRS)) {
+ goto msrWriteError;
+ }
+ nsWriteIcr(msrContent);
+ NS_STATS_COLLECT(NS_ICR_WRITE, &curVcpu->nsVcpStats);
+ break;
+ case NS_MSR_TPR:
+ if (!nsPrivilegeCheck(curp, NS_ACCESS_APIC_MSRS)) {
+ goto msrWriteError;
+ }
+ nsWriteTpr(msrContent);
+ NS_STATS_COLLECT(NS_TPR_WRITE, &curVcpu->nsVcpStats);
+ break;
+
+ /*
+ * The following MSRs are synthetic MSRs supported in the Novell Shim.
+ */
+ case NS_MSR_SCONTROL:
+ if (!nsPrivilegeCheck(curp, NS_ACCESS_SYNC_MSRS)) {
+ goto msrWriteError;
+ }
+ curVcpu->nsVcpSControlMsr = msrContent;
+ break;
+ case NS_MSR_SVERSION:
+ if (!nsPrivilegeCheck(curp, NS_ACCESS_SYNC_MSRS)) {
+ goto msrWriteError;
+ }
+ /*
+ * This is a read-only MSR; generate #GP
+ */
+ nsInjectException(TRAP_gp_fault);
+ break;
+ case NS_MSR_SIEFP:
+ case NS_MSR_SIMP:
+ if (!nsPrivilegeCheck(curp, NS_ACCESS_SYNC_MSRS)) {
+ goto msrWriteError;
+ }
+ nsWriteSxMsr(idx, curp, curVcpu, msrContent);
+ break;
+ case NS_MSR_SINT0:
+ synInt = 0;
+ goto synIntWrProcess;
+ case NS_MSR_SINT1:
+ synInt = 1;
+ goto synIntWrProcess;
+ case NS_MSR_SINT2:
+ synInt = 2;
+ goto synIntWrProcess;
+ case NS_MSR_SINT3:
+ synInt = 3;
+ goto synIntWrProcess;
+ case NS_MSR_SINT4:
+ synInt = 4;
+ goto synIntWrProcess;
+ case NS_MSR_SINT5:
+ synInt = 5;
+ goto synIntWrProcess;
+ case NS_MSR_SINT6:
+ synInt = 6;
+ goto synIntWrProcess;
+ case NS_MSR_SINT7:
+ synInt = 7;
+ goto synIntWrProcess;
+ case NS_MSR_SINT8:
+ synInt = 8;
+ goto synIntWrProcess;
+ case NS_MSR_SINT9:
+ synInt = 9;
+ goto synIntWrProcess;
+ case NS_MSR_SINT10:
+ synInt = 10;
+ goto synIntWrProcess;
+ case NS_MSR_SINT11:
+ synInt = 11;
+ goto synIntWrProcess;
+ case NS_MSR_SINT12:
+ synInt = 12;
+ goto synIntWrProcess;
+ case NS_MSR_SINT13:
+ synInt = 13;
+ goto synIntWrProcess;
+ case NS_MSR_SINT14:
+ synInt = 14;
+ goto synIntWrProcess;
+ case NS_MSR_SINT15:
+ synInt = 15;
+synIntWrProcess:
+ if (!nsPrivilegeCheck(curp, NS_ACCESS_SYNC_MSRS)) {
+ goto msrWriteError;
+ }
+ /*
+ * XXXKYS: We assume that the synInt registers will be
+ * first written before the interrupt generation can occur.
+ * Specifically if SINT is masked all interrupts that may have
+ * been generated will be lost. Also when SINT is disabled;
+ * its effects will be only felt for subsequent interrupts that
+ * may be posted. XXXKYS: CHECK
+ */
+ curVcpu->nsVcpSIntMsr[synInt] = msrContent;
+ break;
+
+ case NS_MSR_SEOM:
+ if (!nsPrivilegeCheck(curp, NS_ACCESS_SYNC_MSRS)) {
+ goto msrWriteError;
+ }
+ curVcpu->nsVcpEomMsr = msrContent;
+ nsProcessMessageQ(curp, curVcpu);
+ break;
+ case NS_MSR_TIME_REF_COUNT:
+ /*
+ * This is a read-only msr.
+ */
+ goto msrWriteError;
+
+ /*
+ * Synthetic timer MSRs.
+ */
+ case NS_MSR_TIMER0_CONFIG:
+ timer = 0;
+ goto processTimerConfig;
+ case NS_MSR_TIMER1_CONFIG:
+ timer = 1;
+ goto processTimerConfig;
+ case NS_MSR_TIMER2_CONFIG:
+ timer = 2;
+ goto processTimerConfig;
+ case NS_MSR_TIMER3_CONFIG:
+ timer = 3;
+processTimerConfig:
+ if (!nsPrivilegeCheck(curp, NS_ACCESS_SYNC_TIMERS)) {
+ goto msrWriteError;
+ }
+ /*
+ * Assume that the client is going to write the whole msr.
+ */
+ if (!(msrContent & 0x9)) {
+ /*
+ * We are neither setting Auto Enable or Enable;
+ * silently exit.
+ * Should this be considered to turn off a
+ * timer that may be currently
+ * active; XXXKYS: Check. For now we are
+ * not doing anything here.
+ */
+ break;
+ }
+ if (!(((u32)(msrContent >> 16)) & 0x0000000f)) {
+ /*
+ * sintx is 0; clear the enable bit(s).
+ */
+ msrContent &= ~(0x1);
+ }
+ curVcpu->nsVcpTimers[timer].config = msrContent;
+ /*
+ * XXXKYS: Can any order be assumed here;
+ * should we just act on whatever is in the
+ * count register. For now act as if the count
+ * register is valid and act on it.
+ */
+ if (msrContent & 0x1) {
+ nsScheduleTimeOut(&curVcpu->nsVcpTimers[timer]);
+ NS_STATS_COLLECT(NS_TIMEOUTS, &curVcpu->nsVcpStats);
+ }
+ break;
+ case NS_MSR_TIMER0_COUNT:
+ timer = 0;
+ goto processTimerCount;
+ case NS_MSR_TIMER1_COUNT:
+ timer = 1;
+ goto processTimerCount;
+ case NS_MSR_TIMER2_COUNT:
+ timer = 2;
+ goto processTimerCount;
+ case NS_MSR_TIMER3_COUNT:
+ timer = 3;
+processTimerCount:
+ if (!nsPrivilegeCheck(curp, NS_ACCESS_SYNC_TIMERS)) {
+ goto msrWriteError;
+ }
+ curVcpu->nsVcpTimers[timer].count =
+ (msrContent + ((NOW())/100));
+ if ((curVcpu->nsVcpTimers[timer].config | 0x9)) {
+ nsScheduleTimeOut(&curVcpu->nsVcpTimers[timer]);
+ NS_STATS_COLLECT(NS_TIMEOUTS, &curVcpu->nsVcpStats);
+ }
+
+ break;
+ case NS_MSR_PVDRV_HCALL:
+ /*
+ * Establish the hypercall page for PV drivers.
+ */
+ nsXenVector.extWrmsrHypervisorRegs(0x40000000, regs->eax,
+ regs->edx);
+ break;
+ case NS_MSR_SYSTEM_RESET:
+ /*
+ * Shutdown the domain/partition.
+ */
+ if (msrContent & 0x1) {
+ domain_shutdown(d, SHUTDOWN_reboot);
+ }
+ break;
+
+ default:
+ /*
+ * We did not handle the MSR address;
+ * let the caller deal with this.
+ */
+ return (0);
+ }
+ return (1);
+msrWriteError:
+ /*
+ * Have to inject #GP fault.
+ */
+ nsInjectException(TRAP_gp_fault);
+ return (1);
+}
[-- Attachment #5: Type: text/plain, Size: 138 bytes --]
_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xensource.com
http://lists.xensource.com/xen-devel
^ permalink raw reply [flat|nested] 15+ messages in thread
* Re: [PATCH][RFC] Supporting Enlightened Windows 2008 Server
2008-02-19 22:11 [PATCH][RFC] Supporting Enlightened Windows 2008 Server Ky Srinivasan
@ 2008-02-20 9:44 ` Keir Fraser
2008-02-20 16:14 ` Ky Srinivasan
0 siblings, 1 reply; 15+ messages in thread
From: Keir Fraser @ 2008-02-20 9:44 UTC (permalink / raw)
To: Ky Srinivasan, xen-devel
On 19/2/08 22:11, "Ky Srinivasan" <ksrinivasan@novell.com> wrote:
> B) Implement an adapter that implements the required Hyper-V functionality.
>
> We have implemented only a subset of Hyper-V functionality that is required
> for enlightened windows 2008 guest today. However, we have the framework in
> place to implement any additional functionality that the windows guests may
> leverage going forward. The framework is extensible and one can easily
> implement OS specific enlightenments.
The appearance is that you hook on every significant emulation point in the
HVM code and implement a shadow hypervisor. Is that an accurate description?
What performance wins do you get from Hyper-V emulation, ignoring the
obvious wins you will get from installing PV I/O drivers? I think this is a
particularly interesting question since it was our impression that the PV
memory-management interfaces, which you appear to jump through hoops to
support, were not of much benefit with Xen's more sophisticated pagetable
algorithm (or with Intel EPT or AMD NPT).
-- Keir
^ permalink raw reply [flat|nested] 15+ messages in thread
* Re: [PATCH][RFC] Supporting Enlightened Windows 2008 Server
2008-02-20 9:44 ` Keir Fraser
@ 2008-02-20 16:14 ` Ky Srinivasan
0 siblings, 0 replies; 15+ messages in thread
From: Ky Srinivasan @ 2008-02-20 16:14 UTC (permalink / raw)
To: Keir Fraser, xen-devel
>>> On Wed, Feb 20, 2008 at 4:44 AM, in message
<C3E1A703.1CB24%Keir.Fraser@cl.cam.ac.uk>, Keir Fraser
<Keir.Fraser@cl.cam.ac.uk> wrote:
> On 19/2/08 22:11, "Ky Srinivasan" <ksrinivasan@novell.com> wrote:
>
>> B) Implement an adapter that implements the required Hyper-V functionality.
>>
>> We have implemented only a subset of Hyper-V functionality that is required
>> for enlightened windows 2008 guest today. However, we have the framework in
>> place to implement any additional functionality that the windows guests may
>> leverage going forward. The framework is extensible and one can easily
>> implement OS specific enlightenments.
>
> The appearance is that you hook on every significant emulation point in the
> HVM code and implement a shadow hypervisor. Is that an accurate description?
My initial goal was to get a framework in place where one could implement any OS specific enhancements without cluttering up the rest of the Xen code. To that end, this code does hook into various emulation points of interest in the hvm code. A secondary goal was to minimize the changes to the base Xen code. Furthermore, I wanted to reuse as much of the base Xen code as possible. So, I would not characterize this as a shadow hypervisor, but rather as an adapter that bridges the semantic gap between the APIs of interest in Hyper-V and Xen.
> What performance wins do you get from Hyper-V emulation, ignoring the
> obvious wins you will get from installing PV I/O drivers? I think this is a
> particularly interesting question since it was our impression that the PV
> memory-management interfaces, which you appear to jump through hoops to
> support, were not of much benefit with Xen's more sophisticated pagetable
> algorithm (or with Intel EPT or AMD NPT).
We have done very minimal performance evaluation to date. On NetBench I have seen a 10% improvement. You are right, TLB flush enlightenments did not make much of a difference. These results are on a uniprocessor configuration. Of the enlightenments, MSR based APIC access made the most difference. Going forward, we will be doing a more comprehensive performance evaluation with the goal of having parity with the MS platform.
Regards,
K. Y
>
> -- Keir
>
>
>
> _______________________________________________
> Xen-devel mailing list
> Xen-devel@lists.xensource.com
> http://lists.xensource.com/xen-devel
^ permalink raw reply [flat|nested] 15+ messages in thread
end of thread, other threads:[~2008-03-07 13:30 UTC | newest]
Thread overview: 15+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2008-03-05 22:15 [PATCH][RFC] Supporting Enlightened Windows 2008 Server Ky Srinivasan
2008-03-05 22:28 ` Daniel P. Berrange
2008-03-05 22:38 ` Daniel P. Berrange
2008-03-07 1:06 ` Ky Srinivasan
2008-03-07 1:05 ` Ky Srinivasan
2008-03-06 7:28 ` Keir Fraser
2008-03-06 10:15 ` Tim Deegan
2008-03-07 1:10 ` [PATCH][RFC] Supporting Enlightened Windows 2008Server Ky Srinivasan
2008-03-07 11:57 ` Tim Deegan
2008-03-07 13:19 ` Keir Fraser
2008-03-07 13:30 ` Keir Fraser
2008-03-07 1:08 ` Ky Srinivasan
-- strict thread matches above, loose matches on Subject: below --
2008-02-19 22:11 [PATCH][RFC] Supporting Enlightened Windows 2008 Server Ky Srinivasan
2008-02-20 9:44 ` Keir Fraser
2008-02-20 16:14 ` Ky Srinivasan
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.