Re: [PATCH][RFC] Supporting Enlightened Windows 2008 Server

All of lore.kernel.org
 help / color / mirror / Atom feed

* Re: [PATCH][RFC] Supporting Enlightened Windows 2008 Server
@ 2008-03-05 22:15 Ky Srinivasan
  2008-03-05 22:28 ` Daniel P. Berrange
  2008-03-06  7:28 ` Keir Fraser
  0 siblings, 2 replies; 15+ messages in thread
From: Ky Srinivasan @ 2008-03-05 22:15 UTC (permalink / raw)
  To: xen-devel

[-- Attachment #1: Type: text/plain, Size: 379 bytes --]

I am attaching updated versions of the patches that I posted a couple of weeks ago. These  have been merged up to the current unstable tip: changeset 17186:854b0704962b

These patches have been tested on the unstable tip.

Kier, what are your thoughts on accepting these patches.

Signed-off by K. Y. Srinivasan (ksrinivasan@novell.com)

Regards,

K. Y 








[-- Attachment #2: ns_tools.patch --]
[-- Type: text/plain, Size: 4957 bytes --]

Index: xen-unstable.hg/tools/python/xen/lowlevel/xc/xc.c
===================================================================
--- xen-unstable.hg.orig/tools/python/xen/lowlevel/xc/xc.c
+++ xen-unstable.hg/tools/python/xen/lowlevel/xc/xc.c
@@ -622,14 +622,14 @@ static PyObject *pyxc_hvm_build(XcObject
     int i;
 #endif
     char *image;
-    int memsize, vcpus = 1, acpi = 0, apic = 1;
+    int memsize, vcpus = 1, acpi = 0, apic = 1, extid = 0;
 
     static char *kwd_list[] = { "domid",
-				"memsize", "image", "vcpus", "acpi",
+				"memsize", "image", "vcpus", "extid", "acpi",
 				"apic", NULL };
-    if ( !PyArg_ParseTupleAndKeywords(args, kwds, "iis|iii", kwd_list,
+    if ( !PyArg_ParseTupleAndKeywords(args, kwds, "iis|iiii", kwd_list,
                                       &dom, &memsize,
-                                      &image, &vcpus, &acpi, &apic) )
+                                      &image, &vcpus, &extid, &acpi, &apic) )
         return NULL;
 
     if ( xc_hvm_build(self->xc_handle, dom, memsize, image) != 0 )
@@ -654,6 +654,7 @@ static PyObject *pyxc_hvm_build(XcObject
     va_hvm->checksum = -sum;
     munmap(va_map, XC_PAGE_SIZE);
 #endif
+    xc_set_hvm_param(self->xc_handle, dom, HVM_PARAM_EXTEND_HYPERVISOR, extid);
 
     return Py_BuildValue("{}");
 }
Index: xen-unstable.hg/tools/python/xen/xend/XendConfig.py
===================================================================
--- xen-unstable.hg.orig/tools/python/xen/xend/XendConfig.py
+++ xen-unstable.hg/tools/python/xen/xend/XendConfig.py
@@ -126,7 +126,7 @@ LEGACY_CFG_TO_XENAPI_CFG = reverse_dict(
 # Platform configuration keys.
 XENAPI_PLATFORM_CFG = [ 'acpi', 'apic', 'boot', 'device_model', 'loader', 'display', 
                         'fda', 'fdb', 'keymap', 'isa', 'localtime', 'monitor', 
-                        'nographic', 'pae', 'rtc_timeoffset', 'serial', 'sdl',
+                        'nographic', 'pae', 'extid', 'rtc_timeoffset', 'serial', 'sdl',
                         'soundhw','stdvga', 'usb', 'usbdevice', 'hpet', 'vnc',
                         'vncconsole', 'vncdisplay', 'vnclisten', 'timer_mode',
                         'vncpasswd', 'vncunused', 'xauthority', 'pci', 'vhpt',
Index: xen-unstable.hg/tools/python/xen/xend/image.py
===================================================================
--- xen-unstable.hg.orig/tools/python/xen/xend/image.py
+++ xen-unstable.hg/tools/python/xen/xend/image.py
@@ -504,6 +504,7 @@ class HVMImageHandler(ImageHandler):
 
         self.apic = int(vmConfig['platform'].get('apic', 0))
         self.acpi = int(vmConfig['platform'].get('acpi', 0))
+	self.extid  = int(vmConfig['platform'].get('extid',  0))
         self.guest_os_type = vmConfig['platform'].get('guest_os_type')
 
     # Return a list of cmd line args to the device models based on the
@@ -602,6 +603,7 @@ class HVMImageHandler(ImageHandler):
         log.debug("store_evtchn   = %d", store_evtchn)
         log.debug("memsize        = %d", mem_mb)
         log.debug("vcpus          = %d", self.vm.getVCpuCount())
+	log.debug("extid          = %d", self.extid)
         log.debug("acpi           = %d", self.acpi)
         log.debug("apic           = %d", self.apic)
 
@@ -609,6 +611,7 @@ class HVMImageHandler(ImageHandler):
                           image          = self.loader,
                           memsize        = mem_mb,
                           vcpus          = self.vm.getVCpuCount(),
+			  extid          = self.extid,
                           acpi           = self.acpi,
                           apic           = self.apic)
         rc['notes'] = { 'SUSPEND_CANCEL': 1 }
Index: xen-unstable.hg/tools/python/xen/xm/create.py
===================================================================
--- xen-unstable.hg.orig/tools/python/xen/xm/create.py
+++ xen-unstable.hg/tools/python/xen/xm/create.py
@@ -207,6 +207,10 @@ gopts.var('timer_mode', val='TIMER_MODE'
           use="""Timer mode (0=delay virtual time when ticks are missed;
           1=virtual time is always wallclock time.""")
 
+gopts.var('extid', val='EXTID',
+          fn=set_int, default=0,
+          use="Specify extention ID for a HVM domain.")
+
 gopts.var('acpi', val='ACPI',
           fn=set_int, default=1,
           use="Disable or enable ACPI of HVM domain.")
@@ -740,7 +744,7 @@ def configure_vifs(config_devs, vals):
 def configure_hvm(config_image, vals):
     """Create the config for HVM devices.
     """
-    args = [ 'device_model', 'pae', 'vcpus', 'boot', 'fda', 'fdb', 'timer_mode',
+    args = [ 'device_model', 'pae', 'extid', 'vcpus', 'boot', 'fda', 'fdb', 'timer_mode',
              'localtime', 'serial', 'stdvga', 'isa', 'nographic', 'soundhw',
              'vnc', 'vncdisplay', 'vncunused', 'vncconsole', 'vnclisten',
              'sdl', 'display', 'xauthority', 'rtc_timeoffset', 'monitor',

[-- Attachment #3: ns_xen_base.patch --]
[-- Type: text/plain, Size: 12589 bytes --]

%patch
Index: xen-unstable.hg/xen/arch/x86/hvm/svm/svm.c
===================================================================
--- xen-unstable.hg.orig/xen/arch/x86/hvm/svm/svm.c	2008-03-03 11:32:34.000000000 -0500
+++ xen-unstable.hg/xen/arch/x86/hvm/svm/svm.c	2008-03-03 14:03:00.000000000 -0500
@@ -50,6 +50,7 @@
 #include <asm/hvm/vpt.h>
 #include <asm/hvm/trace.h>
 #include <asm/hap.h>
+#include <asm/hvm/hvm_extensions.h>
 
 u32 svm_feature_flags;
 
@@ -941,12 +942,13 @@
 
 static void svm_vmexit_do_cpuid(struct cpu_user_regs *regs)
 {
-    unsigned int eax, ebx, ecx, edx, inst_len;
+    unsigned int input, eax, ebx, ecx, edx, inst_len;
 
     eax = regs->eax;
     ebx = regs->ebx;
     ecx = regs->ecx;
     edx = regs->edx;
+    input = eax;
 
     svm_cpuid_intercept(&eax, &ebx, &ecx, &edx);
 
@@ -955,6 +957,7 @@
     regs->ecx = ecx;
     regs->edx = edx;
 
+    ext_intercept_do_cpuid(input, regs);
     inst_len = __get_instruction_length(current, INSTR_CPUID, NULL);
     __update_guest_eip(regs, inst_len);
 }
@@ -972,6 +975,10 @@
     struct vcpu *v = current;
     struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
 
+    if (ext_intercept_do_msr_read(ecx, regs))
+    {
+            goto done;
+    }
     switch ( ecx )
     {
     case MSR_IA32_TSC:
@@ -1073,6 +1080,10 @@
     msr_content = (u32)regs->eax | ((u64)regs->edx << 32);
 
     hvmtrace_msr_write(v, ecx, msr_content);
+    if (ext_intercept_do_msr_write(ecx, regs))
+    {
+            goto done_msr_write;
+    }
 
     switch ( ecx )
     {
@@ -1129,7 +1140,7 @@
         }
         break;
     }
-
+done_msr_write:
     return X86EMUL_OKAY;
 
  gpf:
Index: xen-unstable.hg/xen/arch/x86/hvm/vmx/vmx.c
===================================================================
--- xen-unstable.hg.orig/xen/arch/x86/hvm/vmx/vmx.c	2008-03-03 11:32:34.000000000 -0500
+++ xen-unstable.hg/xen/arch/x86/hvm/vmx/vmx.c	2008-03-04 18:37:57.000000000 -0500
@@ -49,6 +49,7 @@
 #include <asm/hvm/vpt.h>
 #include <public/hvm/save.h>
 #include <asm/hvm/trace.h>
+#include <asm/hvm/hvm_extensions.h>
 
 enum handler_return { HNDL_done, HNDL_unhandled, HNDL_exception_raised };
 
@@ -1169,12 +1170,13 @@
 
 static void vmx_do_cpuid(struct cpu_user_regs *regs)
 {
-    unsigned int eax, ebx, ecx, edx;
+    unsigned int eax, ebx, ecx, edx, input;
 
     eax = regs->eax;
     ebx = regs->ebx;
     ecx = regs->ecx;
     edx = regs->edx;
+    input = eax;
 
     vmx_cpuid_intercept(&eax, &ebx, &ecx, &edx);
 
@@ -1182,6 +1184,7 @@
     regs->ebx = ebx;
     regs->ecx = ecx;
     regs->edx = edx;
+    ext_intercept_do_cpuid(input, regs);
 }
 
 #define CASE_GET_REG_P(REG, reg)    \
@@ -1468,6 +1471,9 @@
 
     HVM_DBG_LOG(DBG_LEVEL_1, "ecx=%x", ecx);
 
+    if (ext_intercept_do_msr_read(ecx, regs))
+        goto done;
+
     switch ( ecx )
     {
     case MSR_IA32_TSC:
@@ -1659,6 +1665,9 @@
     HVM_DBG_LOG(DBG_LEVEL_1, "ecx=%x, eax=%x, edx=%x",
                 ecx, (u32)regs->eax, (u32)regs->edx);
 
+    if (ext_intercept_do_msr_write(ecx, regs)) 
+        return X86EMUL_OKAY;
+
     msr_content = (u32)regs->eax | ((u64)regs->edx << 32);
 
     hvmtrace_msr_write(v, ecx, msr_content);
Index: xen-unstable.hg/xen/include/asm-x86/hvm/domain.h
===================================================================
--- xen-unstable.hg.orig/xen/include/asm-x86/hvm/domain.h	2008-03-03 11:32:36.000000000 -0500
+++ xen-unstable.hg/xen/include/asm-x86/hvm/domain.h	2008-03-03 14:03:00.000000000 -0500
@@ -79,6 +79,8 @@
 #endif
     bool_t                 hap_enabled;
     bool_t                 qemu_mapcache_invalidate;
+    void                   *ext_handle; /* will be NULL on creation (memset)*/
+    struct extension_intercept_vector   *ext_vector;
 };
 
 #endif /* __ASM_X86_HVM_DOMAIN_H__ */
Index: xen-unstable.hg/xen/include/public/hvm/params.h
===================================================================
--- xen-unstable.hg.orig/xen/include/public/hvm/params.h	2008-03-03 11:32:39.000000000 -0500
+++ xen-unstable.hg/xen/include/public/hvm/params.h	2008-03-03 14:03:00.000000000 -0500
@@ -50,10 +50,12 @@
 
 #define HVM_PARAM_BUFIOREQ_PFN 6
 
+#define HVM_PARAM_EXTEND_HYPERVISOR 7
+
 #ifdef __ia64__
-#define HVM_PARAM_NVRAM_FD     7
-#define HVM_PARAM_VHPT_SIZE    8
-#define HVM_PARAM_BUFPIOREQ_PFN	9
+#define HVM_PARAM_NVRAM_FD     8 
+#define HVM_PARAM_VHPT_SIZE    9 
+#define HVM_PARAM_BUFPIOREQ_PFN 10	
 #endif
 
 /*
@@ -75,15 +77,16 @@
  *   Missed interrupts are collapsed together and delivered as one 'late tick'.
  *   Guest time always tracks wallclock (i.e., real) time.
  */
-#define HVM_PARAM_TIMER_MODE   10
+//KYS Check the modifications done to this file
+#define HVM_PARAM_TIMER_MODE   11
 #define HVMPTM_delay_for_missed_ticks    0
 #define HVMPTM_no_delay_for_missed_ticks 1
 #define HVMPTM_no_missed_ticks_pending   2
 #define HVMPTM_one_missed_tick_pending   3
 
 /* Boolean: Enable virtual HPET (high-precision event timer)? (x86-only) */
-#define HVM_PARAM_HPET_ENABLED 11
+#define HVM_PARAM_HPET_ENABLED 12
 
-#define HVM_NR_PARAMS          12
+#define HVM_NR_PARAMS          13
 
 #endif /* __XEN_PUBLIC_HVM_PARAMS_H__ */
Index: xen-unstable.hg/xen/arch/x86/hvm/Makefile
===================================================================
--- xen-unstable.hg.orig/xen/arch/x86/hvm/Makefile	2008-03-03 11:32:34.000000000 -0500
+++ xen-unstable.hg/xen/arch/x86/hvm/Makefile	2008-03-03 14:03:00.000000000 -0500
@@ -1,5 +1,6 @@
 subdir-y += svm
 subdir-y += vmx
+subdir-y += hvm_ext 
 
 obj-y += emulate.o
 obj-y += hvm.o
Index: xen-unstable.hg/xen/arch/x86/hvm/hvm.c
===================================================================
--- xen-unstable.hg.orig/xen/arch/x86/hvm/hvm.c	2008-03-03 11:32:34.000000000 -0500
+++ xen-unstable.hg/xen/arch/x86/hvm/hvm.c	2008-03-03 14:03:00.000000000 -0500
@@ -43,6 +43,7 @@
 #include <asm/mc146818rtc.h>
 #include <asm/spinlock.h>
 #include <asm/hvm/hvm.h>
+#include <asm/hvm/hvm_extensions.h>
 #include <asm/hvm/vpt.h>
 #include <asm/hvm/support.h>
 #include <asm/hvm/cacheattr.h>
@@ -119,6 +120,7 @@
     rtc_migrate_timers(v);
     hpet_migrate_timers(v);
     pt_migrate(v);
+    ext_intercept_do_migrate_timers(v);
 }
 
 void hvm_do_resume(struct vcpu *v)
@@ -279,6 +281,7 @@
 
 void hvm_domain_destroy(struct domain *d)
 {
+    ext_intercept_domain_destroy(d);
     hvm_funcs.domain_destroy(d);
     vioapic_deinit(d);
     hvm_destroy_cacheattr_region_list(d);
@@ -562,8 +565,14 @@
 {
     int rc;
 
+    if ((rc = ext_intercept_vcpu_initialize(v)) != 0)
+        goto fail1;
+
     if ( (rc = vlapic_init(v)) != 0 )
+    {
+        ext_intercept_vcpu_destroy(v);
         goto fail1;
+    }
 
     if ( (rc = hvm_funcs.vcpu_initialise(v)) != 0 )
         goto fail2;
@@ -611,12 +620,14 @@
     hvm_funcs.vcpu_destroy(v);
  fail2:
     vlapic_destroy(v);
+    ext_intercept_vcpu_destroy(v);
  fail1:
     return rc;
 }
 
 void hvm_vcpu_destroy(struct vcpu *v)
 {
+    ext_intercept_vcpu_destroy(v);
     vlapic_destroy(v);
     hvm_funcs.vcpu_destroy(v);
 
@@ -1787,6 +1798,10 @@
     case 0:
         break;
     }
+    if (ext_intercept_do_hypercall(regs)) 
+    {
+        return HVM_HCALL_completed;
+    }
 
     if ( (eax >= NR_hypercalls) || !hvm_hypercall32_table[eax] )
     {
@@ -1928,6 +1943,7 @@
         vcpu_wake(v);
 
     gdprintk(XENLOG_INFO, "AP %d bringup succeeded.\n", vcpuid);
+    ext_intercept_vcpu_up(v);
     return 0;
 }
 
@@ -2171,6 +2187,9 @@
                 if ( a.value > HVMPTM_one_missed_tick_pending )
                     goto param_fail;
                 break;
+            case HVM_PARAM_EXTEND_HYPERVISOR:
+                if (hvm_ext_bind(d, (int)a.value)) 
+                    goto param_fail;
             }
             d->arch.hvm_domain.params[a.index] = a.value;
             rc = 0;
Index: xen-unstable.hg/xen/arch/x86/x86_64/asm-offsets.c
===================================================================
--- xen-unstable.hg.orig/xen/arch/x86/x86_64/asm-offsets.c	2008-03-03 11:32:34.000000000 -0500
+++ xen-unstable.hg/xen/arch/x86/x86_64/asm-offsets.c	2008-03-03 14:03:00.000000000 -0500
@@ -146,4 +146,7 @@
     BLANK();
 
     OFFSET(CPUINFO_ext_features, struct cpuinfo_x86, x86_capability[1]);
+    BLANK();
+
+    OFFSET(DOM_ext_vector, struct domain, arch.hvm_domain.ext_vector);
 }
Index: xen-unstable.hg/xen/arch/x86/hvm/vmx/x86_64/exits.S
===================================================================
--- xen-unstable.hg.orig/xen/arch/x86/hvm/vmx/x86_64/exits.S	2008-03-03 11:32:34.000000000 -0500
+++ xen-unstable.hg/xen/arch/x86/hvm/vmx/x86_64/exits.S	2008-03-03 14:03:00.000000000 -0500
@@ -112,6 +112,14 @@
         ALIGN
 ENTRY(vmx_asm_do_vmentry)
         GET_CURRENT(%rbx)
+        mov  VCPU_domain(%rbx),%rax
+        mov  DOM_ext_vector(%rax),%rdx
+        test  %rdx,%rdx
+        je    vmx_no_ext_vector
+        sti
+        callq *(%rdx)
+vmx_no_ext_vector:
+
         cli                             # tests must not race interrupts
 
         movl  VCPU_processor(%rbx),%eax
Index: xen-unstable.hg/xen/arch/x86/hvm/svm/x86_64/exits.S
===================================================================
--- xen-unstable.hg.orig/xen/arch/x86/hvm/svm/x86_64/exits.S	2008-03-03 11:32:34.000000000 -0500
+++ xen-unstable.hg/xen/arch/x86/hvm/svm/x86_64/exits.S	2008-03-03 14:03:00.000000000 -0500
@@ -37,6 +37,14 @@
 
 ENTRY(svm_asm_do_resume)
         GET_CURRENT(%rbx)
+        mov  VCPU_domain(%rbx),%rax
+        mov  DOM_ext_vector(%rax),%rdx
+        test  %rdx,%rdx
+        je    svm_no_ext_vector
+        sti
+        callq *(%rdx)
+svm_no_ext_vector:
+
         CLGI
 
         movl VCPU_processor(%rbx),%eax
Index: xen-unstable.hg/xen/arch/x86/hvm/save.c
===================================================================
--- xen-unstable.hg.orig/xen/arch/x86/hvm/save.c	2008-03-03 11:32:34.000000000 -0500
+++ xen-unstable.hg/xen/arch/x86/hvm/save.c	2008-03-03 14:03:00.000000000 -0500
@@ -23,6 +23,8 @@
 
 #include <asm/hvm/support.h>
 #include <public/hvm/save.h>
+#include <public/hvm/params.h>
+#include <asm/hvm/hvm_extensions.h>
 
 void arch_hvm_save(struct domain *d, struct hvm_save_header *hdr)
 {
@@ -31,8 +33,7 @@
     /* Save some CPUID bits */
     cpuid(1, &eax, &ebx, &ecx, &edx);
     hdr->cpuid = eax;
-
-    hdr->pad0 = 0;
+    hdr->ext_id = d->arch.hvm_domain.params[HVM_PARAM_EXTEND_HYPERVISOR];
 }
 
 int arch_hvm_load(struct domain *d, struct hvm_save_header *hdr)
@@ -61,6 +62,9 @@
 
     /* VGA state is not saved/restored, so we nobble the cache. */
     d->arch.hvm_domain.stdvga.cache = 0;
+    d->arch.hvm_domain.params[HVM_PARAM_EXTEND_HYPERVISOR] = hdr->ext_id;
+    if (hvm_ext_bind(d, hdr->ext_id))
+        return -1;
 
     return 0;
 }
Index: xen-unstable.hg/xen/include/public/arch-x86/hvm/save.h
===================================================================
--- xen-unstable.hg.orig/xen/include/public/arch-x86/hvm/save.h	2008-03-03 11:32:39.000000000 -0500
+++ xen-unstable.hg/xen/include/public/arch-x86/hvm/save.h	2008-03-03 14:03:00.000000000 -0500
@@ -38,7 +38,7 @@
     uint32_t version;           /* File format version */
     uint64_t changeset;         /* Version of Xen that saved this file */
     uint32_t cpuid;             /* CPUID[0x01][%eax] on the saving machine */
-    uint32_t pad0;
+    uint32_t ext_id;		/* extension ID */
 };
 
 DECLARE_HVM_SAVE_TYPE(HEADER, 1, struct hvm_save_header);
@@ -422,9 +422,30 @@
 
 DECLARE_HVM_SAVE_TYPE(MTRR, 14, struct hvm_hw_mtrr);
 
+struct hvm_ns_veridian_dom {
+    uint64_t guestid_msr;
+    uint64_t hypercall_msr;
+    uint32_t long_mode;
+    uint32_t pad0;
+};
+DECLARE_HVM_SAVE_TYPE(NS_VERIDIAN_DOM, 15, struct hvm_ns_veridian_dom);
+
+struct hvm_ns_veridian_cpu {
+    uint64_t control_msr;
+    uint64_t version_msr;
+    uint64_t sief_msr;
+    uint64_t simp_msr;
+    uint64_t eom_msr;
+    uint64_t int_msr[16];
+    struct {
+        uint64_t config;
+        uint64_t count;
+    } timers[4];
+};
+DECLARE_HVM_SAVE_TYPE(NS_VERIDIAN_CPU, 16, struct hvm_ns_veridian_cpu);
 /* 
  * Largest type-code in use
  */
-#define HVM_SAVE_CODE_MAX 14
+#define HVM_SAVE_CODE_MAX 16
 
 #endif /* __XEN_PUBLIC_HVM_SAVE_X86_H__ */

[-- Attachment #4: ns_xen_extension.patch --]
[-- Type: text/plain, Size: 126416 bytes --]

%patch
Index: xen-unstable.hg/xen/arch/x86/hvm/hvm_ext/Makefile
===================================================================
--- /dev/null	1970-01-01 00:00:00.000000000 +0000
+++ xen-unstable.hg/xen/arch/x86/hvm/hvm_ext/Makefile	2008-03-03 14:03:49.000000000 -0500
@@ -0,0 +1,3 @@
+subdir-y += novell 
+
+obj-y += hvm_ext.o
Index: xen-unstable.hg/xen/include/asm-x86/hvm/hvm_extensions.h
===================================================================
--- /dev/null	1970-01-01 00:00:00.000000000 +0000
+++ xen-unstable.hg/xen/include/asm-x86/hvm/hvm_extensions.h	2008-03-03 14:03:49.000000000 -0500
@@ -0,0 +1,252 @@
+/****************************************************************************
+ |
+ | Copyright (c) [2007, 2008] Novell, Inc.
+ | All Rights Reserved.
+ |
+ | This program is free software; you can redistribute it and/or
+ | modify it under the terms of version 2 of the GNU General Public License as
+ | published by the Free Software Foundation.
+ |
+ | This program is distributed in the hope that it will be useful,
+ | but WITHOUT ANY WARRANTY; without even the implied warranty of
+ | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ | GNU General Public License for more details.
+ |
+ | You should have received a copy of the GNU General Public License
+ | along with this program; if not, contact Novell, Inc.
+ |
+ | To contact Novell about this file by physical or electronic mail,
+ | you may find current contact information at www.novell.com
+ |
+ |***************************************************************************
+*/
+
+/*
+ * hvm_extensions.h  
+ * This file implements a framework for extending the hypervisor 
+ * functionality in a modular fashion. The framework is comprised of 
+ * two components: A) A set of intercepts that will allow the extension 
+ * module to implement its functionality by intercepting the corresponding
+ * code paths in Xen and B) A controlled runtime for the extension module.
+ * Initially the goal was to pacakage the extension  module as a boot-time
+ * loadable module. This may not be the way we wend up packaging it. 
+ *
+ * Engineering Contact: K. Y. Srinivasan
+ */
+
+#ifndef HVM_EXTENSION_H
+#define HVM_EXTENSION_H 
+
+#include <xen/sched.h>
+#include <asm/domain.h>
+#include <xen/timer.h>
+#include <xen/time.h>
+#include <asm/regs.h>
+#include <asm/types.h>
+#include <asm/hvm/io.h>
+#include <asm/hvm/hvm.h>
+#include <asm/hvm/domain.h>
+
+
+			
+/*
+ * Hypervisor extension hooks.
+ */
+typedef struct extension_intercept_vector {
+	/* Do not move the first field (do_continuation). Offset 
+	 * hardcoded in assembly files exits.S (VMX and SVM).
+	 */
+	void (*do_continuation)(void);
+	int (*domain_create)(struct domain *d);
+	void (*domain_destroy)(struct domain *d);
+	int (*vcpu_initialize)(struct vcpu *v);
+	void (*vcpu_destroy)(struct vcpu *v);
+	int (*do_cpuid)(uint32_t idx, struct cpu_user_regs *regs);
+	int (*do_msr_read)(uint32_t idx, struct cpu_user_regs *regs);
+	int (*do_msr_write)(uint32_t idx, struct cpu_user_regs *regs);
+	int (*do_hypercall)(struct cpu_user_regs *pregs);
+	void (*do_migrate_timers)(struct vcpu *v);
+	void (*vcpu_up)(struct vcpu *v);
+} extension_intercept_vector_t;
+
+static inline int
+ext_intercept_domain_create(struct domain *d)
+{
+	if (d->arch.hvm_domain.ext_vector) {
+		return(d->arch.hvm_domain.ext_vector->domain_create(d));
+	}
+	return (0);
+}
+
+static inline void 
+ext_intercept_domain_destroy(struct domain *d)
+{
+	if (d->arch.hvm_domain.ext_vector) {
+		d->arch.hvm_domain.ext_vector->domain_destroy(d);
+	}
+}
+
+static inline int
+ext_intercept_vcpu_initialize(struct vcpu *v)
+{
+	struct domain *d = v->domain;
+	if (d->arch.hvm_domain.ext_vector) {
+		return(d->arch.hvm_domain.ext_vector->vcpu_initialize(v));
+	}
+	return (0);
+}
+
+
+static inline void 
+ext_intercept_vcpu_up(struct vcpu *v)
+{
+	struct domain *d = current->domain;
+	if (d->arch.hvm_domain.ext_vector) {
+		return(d->arch.hvm_domain.
+		ext_vector->vcpu_up(v));
+	}
+}
+	
+static inline void 
+ext_intercept_vcpu_destroy(struct vcpu *v)
+{
+	struct domain *d = v->domain;
+	if (d->arch.hvm_domain.ext_vector) {
+		d->arch.hvm_domain.ext_vector->vcpu_destroy(v);
+	}
+}
+
+static inline int
+ext_intercept_do_cpuid(uint32_t idx, struct cpu_user_regs *regs)
+{
+	struct domain *d = current->domain;
+	if (d->arch.hvm_domain.ext_vector) {
+		return(d->arch.hvm_domain.ext_vector->do_cpuid(
+								idx, regs));
+	}
+	return (0);
+}
+
+static inline int
+ext_intercept_do_msr_read(uint32_t idx, struct cpu_user_regs *regs)
+{
+	struct domain *d = current->domain;
+	if (d->arch.hvm_domain.ext_vector) {
+		return(d->arch.hvm_domain.
+		ext_vector->do_msr_read(idx, regs));
+	}
+	return (0);
+}
+static inline int
+ext_intercept_do_msr_write(uint32_t idx, struct cpu_user_regs *regs)
+{
+	struct domain *d = current->domain;
+	if (d->arch.hvm_domain.ext_vector) {
+		return(d->arch.hvm_domain.
+		ext_vector->do_msr_write(idx, regs));
+	}
+	return (0);
+}
+
+static inline int
+ext_intercept_do_hypercall(struct cpu_user_regs *regs)
+{
+	struct domain *d = current->domain;
+	if (d->arch.hvm_domain.ext_vector) {
+		return(d->arch.hvm_domain.
+		ext_vector->do_hypercall(regs));
+	}
+	return (0);
+}
+
+static inline void 
+ext_intercept_do_migrate_timers(struct vcpu *v)
+{
+	struct domain *d = current->domain;
+	if (d->arch.hvm_domain.ext_vector) {
+		return(d->arch.hvm_domain.
+		ext_vector->do_migrate_timers(v));
+	}
+}
+
+static inline void 
+ext_intercept_do_continuation(void)
+{
+	struct domain *d = current->domain;
+	if (d->arch.hvm_domain.ext_vector) {
+		d->arch.hvm_domain.
+		ext_vector->do_continuation();
+	}
+}
+
+/*
+ * Base hypervisor support available to extension modules.
+ * We may choose to do away with this level of indirection!
+ * It may still be useful to have a controlled environment for the
+ * extension modules.
+ */
+typedef struct xen_call_vector {
+	/*
+	 * We may want to embed version/compiler info here to avoid mismatches
+	 */
+	struct hvm_function_table *hvmFuncTable;
+	struct hvm_mmio_handler	*mmIoHandler;
+	void (*extPanic)(const char *s, ...);
+	void (*extPrintk)(const char *format, ...);
+	void (*extPostInterrupt)(struct vcpu *v, int vector, int type);
+	void (*extSetTimer)(struct timer *timer, s_time_t expires);
+	s_time_t  (*extGetTimeSinceBoot)(void);
+	void * (*extGetVirtFromGmfn)(struct domain *d, unsigned long gmfn);
+	unsigned long (*extGetMfnFromGmfn)(struct domain *d, unsigned long gmfn); 
+	unsigned long (*extGetMfnFromGva)(unsigned long va); 
+	void (*extUnmapDomainPage)(void *p);
+	void *(*extAllocMem)(size_t size);
+	void (*extFreeMem)(void *ptr);
+	enum hvm_copy_result (*extCopyToGuestPhysical)(paddr_t paddr, void *buf, int size);
+	enum hvm_copy_result (*extCopyFromGuestPhysical)(void *buf, paddr_t paddr, int size);
+	void *(*extAllocDomHeapPage)(void);
+	void (*extFreeDomHeapPage)(void *);
+	void * (*extGetVirtFromPagePtr)(void *);
+	void (*extVcpuPause)(struct vcpu *v);
+	void (*extVcpuUnPause)(struct vcpu *v);
+	void (*extArchGetDomainInfoCtxt)(struct vcpu *v, 
+		struct vcpu_guest_context *);
+	int (*extArchSetDomainInfoCtxt)(struct vcpu *v, 
+		struct vcpu_guest_context *);
+	int (*extCpuIsIntel)(void );
+	int (*extWrmsrHypervisorRegs)(uint32_t idx, uint32_t eax, 
+					uint32_t edx);
+	void (*extKillTimer)(struct timer *timer);
+	void (*extMigrateTimer)(struct timer *timer, unsigned int new_cpu);
+} xen_call_vector_t;
+
+#define MAX_EXTENSION_ID	1
+
+/*
+ * int hvm_ext_bind(struct domain *d, int ext_id)
+ * Bind the specified domain to the specified extension module.
+ *
+ * Calling/Exit State:
+ *	None.
+ *
+ * Remarks:
+ *	The goal is to support per-domain extension modules. Domain
+ * 	creating tools will have to specify the needed extension 
+ * 	module ID. For now it is hard coded.
+ */ 
+int hvm_ext_bind(struct domain *d, int ext_id);
+
+/*
+ * int hvm_ext_register(int ext_id, 
+ *			struct extension_intercept_vector *ext_vector,
+ *			struct xen_call_vector *xen_vector) 
+ * Register the extension module with the hypervisor
+ * Calling/Exit State:
+ *	None.
+ */
+
+int hvm_ext_register(int ext_id, struct extension_intercept_vector *ext_vector,
+			struct xen_call_vector *xen_vector); 
+
+
+#endif
Index: xen-unstable.hg/xen/arch/x86/hvm/hvm_ext/hvm_ext.c
===================================================================
--- /dev/null	1970-01-01 00:00:00.000000000 +0000
+++ xen-unstable.hg/xen/arch/x86/hvm/hvm_ext/hvm_ext.c	2008-03-03 14:03:49.000000000 -0500
@@ -0,0 +1,350 @@
+/****************************************************************************
+ |
+ | Copyright (c) [2007, 2008] Novell, Inc.
+ | All Rights Reserved.
+ |
+ | This program is free software; you can redistribute it and/or
+ | modify it under the terms of version 2 of the GNU General Public License as
+ | published by the Free Software Foundation.
+ |
+ | This program is distributed in the hope that it will be useful,
+ | but WITHOUT ANY WARRANTY; without even the implied warranty of
+ | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ | GNU General Public License for more details.
+ |
+ | You should have received a copy of the GNU General Public License
+ | along with this program; if not, contact Novell, Inc.
+ |
+ | To contact Novell about this file by physical or electronic mail,
+ | you may find current contact information at www.novell.com
+ |
+ |***************************************************************************
+*/
+
+/*
+ * hvm_ext.c 
+ * Glue code for implementing the extension module. 
+ *
+ * Engineering Contact: K. Y. Srinivasan
+ */
+
+#include <asm/hvm/hvm_extensions.h>
+#include <xen/lib.h>
+#include <asm/event.h>
+#include <asm/shadow.h>
+#include <asm/hvm/support.h>
+#include <xen/domain_page.h>
+#include <xen/domain.h>
+#include <xen/mm.h>
+#include <xen/event.h>
+#include <xen/sched.h>
+
+
+struct extension_intercept_vector *intercept_vector;
+
+/*
+ * static void
+ * hvm_ext_inject_interrupt(struct vcpu *v, int vector, int type)
+ * Inject the specified exception to the specified virtual cpu.
+ *
+ * Calling/Exit State:
+ *	None.
+ */
+static void
+hvm_ext_inject_interrupt(struct vcpu *v, int vector, int type)
+{
+	struct vlapic *vlapic = vcpu_vlapic(v);
+
+	/*
+	 * XXXKYS: Check the trigger mode.
+	 */
+	if (vlapic_set_irq(vlapic, vector, 1)) {
+		vcpu_kick(v);
+	}
+}
+
+/*
+ * static void
+ * hvm_ext_set_timer(struct timer *timer, s_time_t expires)
+ * Set a timeout.
+ *
+ * Calling/Exit State:
+ *	None.
+ */
+static void
+hvm_ext_set_timer(struct timer *timer, s_time_t expires)
+{
+	set_timer(timer, expires);
+}
+
+/*
+ * static void
+ * hvm_ext_kill_timer(struct timer *timer)
+ * Kill the specified timer.
+ *
+ * Calling/Exit State:
+ *	None.
+ */
+static void
+hvm_ext_kill_timer(struct timer *timer)
+{
+	kill_timer(timer);
+}
+
+/*
+ * static void
+ * hvm_ext_migrate_timer(struct timer *timer, unsigned int new_cpu)
+ * Migrate the  timer to the new cpu.
+ *
+ * Calling/Exit State:
+ *	None.
+ */
+static void
+hvm_ext_migrate_timer(struct timer *timer, unsigned int new_cpu)
+{
+	migrate_timer(timer, new_cpu);
+}
+
+
+/*
+ * static void *
+ * hvm_ext_get_virt_from_gmfn(struct domain *d, unsigned long gmfn)
+ * Given a guest frame number return a virtual address at which 
+ * the specified page can be accessed in the hypervisor.
+ *
+ * Calling/Exit State:
+ * 	None.
+ */
+static void *
+hvm_ext_get_virt_from_gmfn(struct domain *d, unsigned long gmfn)
+{
+	unsigned long mfn = gmfn_to_mfn(d, gmfn);
+	if (mfn == INVALID_MFN) {
+		return (NULL);
+	}
+	return (map_domain_page_global(mfn));
+}
+
+/*
+ * static unsigned long
+ * hvm_ext_get_mfn_from_gmfn(struct domain *d, unsigned long gmfn)
+ * Get the machine frame number given the guest frame number.
+ *
+ * Calling/Exit State:
+ *	None.
+ */
+static unsigned long
+hvm_ext_get_mfn_from_gmfn(struct domain *d, unsigned long gmfn)
+{
+	return (gmfn_to_mfn(d, gmfn));
+}
+
+/*
+ * static unsigned long
+ * hvm_ext_get_mfn_from_gva(unsigned long va)
+ * Given the guest virtual address return the machine frame number backing the 
+ * address.
+ *
+ * Calling/Exit State:
+ *	None.
+ */
+static unsigned long
+hvm_ext_get_mfn_from_gva(unsigned long va)
+{
+	uint32_t pfec = PFEC_page_present;
+	unsigned long gfn;
+	gfn = paging_gva_to_gfn(current, va, &pfec);
+	return (gmfn_to_mfn((current->domain), gfn));
+}
+
+/*
+ * static void *
+ * hvm_ext_alloc_mem(size_t size)
+ * Allocate specified bytes of memory.
+ *
+ * Calling/Exit State:
+ *	None.
+ */
+static void *
+hvm_ext_alloc_mem(size_t size)
+{
+	return (xmalloc_bytes(size));
+}
+
+/*
+ * static void *
+ * hvm_ext_alloc_domheap_page(void)
+ * Allocate a page from the per-domain heap.
+ *
+ * Calling/Exit State:
+ *	None.
+ */
+static void *
+hvm_ext_alloc_domheap_page(void)
+{
+	return (alloc_domheap_page(NULL));
+}
+
+/*
+ * static void
+ * hvm_ext_free_domheap_page(void *p)
+ * Free a dom heap page.
+ *
+ * Calling/Exit State:
+ *	None.
+ */
+static void
+hvm_ext_free_domheap_page(void *p)
+{
+	free_domheap_pages(p, 0);
+}
+
+/*
+ * static void *
+ * hvm_ext_get_virt_from_page_ptr(void *page)
+ * Map the specified page a return a hypervisor VA.
+ *
+ *
+ * Calling/Exit State:
+ *	None.
+ */
+static void *
+hvm_ext_get_virt_from_page_ptr(void *page)
+{
+	struct page_info *pg = page;
+	unsigned long mfn = page_to_mfn(pg);
+	return (map_domain_page_global(mfn));
+}
+
+extern struct cpuinfo_x86 boot_cpu_data;
+
+/*
+ * static int
+ * hvm_ext_cpu_is_intel(void)
+ * Check if the CPU vendor is Intel.
+ *
+ *
+ * Calling/Exit State:
+ *	None.
+ */
+static int
+hvm_ext_cpu_is_intel(void)
+{
+	if (boot_cpu_data.x86_vendor == 0) {
+		return (1);
+	}
+	return (0);
+}
+
+/*
+ * int 
+ * hvm_ext_bind(struct domain *d, int ext_id)
+ * Bind the specified domain with the specified extension module.
+ *
+ *
+ * Calling/Exit State:
+ *	None.
+ */
+int 
+hvm_ext_bind(struct domain *d, int ext_id)
+{
+	int i;
+	/*
+	 * XXXKYS: Assuming that this function will be called before the
+	 * new domain begins to run. It is critical that this be the case.
+	 */
+	if (ext_id == 0) {
+		/*
+		 * This is the default value for this parameter.
+		 */
+		return (0);
+	}
+	d->arch.hvm_domain.ext_vector = intercept_vector; 
+	/*
+	 * Let the extension initialize its state.
+	 */
+	if (intercept_vector->domain_create(d)) {
+		return (1);
+	}
+	for (i=0; i < MAX_VIRT_CPUS; i++) {
+		if (d->vcpu[i] != NULL) {
+			if (intercept_vector->vcpu_initialize(d->vcpu[i])) {
+				int j;
+				for (j= (i-1); j >=0; j--) {
+					intercept_vector->vcpu_destroy(
+					d->vcpu[j]);
+				}	
+				intercept_vector->domain_destroy(d);
+				return (1);
+			}
+		}
+	}
+	return (0);
+}
+
+
+void extPanic(const char *fmt, ...)
+{
+	domain_crash_synchronous();
+}
+
+/*
+ * For now we will support only one extension; id==1!
+ */
+
+extern struct hvm_function_table hvm_funcs;
+extern struct hvm_mmio_handler vlapic_mmio_handler;
+
+/*
+ * int 
+ * hvm_ext_register(int ext_id, struct extension_intercept_vector *ext_vector,
+ *
+ * Register the invoking extension module with the hypervisor.
+ *
+ *
+ * Calling/Exit State:
+ *	None.
+ */
+int 
+hvm_ext_register(int ext_id, struct extension_intercept_vector *ext_vector,
+                        struct xen_call_vector *xen_vector)
+{
+	ASSERT(ext_id == 1);
+	intercept_vector = ext_vector;
+	/*
+	 * Populate the vector of services from the xen side; ultimately
+	 * we may decide to get rid of this level of indirection; it may 
+	 * still be useful to limit the breadth of xen dependency here.
+	 */
+	xen_vector->hvmFuncTable = &hvm_funcs;
+	xen_vector->mmIoHandler = &vlapic_mmio_handler;
+	xen_vector->extPanic = extPanic;
+	xen_vector->extPrintk = printk;
+	xen_vector->extPostInterrupt = hvm_ext_inject_interrupt;
+	xen_vector->extSetTimer = hvm_ext_set_timer;
+	xen_vector->extKillTimer = hvm_ext_kill_timer;
+	xen_vector->extMigrateTimer = hvm_ext_migrate_timer;
+	xen_vector->extGetTimeSinceBoot = get_s_time;
+	xen_vector->extGetVirtFromGmfn = hvm_ext_get_virt_from_gmfn;
+	xen_vector->extGetMfnFromGmfn = hvm_ext_get_mfn_from_gmfn;
+ 
+	xen_vector->extGetMfnFromGva = hvm_ext_get_mfn_from_gva;
+#ifdef CONFIG_DOMAIN_PAGE
+	xen_vector->extUnmapDomainPage = unmap_domain_page_global;
+#endif
+	xen_vector->extAllocMem = hvm_ext_alloc_mem;
+	xen_vector->extFreeMem = xfree;
+	xen_vector->extCopyToGuestPhysical = hvm_copy_to_guest_phys;
+	xen_vector->extCopyFromGuestPhysical = hvm_copy_from_guest_phys;
+	xen_vector->extAllocDomHeapPage = hvm_ext_alloc_domheap_page;
+	xen_vector->extFreeDomHeapPage = hvm_ext_free_domheap_page;
+	xen_vector->extGetVirtFromPagePtr = hvm_ext_get_virt_from_page_ptr;
+	xen_vector->extVcpuPause = vcpu_pause;
+	xen_vector->extVcpuUnPause = vcpu_unpause;
+	xen_vector->extArchGetDomainInfoCtxt = arch_get_info_guest; 
+	xen_vector->extArchSetDomainInfoCtxt = arch_set_info_guest; 
+	xen_vector->extCpuIsIntel = hvm_ext_cpu_is_intel;
+	xen_vector->extWrmsrHypervisorRegs = wrmsr_hypervisor_regs; 
+	
+	return 0;
+}
Index: xen-unstable.hg/xen/arch/x86/hvm/hvm_ext/novell/Makefile
===================================================================
--- /dev/null	1970-01-01 00:00:00.000000000 +0000
+++ xen-unstable.hg/xen/arch/x86/hvm/hvm_ext/novell/Makefile	2008-03-03 14:03:49.000000000 -0500
@@ -0,0 +1,2 @@
+obj-y += nsintercept.o
+obj-y += nshypercall.o
Index: xen-unstable.hg/xen/arch/x86/hvm/hvm_ext/novell/ns_errno.h
===================================================================
--- /dev/null	1970-01-01 00:00:00.000000000 +0000
+++ xen-unstable.hg/xen/arch/x86/hvm/hvm_ext/novell/ns_errno.h	2008-03-03 14:03:49.000000000 -0500
@@ -0,0 +1,62 @@
+/****************************************************************************
+ |
+ | Copyright (c) [2007, 2008] Novell, Inc.
+ | All Rights Reserved.
+ |
+ | This program is free software; you can redistribute it and/or
+ | modify it under the terms of version 2 of the GNU General Public License as
+ | published by the Free Software Foundation.
+ |
+ | This program is distributed in the hope that it will be useful,
+ | but WITHOUT ANY WARRANTY; without even the implied warranty of
+ | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ | GNU General Public License for more details.
+ |
+ | You should have received a copy of the GNU General Public License
+ | along with this program; if not, contact Novell, Inc.
+ |
+ | To contact Novell about this file by physical or electronic mail,
+ | you may find current contact information at www.novell.com
+ |
+ |***************************************************************************
+*/
+
+/*
+ * ns_errno.h
+ * Error codes for the  Novell Shim.
+ *
+ * Engineering Contact: K. Y. Srinivasan
+ */
+
+#ifndef NS_ERRNO_H
+#define NS_ERRNO_H
+
+#define NS_STATUS_SUCCESS			0x0000
+#define NS_STATUS_INVALID_HYPERCALL_CODE	0x0002
+#define NS_STATUS_INVALID_HYPERCALL_INPUT	0x0003
+#define NS_STATUS_INVALID_ALIGNMENT		0x0004
+#define NS_STATUS_INVALID_PARAMETER		0x0005
+#define NS_STATUS_ACCESS_DENIED			0x0006
+#define NS_STATUS_INVALID_PARTITION_STATE	0x0007
+#define NS_STATUS_OPERATION_DENIED		0x0008
+#define NS_STATUS_UNKNOWN_PROPERTY		0x0009
+#define NS_STATUS_PROPERTY_VALUE_OUT_OF_RANGE	0x000A
+#define NS_STATUS_INSUFFICIENT_MEMORY		0x000B
+#define NS_STATUS_PARTITION_TOO_DEEP		0x000C
+#define NS_STATUS_INVALID_PARTITION_ID		0x000D
+#define NS_STATUS_INVALID_VP_INDEX		0x000E
+#define NS_STATUS_UNABLE_TO_RESTORE_STATE	0x000F
+#define NS_STATUS_NOT_FOUND			0x0010
+#define NS_STATUS_INVALID_PORT_ID		0x0011
+#define NS_STATUS_INVALID_CONNECTION_ID		0x0012
+#define NS_STATUS_INSUFFICIENT_BUFFERS		0x0013
+#define NS_STATUS_NOT_ACKNOWLEDGED		0x0014
+#define NS_STATUS_INVALID_VP_STATE		0x0015
+#define NS_STATUS_ACKNOWLEDGED			0x0016
+#define NS_STATUS_INVALID_SAVE_RESTORE_STATE	0x0017
+#define	NS_STATUS_NO_MEMORY_4PAGES		0x0100
+#define	NS_STATUS_NO_MEMORY_16PAGES		0x0101
+#define	NS_STATUS_NO_MEMORY_64PAGES		0x0102
+#define	NS_STATUS_NO_MEMORY_256PAGES		0x0103
+#define	NS_STATUS_NO_MEMORY_1024PAGES		0x0104
+#endif 	
Index: xen-unstable.hg/xen/arch/x86/hvm/hvm_ext/novell/ns_shim.h
===================================================================
--- /dev/null	1970-01-01 00:00:00.000000000 +0000
+++ xen-unstable.hg/xen/arch/x86/hvm/hvm_ext/novell/ns_shim.h	2008-03-03 14:03:49.000000000 -0500
@@ -0,0 +1,480 @@
+/****************************************************************************
+ |
+ | Copyright (c) [2007, 2008] Novell, Inc.
+ | All Rights Reserved.
+ |
+ | This program is free software; you can redistribute it and/or
+ | modify it under the terms of version 2 of the GNU General Public License as
+ | published by the Free Software Foundation.
+ |
+ | This program is distributed in the hope that it will be useful,
+ | but WITHOUT ANY WARRANTY; without even the implied warranty of
+ | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ | GNU General Public License for more details.
+ |
+ | You should have received a copy of the GNU General Public License
+ | along with this program; if not, contact Novell, Inc.
+ |
+ | To contact Novell about this file by physical or electronic mail,
+ | you may find current contact information at www.novell.com
+ |
+ |***************************************************************************
+*/
+
+/*
+ * Novell Shim Implementation.
+ *
+ * Engineering Contact: K. Y. Srinivasan
+ */
+
+#ifndef NS_SHIM_H
+#define NS_SHIM_H
+
+#include <xen/sched.h>
+#include <xen/types.h>
+#include <xen/timer.h>
+#include <asm/current.h>
+#include <asm/domain.h>
+#include <asm/shadow.h>
+#include <public/xen.h>
+
+#include "nshypercall.h"
+
+/*
+ * Synthetic MSR addresses
+ */
+#define NS_MSR_GUEST_OS_ID	0x40000000
+#define NS_MSR_HYPERCALL	0x40000001
+#define NS_MSR_VP_INDEX		0x40000002
+#define NS_MSR_SYSTEM_RESET	0x40000003
+#define NS_MSR_TIME_REF_COUNT	0x40000020
+#define NS_MSR_EOI		0x40000070
+#define NS_MSR_ICR		0x40000071
+#define NS_MSR_TPR		0x40000072
+
+#define NS_MSR_SCONTROL		0x40000080
+#define NS_MSR_SVERSION		0x40000081
+#define NS_MSR_SIEFP		0x40000082
+#define NS_MSR_SIMP		0x40000083
+#define NS_MSR_SEOM		0x40000084
+#define NS_MSR_SINT0		0x40000090
+#define NS_MSR_SINT1		0x40000091
+#define NS_MSR_SINT2		0x40000092
+#define NS_MSR_SINT3		0x40000093
+#define NS_MSR_SINT4		0x40000094
+#define NS_MSR_SINT5		0x40000095
+#define NS_MSR_SINT6		0x40000096
+#define NS_MSR_SINT7		0x40000097
+#define NS_MSR_SINT8		0x40000098
+#define NS_MSR_SINT9		0x40000099
+#define NS_MSR_SINT10		0x4000009A
+#define NS_MSR_SINT11		0x4000009B
+#define NS_MSR_SINT12		0x4000009C
+#define NS_MSR_SINT13		0x4000009D
+#define NS_MSR_SINT14		0x4000009E
+#define NS_MSR_SINT15		0x4000009F
+
+#define NS_MSR_TIMER0_CONFIG	0x400000B0
+#define NS_MSR_TIMER0_COUNT	0x400000B1
+#define NS_MSR_TIMER1_CONFIG	0x400000B2
+#define NS_MSR_TIMER1_COUNT	0x400000B3
+#define NS_MSR_TIMER2_CONFIG	0x400000B4
+#define NS_MSR_TIMER2_COUNT	0x400000B5
+#define NS_MSR_TIMER3_CONFIG	0x400000B6
+#define NS_MSR_TIMER3_COUNT	0x400000B7
+
+/*
+ * MSR for supporting PV drivers on longhorn.
+ */
+#define NS_MSR_PVDRV_HCALL	0x40001000
+
+/*
+ * MSR for supporting other enlightened oses.
+ */
+#define NS_MSR_NONLH_GUEST_OS_ID	0x40001000
+
+/*
+ * Novell Shim VCPU flags.
+ * A VCPU is considered up when it is capable of invoking hypercalls.
+ */
+#define NS_VCPU_BOOT_CPU	0x00000001
+#define NS_VCPU_UP		0x00000002
+
+/*
+ * Novell shim flush flags.
+ */
+
+#define NS_FLUSH_TLB		0X01
+#define NS_FLUSH_INVLPG		0X02
+
+/*
+ * We use the following global state to manage TLB flush requests from the 
+ * guest. At most only one flush can be active in the guest; we may have to
+ * revisit this if this is a bottleneck.
+ */
+typedef struct nsGlobalFlushState {
+	int	cpuCount; //0 unused; else #cpus participating
+	cpumask_t	waiters; //Cpus waiting for the flush block
+	struct vcpu	*currentOwner;
+	u64	retVal;
+	flushVa_t	*flushParam;
+	unsigned short	repCount;
+} nsGlobalFlushState_t;
+	
+typedef struct nsSpinLock {
+	unsigned long	flags;
+	spinlock_t	spinLock;
+	struct nsVcpu	*owner;
+	void		*retAddr;
+} nsSpinLock_t;
+
+/*
+ * Novell shim message structure.
+ */
+typedef enum {
+	/*
+	 * For now we only support timer messages
+	 */
+	nsMessageTypeNone = 0x00000000,
+	nsMessageTimerExpired = 0x80000010
+} nsMessageType;
+
+typedef struct nsTimerMessage {
+	nsMessageType	messageType;
+	u8		pad1[3];
+	u8		messageSize;
+	u32		timerIndex;
+	u32		pad2;
+	u64		expirationTime;
+} nsTimerMessage_t;
+
+typedef struct nsMessage {
+	nsMessageType	messageType;
+	uint8_t		messageSize;
+	uint8_t		flags;
+	uint8_t		reserved[2];
+	uint32_t		reserved1;
+	uint64_t		payLoad[30];
+} nsMessage_t;
+
+
+typedef struct nsVcpTimerState {
+	u64	config;
+	u64	count;	/*expiration time in 100ns units*/
+	int	timerIndex;
+	struct nsVcpu	*thisCpu;
+	struct timer	vcpuTimer;
+} nsVcpTimerState_t;
+
+/*
+ * Stats structure.
+ */
+
+typedef struct {
+	u64	numSwitches;
+	u64	numFlushes;
+	u64	numFlushesPosted;
+	u64	numFlushRanges;
+	u64	numFlushRangesPosted;
+
+	u64	numTprReads;
+	u64	numIcrReads;
+	u64	numEoiWrites;
+	u64	numTprWrites;
+	u64	numIcrWrites;
+
+	u64	numGFSAcquires;
+	u64	numGFSReleases;
+	u64	numTlbFlushes;
+	u64	numInvlPages;
+	u64	numTimeOuts;
+} nsVcpuStats_t;
+
+typedef struct nsVcpu {
+	/*
+	 * Per-vcpu state to support the Novell shim; 
+	 */
+	int		nsVcplockDepth; 
+	unsigned long	nsVcpuFlags;
+	unsigned char 	nsVcpFlushRequest;
+	unsigned char	nsVcpWaitingOnGFS;
+	unsigned char	nsVcpFlushPending;
+	unsigned char	nsVcpWaitingForCleanup;
+	unsigned short	nsVcpRepCount;
+	/*
+	 * Synthetic msrs.
+	 */
+	u64	nsVcpSControlMsr;
+	u64	nsVcpSVersionMsr;
+	u64	nsVcpSIefpMsr;
+	u64	nsVcpSimpMsr;
+	u64	nsVcpEomMsr;
+
+	u64	nsVcpSIntMsr[16];
+	/*
+	 * Timer MSRs.
+	 */
+	nsVcpTimerState_t	nsVcpTimers[4];
+	void	*nsVcpSiefPage;
+	void	*nsVcpSimPage;
+	/*
+	 * Hypercall input/output processing.
+	 * We keep these pages mapped in the hypervisor space.
+	 */
+	void	*nsVcpInputBuffer; /*input buffer virt address*/
+	void	*nsVcpInputBufferPage; /*input buffer struct page */
+	void	*nsVcpOutputBuffer; /*output buffer virt address*/
+	void	*nsVcpOutputBufferPage; /*output buffer struct page */
+	struct vcpu	*nsVcpXenVcpu; /*corresponding xen vcpu*/
+	nsVcpuStats_t	nsVcpStats;
+} nsVcpu_t;
+
+/*
+ * Events of interest for gathering stats.
+ */
+#define NS_CSWITCH	1
+#define NS_FLUSH_VA_STAT	2
+#define NS_FLUSH_RANGE	3
+#define NS_FLUSH_VA_POSTED 4
+#define NS_FLUSH_RANGE_POSTED 5
+#define NS_TPR_READ	6
+#define NS_ICR_READ	7
+#define NS_TPR_WRITE	8	
+#define NS_ICR_WRITE	9
+#define NS_EOI_WRITE	10
+
+#define NS_GFS_ACQUIRE	11	
+#define NS_GFS_RELEASE	12
+#define NS_TLB_FLUSH	13
+#define NS_INVL_PG	14	
+#define NS_TIMEOUTS	15	
+
+void nsCollectStats(int event, nsVcpuStats_t *ststp); 
+
+#define NS_STATS //KYS: Temporary
+
+#ifdef NS_STATS
+#define NS_STATS_COLLECT(event, statp) nsCollectStats(event, statp)
+#else
+define NS_STATS_COLLECT(event, statp)
+#endif
+
+typedef struct nsPartition {
+	/*
+	 * State maintained on a per guest basis to implement 
+	 * the Novell shim.
+	 */
+	nsSpinLock_t	nsLock;
+	atomic_t	nsNumVcpusActive;
+	u64		nsGuestIdMsr;
+	u64		nsHypercallMsr;
+	u64		nsPrivileges;
+	u64		nsSupportedFeatures;
+	unsigned long	nsHypercallMfn;
+	int		nsLongModeGuest;
+	/*
+	 * Each VCPU here corresponds to the vcpu in the underlying hypervisor;
+	 * they share the same ID.
+	 */
+	nsVcpu_t	nsVcpuState[MAX_VIRT_CPUS];
+	nsGlobalFlushState_t nsFlushState;
+} nsPartition_t;
+
+/*
+ * Max CPUID leaves supported.
+ */
+
+#define NX_MAX_CPUID_LEAVES	5
+
+/*
+ * We don't want to intercept instructions coming from the hvm bootstrap code.
+ *
+ */
+#define NS_BIOS_HIGH_ADDR 
+/*
+ * Privilege flags.
+ */
+
+#define NS_ACCESS_VP_RUNTIME	(1ULL << 0)
+#define NS_ACCESS_TIME_REF_CNT	(1ULL << 1)
+#define NS_ACCESS_SYNC_MSRS	(1ULL << 2)
+#define NS_ACCESS_SYNC_TIMERS	(1ULL << 3)
+#define NS_ACCESS_APIC_MSRS	(1ULL << 4)
+#define NS_ACCESS_PARTITION_ID	(1ULL << 33)
+	
+#define nsGetCurrentPartition() \
+((current)->domain->arch.hvm_domain.ext_handle)
+
+#define nsGetCurrentVcpuIndex() (current)->vcpu_id
+
+#define NS_PANIC(x) \
+do {\
+	nsXenVector.extPrintk("File is: %s\n", __FILE__);\
+	nsXenVector.extPrintk("Line is: %d\n", __LINE__);\
+	nsXenVector.extPanic((x));\
+} while (0);
+
+#define NS_ASSERT(x) \
+do {\
+	if (!(x)) \
+		NS_PANIC("ASSERTION FAILED\n")\
+} while (0);
+
+#define nsDebugPrint(x)	\
+do { \
+	nsXenVector.extPrintk("File is: %s\n", __FILE__);\
+	nsXenVector.extPrintk("Line is: %d\n", __LINE__);\
+	nsXenVector.extPrintk((x));\
+} while (0);
+
+/* Hooks into Xen */
+extern xen_call_vector_t nsXenVector;
+
+/*
+ * static inline int
+ * nsInvalidCpuState(void)
+ * Check to see if the calling CPU is in the "correct state" to invoke 
+ * the functionality implemented in the Novell Shim (Adaptor).
+ *
+ * Calling/Exit State:
+ *	None.
+ */
+
+static inline int
+nsInvalidCpuState(void)
+{
+	int cpuState;
+	cpuState = nsXenVector.hvmFuncTable->guest_x86_mode(current);
+	if ((cpuState == 4) || (cpuState == 8)) {
+		return (0);
+	}
+	return (1);
+}
+
+/*
+ * inline u64
+ * nsBuildHcallRetVal(int code, int reps)
+ *
+ * Given the return code and the number of successfully completed count, 
+ * compose a return value compliant with the Viridian specification.
+ *
+ * Calling/Exit State:
+ *	None.
+ */
+ 
+static inline u64
+nsBuildHcallRetVal(int code, int reps)
+{
+	u64	retVal=0;
+	retVal |= (code & 0xff);
+	retVal |= (((long long)(reps & 0xfff)) << 32);
+	return (retVal);
+}
+
+
+/*
+ * static inline void  nsSetSysCallRetVal(struct cpu_user_regs *pregs, 
+ *				int longModeGuest, u64 retVal)
+ * Set the return value in the saved guest registers
+ *
+ * Calling/Exit State:
+ *	None.
+ */
+
+static inline void  nsSetSysCallRetVal(struct cpu_user_regs *pregs, 
+				int longModeGuest, u64 retVal)
+{
+	if (longModeGuest) {
+		pregs->eax = retVal;
+	} else {
+		pregs->edx = (u32)(retVal >> 32);
+		pregs->eax = (u32)(retVal);
+	}
+}
+
+/*
+ * static inline int 
+ * nsPrivilegeCheck(nsPartition_t *curp, u64 flags)
+ * Check if the caller is privileged to perform the operation 
+ * specified by the flags argument.
+ *
+ * Calling/Exit State:
+ *	None.
+ */
+
+static inline int 
+nsPrivilegeCheck(nsPartition_t *curp, u64 flags)
+{
+	return ((curp->nsPrivileges & flags)? 1: 0);
+}
+
+/* void
+ * nsHandleHyperCall(u64 opcode, u64 input, u64 output, 
+ *		  u64 *retVal);
+ * Common entry point for handling all the extension hypercalls.
+ *
+ * Calling/Exit State:
+ *	Based on the hypercall; the caller may give up the CPU while 
+ * 	processing the hypercall. No locks should be held on entry and 
+ *	no locks will be held on return.
+ *		
+ */
+void
+nsHandleHyperCall(u64 opcode, u64 input, u64 output, 
+		  u64 *retVal);
+
+/*
+ * void nsDoTlbFlush(void);
+ * Perform TLB flush on the invoking virtual CPU.
+ *
+ * Calling/Exit State:
+ *	None.
+ */
+void nsDoTlbFlush(void);
+
+/*
+ * void
+ * nsLockAcquire(nsVcpu_t *vcpup, nsSpinLock_t *nsLock)
+ * Acquire the specified lock.
+ *
+ * Calling/Exit State:
+ *      None.
+ */
+
+void nsLockAcquire(nsVcpu_t *vcpup, nsSpinLock_t *lock);
+
+/*
+ * void
+ * nsLockRelease(nsVcpu_t *vcpup, nsSpinLock_t *nsLock)
+ * Release the specified spin lock.
+ *
+ * Calling/Exit State:
+ *      None.
+ */
+
+void nsLockRelease(nsVcpu_t *vcpup, nsSpinLock_t *lock);
+
+/*
+ * void
+ * nsLockInit(nsSpinLock_t *nsLock)
+ * Initialize the specified spin lock.
+ *
+ * Calling/Exit State:
+ *      None.
+ */
+
+void nsLockInit(nsSpinLock_t *lock); 
+
+/*
+ * void nsPrintStats(nsPartition_t *curp, int i)
+ * Print the per-vcpu stats for the specified partition.
+ *
+ * Calling/Exit State:
+ *      None.
+ */
+
+void nsPrintStats(nsPartition_t *curp, int i);
+
+#define NS_LOCK_OWNED(v, l) \
+((l)->owner == (v))
+#endif /*NS_SHIM_H */
Index: xen-unstable.hg/xen/arch/x86/hvm/hvm_ext/novell/nshypercall.c
===================================================================
--- /dev/null	1970-01-01 00:00:00.000000000 +0000
+++ xen-unstable.hg/xen/arch/x86/hvm/hvm_ext/novell/nshypercall.c	2008-03-03 14:03:49.000000000 -0500
@@ -0,0 +1,1229 @@
+/****************************************************************************
+ |
+ | Copyright (c) [2007, 2008] Novell, Inc.
+ | All Rights Reserved.
+ |
+ | This program is free software; you can redistribute it and/or
+ | modify it under the terms of version 2 of the GNU General Public License as
+ | published by the Free Software Foundation.
+ |
+ | This program is distributed in the hope that it will be useful,
+ | but WITHOUT ANY WARRANTY; without even the implied warranty of
+ | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ | GNU General Public License for more details.
+ |
+ | You should have received a copy of the GNU General Public License
+ | along with this program; if not, contact Novell, Inc.
+ |
+ | To contact Novell about this file by physical or electronic mail,
+ | you may find current contact information at www.novell.com
+ |
+ |***************************************************************************
+*/
+
+/*
+ * nshypercall.c.
+ * This file implements the hypercall component of the Novell Shim. Hopefully
+ * we can host this component either as a driver in the guest or an extension 
+ * to the Xen hypervisor.
+ *
+ * Engineering Contact: K. Y. Srinivasan
+ */
+
+#include <asm/page.h>
+#include <asm/processor.h>
+#include <asm/hvm/support.h>
+#include <xen/cpumask.h>
+#include <xen/event.h>
+
+#include <asm/hvm/hvm_extensions.h>
+#include "ns_shim.h"
+#include "ns_errno.h"
+#include "nshypercall.h"
+
+
+
+void nsDoTlbFlush(void);
+static void 
+nsFlushPostProcess(nsPartition_t *curp, nsVcpu_t *curVcpup);
+
+
+
+/*
+ * void nsCollectStats(int event, nsVcpuStats_t *statsp)
+ * Collect stats.
+ *
+ * Calling/Exit State:
+ * 	None.
+ */
+
+void nsCollectStats(int event, nsVcpuStats_t *statsp)
+{
+	switch (event) {
+	case NS_CSWITCH:
+		statsp->numSwitches++;
+		return;
+	case NS_FLUSH_VA:
+		statsp->numFlushes++;
+		return;
+	case NS_FLUSH_RANGE:
+		statsp->numFlushRanges++;
+		return;
+	case NS_FLUSH_VA_POSTED: 
+		statsp->numFlushesPosted++;
+		return;
+	case NS_FLUSH_RANGE_POSTED:
+		statsp->numFlushRangesPosted++;
+		return;
+	case NS_TPR_READ:
+		statsp->numTprReads++;
+		return;
+	case NS_ICR_READ:
+		statsp->numIcrReads++;
+		return;
+	case NS_TPR_WRITE: 
+		statsp->numTprWrites++;
+		return;
+	case NS_ICR_WRITE:
+		statsp->numIcrWrites++;
+		return;
+	case NS_EOI_WRITE:
+		statsp->numEoiWrites++;
+		return;
+
+	case NS_GFS_ACQUIRE:
+		statsp->numGFSAcquires++;
+		return;
+	case NS_GFS_RELEASE:
+		statsp->numGFSReleases++;
+		return;
+	case NS_TLB_FLUSH:
+		statsp->numTlbFlushes++;
+		return;
+	case NS_INVL_PG:
+		statsp->numInvlPages++;
+		return;
+	}
+}
+
+/*
+ * void
+ * nsPrintStats(nsPartition_t *curp, int i)
+ * Print stats.
+ *
+ * Calling/Exit State:
+ * 	None.
+ */
+void
+nsPrintStats(nsPartition_t *curp, int i)
+{
+	nsVcpu_t *v;
+	v = &curp->nsVcpuState[i];
+	printk("Printing stats for vcpu ID: %d\n", i);
+	printk("Flush pending: %d\n", (int)v->nsVcpFlushPending);
+	printk("Flush Request: %d\n", (int)v->nsVcpFlushRequest);
+	printk("Waiting on GFS: %d\n", (int)v->nsVcpWaitingOnGFS);
+	printk("Waiting for cleanup: %d\n", (int)v->nsVcpWaitingForCleanup);
+
+	printk("Number of context switches: %lu\n", v->nsVcpStats.numSwitches);
+	printk("Number of flushes: %lu\n", v->nsVcpStats.numFlushes);
+	printk("Number of flushes posted: %lu\n", v->nsVcpStats.numFlushesPosted);
+	printk("Number of flush ranges: %lu\n", v->nsVcpStats.numFlushRanges);
+	printk("Number of flush ranges posted: %lu\n", v->nsVcpStats.numFlushRangesPosted);
+	printk("Number of TPR reads: %lu\n", v->nsVcpStats.numTprReads);
+	printk("Number of ICR reads: %lu\n", v->nsVcpStats.numIcrReads);
+	printk("Number of Eoi writes: %lu\n", v->nsVcpStats.numEoiWrites);
+	printk("Number of Tpr writes: %lu\n", v->nsVcpStats.numTprWrites);
+	printk("Number of Icr writes: %lu\n", v->nsVcpStats.numIcrWrites);
+	printk("Number of GFS acuires: %lu\n", v->nsVcpStats.numGFSAcquires);
+	printk("Number of GFS releases: %lu\n", v->nsVcpStats.numGFSReleases);
+	printk("Number of TLB flushes: %lu\n", v->nsVcpStats.numTlbFlushes);
+	printk("Number of INVLPG flushes: %lu\n", v->nsVcpStats.numInvlPages);
+	printk("Number of TIMEOUTS: %lu\n", v->nsVcpStats.numTimeOuts);
+
+}
+
+/*
+ * static inline void nsWakeupWaiters(nsPartition_t *curp)
+ * Wakeup all the VCPUs that may be blocked on the Global
+ * flush state waiting to exclusively own the global flush
+ * state.
+ *
+ * Calling/Exit State:
+ * 	The partition-wide spin lock nsLock is held on entry and 
+ *	this lock is held on exit.
+ */
+static inline void nsWakeupWaiters(nsPartition_t *curp)
+{
+	int i;
+	if (!cpus_empty(curp->nsFlushState.waiters)) {
+		/*
+ 	 	 * Need to wakeup potential waiters that 
+	 	 * are waiting for the 
+ 	 	 * flush block to become available.
+ 	 	 */
+		for (i=0; i < MAX_VIRT_CPUS; i++) {
+			struct vcpu	*curVcpu;
+			if (!cpu_isset(i, curp->nsFlushState.waiters))
+				continue;
+			curVcpu = 
+			curp->nsVcpuState[i].nsVcpXenVcpu;
+			NS_ASSERT(curVcpu != NULL);
+			if ( test_and_clear_bit(_VPF_blocked_in_xen,
+                          	     &curVcpu->pause_flags) ) {
+               			vcpu_wake(curVcpu);
+			}
+		}
+		cpus_clear(curp->nsFlushState.waiters);
+	}
+}
+
+/*
+ * static void nsAcquireGlobalFlushState(nsPartition_t *curp, nsVcpu_t *vcpup)
+ * Acquire the global flush state for exclusive use by the calling
+ * VCPU.
+ *
+ * Calling/Exit State:
+ * 	On entry nsLock is held and this lock is held on exit. If the calling
+ *	VCPU is required to give up the CPU, this lock will be dropped.
+ */
+static void nsAcquireGlobalFlushState(nsPartition_t *curp, nsVcpu_t *vcpup)
+{
+acquireGFSAgain:
+	NS_ASSERT(vcpup->nsVcpWaitingOnGFS == 0);
+	NS_ASSERT(vcpup->nsVcpWaitingForCleanup == 0);
+	NS_ASSERT(NS_LOCK_OWNED(vcpup, &curp->nsLock));
+	if (curp->nsFlushState.currentOwner != NULL) {
+		/*
+		 * Somebody is in the midst of flushing; deal with this 
+		 * situation.
+		 */
+		/*
+		 * We need to wait for the current flush sequence
+		 * to end.
+		 */
+		NS_ASSERT(curp->nsFlushState.currentOwner != current);
+		NS_ASSERT(vcpup->nsVcpWaitingForCleanup == 0);
+		if (vcpup->nsVcpFlushPending) {
+			nsLockRelease(vcpup, &curp->nsLock);
+			nsDoTlbFlush();
+			nsLockAcquire(vcpup, &curp->nsLock);
+		}
+		vcpup->nsVcpWaitingOnGFS = 1;
+		cpu_set(current->vcpu_id, curp->nsFlushState.waiters);
+		nsLockRelease(vcpup, &curp->nsLock);
+		wait_on_xen_event_channel(0,
+			((curp->nsFlushState.currentOwner == NULL) ||
+			 (vcpup->nsVcpFlushPending) ||
+			 (cpus_empty(curp->nsFlushState.waiters))));
+		nsLockAcquire(vcpup, &curp->nsLock);
+		cpu_clear(current->vcpu_id, curp->nsFlushState.waiters);
+		vcpup->nsVcpWaitingOnGFS = 0;
+		goto acquireGFSAgain;
+	}
+	curp->nsFlushState.repCount = vcpup->nsVcpRepCount; 
+	curp->nsFlushState.flushParam = 
+	vcpup->nsVcpInputBuffer;
+	NS_STATS_COLLECT(NS_GFS_ACQUIRE, &vcpup->nsVcpStats);
+}
+
+/*
+ * static void nsReleaseGlobalFlushState(nsPartition_t *curp, nsVcpu_t *vcpup,
+ *					int lockOwned)
+ * There can at most be one TLB flush event active in the system. All of the
+ * VCPUs that are part of the flush sequence need to relaese their hold
+ * on the global flush object before the global flush object can be freed.
+ * This function manages the release of the global flush object.
+ * If the "lockOwned" parameter is non-zero; on entry the nsLock is held.
+ *
+ * Calling/Exit State:
+ * 	The current owner of GFS may be forced to give up the CPU.
+ *	On exit nsLock is  held.
+ */
+static void nsReleaseGlobalFlushState(nsPartition_t *curp, nsVcpu_t *vcpup,
+					int lockOwned)
+{
+	if (!lockOwned) {
+		nsLockAcquire(vcpup, &curp->nsLock);
+	}
+	NS_ASSERT(curp->nsFlushState.cpuCount >= 0);
+	NS_ASSERT(curp->nsFlushState.currentOwner != NULL);
+
+	if (vcpup->nsVcpFlushPending) {
+		curp->nsFlushState.cpuCount--;
+		NS_ASSERT(curp->nsFlushState.cpuCount >= 0);
+		vcpup->nsVcpFlushPending = 0;
+		mb();
+	}
+	
+nsReleaseGFS:
+	if (curp->nsFlushState.cpuCount > 0) {
+		if (curp->nsFlushState.currentOwner == current)  {
+			/*
+			 * We are the initiator; need to wait for 
+			 * others to complete.
+	 		 */
+			nsWakeupWaiters(curp);
+			vcpup->nsVcpWaitingForCleanup = 1;
+			nsLockRelease(vcpup, &curp->nsLock);
+			wait_on_xen_event_channel(0,(curp->nsFlushState.cpuCount == 0));
+			nsLockAcquire(vcpup, &curp->nsLock);
+			vcpup->nsVcpWaitingForCleanup = 0;
+			goto nsReleaseGFS;
+		} else {
+			return;
+		}
+	}
+	NS_ASSERT(curp->nsFlushState.cpuCount == 0);
+	if (curp->nsFlushState.currentOwner == current) {
+		/* We are the current owner; do the final cleanup.
+		 * But first set the return value. This has been stashed
+		 * before we blocked.
+	 	 */
+		NS_STATS_COLLECT(NS_GFS_RELEASE, &vcpup->nsVcpStats);
+		vcpup->nsVcpFlushRequest = 0;
+		vcpup->nsVcpFlushPending = 0;
+		vcpup->nsVcpWaitingForCleanup = 0;
+		nsSetSysCallRetVal(guest_cpu_user_regs(), 
+				   curp->nsLongModeGuest, 
+				   curp->nsFlushState.retVal);
+		curp->nsFlushState.cpuCount = 0;
+		curp->nsFlushState.currentOwner = NULL;
+		mb();
+		curp->nsFlushState.retVal = 0;
+		curp->nsFlushState.flushParam = NULL;
+		curp->nsFlushState.repCount = 0;
+		nsWakeupWaiters(curp);
+	} else {
+		/*
+		 * We are not the owner; wakeup the owner.
+		 */
+		if ( test_and_clear_bit(_VPF_blocked_in_xen,
+                            &(curp->nsFlushState.currentOwner->pause_flags))){
+			vcpu_wake(curp->nsFlushState.currentOwner);
+		}
+	}
+}
+	
+
+/*
+ * static inline int nsFlushPermitted(nsVcpu_t *vcpup)
+ * Check to see if we can execute a TLB flush on the calling vcpu.
+ *
+ * Calling/Exit State:
+ * 	None.
+ */
+static inline int nsFlushPermitted(nsVcpu_t *vcpup)
+{
+	if (!hvm_paging_enabled(current)) {
+		return (0);
+	}
+	if (nsInvalidCpuState()) {
+		return (0);
+	}
+
+	return (1);
+}
+	
+/*
+ * void
+ * nsDoTlbFlush(void)
+ *	Perform flush operations based on the state of GFS. VCPUs may be
+ *	forced to relinquish the physical CPU while attempting to flush; in 
+ *	those events, thi is also the continuation point for execution.
+ *
+ * Calling/Exit State:
+ *	None.
+ */
+void
+nsDoTlbFlush(void)
+{
+	nsPartition_t   *curp = nsGetCurrentPartition();
+        nsVcpu_t	*vcpup = &curp->nsVcpuState[nsGetCurrentVcpuIndex()];
+	flushVa_t  	*flushArgp;
+	int 		i,j, numPages;
+	u64		*pgList;
+	long		baseVa;
+	unsigned short 	repCount;
+
+	NS_ASSERT(local_irq_is_enabled());
+
+	NS_ASSERT(vcpup->nsVcplockDepth == 0);
+
+	nsLockAcquire(vcpup, &curp->nsLock);
+	if (vcpup->nsVcpWaitingForCleanup) {
+		/*
+		 * This is the continuation point for us; cleanup
+		 * the global flush state.
+		 */
+		vcpup->nsVcpWaitingForCleanup =0;
+		NS_ASSERT(curp->nsFlushState.currentOwner == current);
+		nsReleaseGlobalFlushState(curp, vcpup, 1);
+	} else if (vcpup->nsVcpWaitingOnGFS)  { 
+		/*
+		 * This is the continuation point for us; acquire
+		 * GFS and proceed with our flush operation.
+		 */
+		vcpup->nsVcpWaitingOnGFS =0; 
+		nsAcquireGlobalFlushState(curp, vcpup);
+		/*
+		 * Now do the rest of the syscall processing
+		 */
+		nsFlushPostProcess(curp, vcpup);
+	}
+	if (!vcpup->nsVcpFlushPending) {
+		nsLockRelease(vcpup, &curp->nsLock);
+		return;
+	}
+	flushArgp = curp->nsFlushState.flushParam;
+	repCount  = curp->nsFlushState.repCount;
+	/*
+	 * At this point a flush has been posted; see if we can perform a
+	 * flush given our state.
+	 */
+	if (!nsFlushPermitted(vcpup)) {
+		nsReleaseGlobalFlushState(curp, vcpup, 1);
+		nsLockRelease(vcpup, &curp->nsLock);
+		NS_ASSERT(vcpup->nsVcplockDepth == 0);
+		return;
+	}
+	nsLockRelease(vcpup, &curp->nsLock);
+	if (vcpup->nsVcpFlushPending & NS_FLUSH_TLB) {
+		NS_STATS_COLLECT(NS_TLB_FLUSH, &vcpup->nsVcpStats);
+		paging_update_cr3(current);
+	} else {
+		pgList = &flushArgp->gva;
+		NS_ASSERT(vcpup->nsVcpFlushPending == NS_FLUSH_INVLPG);
+		NS_ASSERT(pgList != NULL);
+		NS_ASSERT(repCount >=1);
+		NS_STATS_COLLECT(NS_INVL_PG, &vcpup->nsVcpStats);
+		for (i = 0; i < repCount; i++) {
+			baseVa = (long)(pgList[i] & PAGE_MASK);
+			numPages = (int)(~baseVa & pgList[i]);
+			for (j = 0; j <= numPages; j++) {
+				if (paging_invlpg(current, 
+				    (baseVa + (j << PAGE_SHIFT)))) {
+					flush_tlb_one_local((baseVa + 
+					(j<< PAGE_SHIFT)));
+				}
+				//KYS: need to deal with ASIDS
+			}
+		}
+	}
+	/*
+	 * Do post processing on the global flush state. 
+	 */
+	nsReleaseGlobalFlushState(curp, vcpup, 0);
+	nsLockRelease(vcpup, &curp->nsLock);
+	NS_ASSERT(vcpup->nsVcplockDepth == 0);
+}				
+			
+/*
+ * static int
+ * nsGetVpRegisters(paddr_t input, paddr_t output)
+ * Get the VCP register state.
+ *
+ * Calling/Exit State:
+ *	None.
+ */
+
+static int
+nsGetVpRegisters(paddr_t input, paddr_t output)
+{
+	nsVcpu_t        *vcpup, *targetp;
+	nsPartition_t   *curp = nsGetCurrentPartition();
+	getVpRegistersInput_t	*inBuf;
+	getVpRegistersOutput_t	*outBuf;
+	struct vcpu_guest_context	*vcpuCtx;
+	u32		*regIndexp;
+	getVpRegistersOutput_t		*outRegp;
+	u32		numOutputBytes = 0;
+
+        vcpup = &curp->nsVcpuState[nsGetCurrentVcpuIndex()];
+	inBuf = vcpup->nsVcpInputBuffer;
+	outBuf = vcpup->nsVcpOutputBuffer;
+	outRegp = outBuf;
+	/*
+	 * Copy the input data to the per-cpu input buffer.
+	 * This may be an overkill; obviously it is better to only
+	 * copy what we need. XXXKYS: Check with Mike.
+	 */
+	if (nsXenVector.extCopyFromGuestPhysical(inBuf, input, PAGE_SIZE)) {
+		return (NS_STATUS_INVALID_ALIGNMENT);
+	}
+	/*
+	 * If the partition ID specified does not match with the current 
+	 * domain return appropriate error.
+	 */
+	if ((u64)current->domain->domain_id != inBuf-> partitionId) {
+		return (NS_STATUS_ACCESS_DENIED);
+	}
+	if (inBuf->vpIndex > MAX_VIRT_CPUS) { 
+		return (NS_STATUS_INVALID_VP_INDEX);
+	}
+	targetp = &curp->nsVcpuState[inBuf->vpIndex]; 
+	if (!(targetp->nsVcpuFlags & NS_VCPU_UP)) {
+		return (NS_STATUS_INVALID_VP_STATE);
+	}
+	if ((vcpuCtx = 
+	     nsXenVector.extAllocMem(sizeof(struct vcpu_guest_context))) 
+		== NULL) {
+		return (NS_STATUS_INSUFFICIENT_MEMORY);
+	}
+
+	/*
+	 * Get the register state of the specified vcp.
+	 */
+	if (current->vcpu_id != inBuf->vpIndex) {
+		nsXenVector.extVcpuPause(targetp->nsVcpXenVcpu);
+	}
+	nsXenVector.extArchGetDomainInfoCtxt(targetp->nsVcpXenVcpu, vcpuCtx);
+	if (current->vcpu_id != inBuf->vpIndex) {
+		nsXenVector.extVcpuUnPause(targetp->nsVcpXenVcpu);
+	}
+	/*
+	 * Now that we have the register state; select what we want and
+	 * populate the output buffer.
+	 */
+	regIndexp = &inBuf->regIndex;
+	while (*regIndexp != 0) {
+		switch	(*regIndexp) {
+			/*
+			 * XXXKYS: need mapping code here; populate
+			 * outBuf.
+			 */
+			NS_PANIC("nsGetVpRegisters not supported\n");
+		}
+		regIndexp++;
+		outRegp++ ;	/*128 bit registers */
+		numOutputBytes +=16;
+		if ((char *)regIndexp > ((char *)inBuf + PAGE_SIZE)) {
+			/*
+			 *input list not reminated correctly; bail out.
+			 */
+			NS_PANIC("nsGetVpRegisters:input list not terminated\n"); 
+			break;
+		}
+	}
+	if (nsXenVector.extCopyToGuestPhysical(output, outBuf, 
+		numOutputBytes)) {
+		/* Some problem copying data out*/
+		NS_PANIC("nsGetVpRegisters:copyout problem\n"); 
+	}
+	nsXenVector.extFreeMem(vcpuCtx);
+	return (NS_STATUS_SUCCESS);
+}
+		
+/*
+ * static int
+ * nsSetVpRegisters(paddr_t input, paddr_t output)
+ * Set the VCPU register state.
+ *
+ * Calling/Exit State:
+ *	None.
+ */
+
+static int
+nsSetVpRegisters(paddr_t input, paddr_t output)
+{
+	nsVcpu_t        *vcpup, *targetp;
+	nsPartition_t   *curp = nsGetCurrentPartition();
+	setVpRegistersInput_t	*inBuf;
+	struct vcpu_guest_context	*vcpuCtx;
+	setVpRegisterSpec_t		*regIndexp;
+	int		retVal = NS_STATUS_SUCCESS;
+
+        vcpup = &curp->nsVcpuState[nsGetCurrentVcpuIndex()];
+	inBuf = vcpup->nsVcpInputBuffer;
+	/*
+	 * Copy the input data to the per-cpu input buffer.
+	 * This may be an overkill; obviously it is better to only
+	 * copy what we need. XXXKYS: Check with Mike.
+	 */
+	if (nsXenVector.extCopyFromGuestPhysical(inBuf, input, PAGE_SIZE)) {
+		return (NS_STATUS_INVALID_ALIGNMENT);
+	}
+	/*
+	 * If the partition ID specified does not match with the current 
+	 * domain return appropriate error.
+	 */
+	if ((u64)current->domain->domain_id != inBuf-> partitionId) {
+		return (NS_STATUS_ACCESS_DENIED);
+	}
+	if (inBuf->vpIndex > MAX_VIRT_CPUS) { 
+		return (NS_STATUS_INVALID_VP_INDEX);
+	}
+	targetp = &curp->nsVcpuState[inBuf->vpIndex]; 
+	if (!(targetp->nsVcpuFlags & NS_VCPU_UP)) {
+		return (NS_STATUS_INVALID_VP_STATE);
+	}
+	if ((vcpuCtx = 
+	     nsXenVector.extAllocMem(sizeof(struct vcpu_guest_context))) 
+		== NULL) {
+		return (NS_STATUS_INSUFFICIENT_MEMORY);
+	}
+	/*
+	 * XXXKYS: Is it sufficient to just pause the target vcpu; on the 
+	 * xen side domain is paused for this call. CHECK.
+	 */
+	if (current->vcpu_id != inBuf->vpIndex) {
+		nsXenVector.extVcpuPause(targetp->nsVcpXenVcpu);
+	}
+
+	nsXenVector.extArchGetDomainInfoCtxt(targetp->nsVcpXenVcpu, vcpuCtx);
+	/*
+	 * Now that we have the register state; update the register state
+	 * based on what we are given. 
+	 */
+	regIndexp = &inBuf->regSpec;
+	/*
+	 * XXXKYS: Assuming the list is terminated by a regName that is 0.
+	 * Check with Mike.
+	 */
+	while (regIndexp->regName != 0) {
+		switch	(regIndexp->regName) {
+			/*
+			 * XXXKYS: need mapping code here; populate
+			 * vcpuCtx 
+			 */
+			NS_PANIC("nsSetVpRegisters not supported\n");
+		}
+		regIndexp++;
+		if ((char *)regIndexp > ((char *)inBuf + PAGE_SIZE)) {
+			/*
+			 *input list not reminated correctly; bail out.
+			 */
+			NS_PANIC("nsSetVpRegisters:input list not terminated\n"); 
+			break;
+		}
+	}
+	/*
+	 * Now set register state.
+	 *
+	 * XXXKYS: Is it sufficient to just pause the target vcpu; on the 
+	 * xen side domain is paused for this call. CHECK.
+	 */
+
+	if (nsXenVector.extArchSetDomainInfoCtxt(targetp->nsVcpXenVcpu, vcpuCtx)) { 
+		retVal = NS_STATUS_INVALID_PARAMETER;
+	}
+	if (current->vcpu_id != inBuf->vpIndex) {
+		nsXenVector.extVcpuUnPause(targetp->nsVcpXenVcpu);
+	}
+	nsXenVector.extFreeMem(vcpuCtx);
+	return (retVal);
+}
+
+/*
+ * static int
+ * nsSwitchVa(paddr_t input)
+ *
+ * Switch the page table base of the calling vcpu.
+ *
+ * Calling/Exit State:
+ *	None.
+ *
+ * Remarks:
+ *	The spec specifies that the input register is pointing to a guest 
+ * 	physical that has the new page table base. However it appears that the 
+ *	page table base is being passed in the input register.
+ */
+static int
+nsSwitchVa(paddr_t input)
+{
+	nsPartition_t   *curp = nsGetCurrentPartition();
+        nsVcpu_t *vcpup = &curp->nsVcpuState[nsGetCurrentVcpuIndex()];
+
+	/*
+	 * XXXKYS: the spec sys the asID is passed via memory at offset 0 of 
+	 * the page whose GPA is in the input register. However, it appears 
+	 * the current build of longhorn (longhorn-2007-02-06-x86_64-fv-02)
+	 * passes the asID in the input register instead. Need to check if 
+	 * future builds do this.
+	 */
+	hvm_set_cr3(input); 
+	NS_STATS_COLLECT(NS_CSWITCH, &vcpup->nsVcpStats);
+	return (NS_STATUS_SUCCESS);
+}
+
+/*
+ * static int 
+ * nsFlushPostProcess(nsPartition_t *curp, nsVcpu_t *curVcpup)
+ *
+ * Perform the flush operation once GFS is acquired.
+ *
+ * Calling/Exit State:
+ *	On entry nsLock is held; on exit this lock continues to be held.
+ */
+
+static void 
+nsFlushPostProcess(nsPartition_t *curp, nsVcpu_t *curVcpup)
+{
+	int		target; 
+	nsVcpu_t	*vcpup;
+	cpumask_t	vcpuMask;
+	struct flushVa	*flushArgp;
+
+	flushArgp = curVcpup->nsVcpInputBuffer;
+	vcpuMask = flushArgp->vMask;
+	/*
+	 * On entry we must own the global flush state.
+	 */
+	NS_ASSERT(NS_LOCK_OWNED(curVcpup, &curp->nsLock));
+	NS_ASSERT(curp->nsFlushState.cpuCount == 0); 
+	NS_ASSERT(curp->nsFlushState.currentOwner == NULL); 
+
+	curp->nsFlushState.retVal = 
+	nsBuildHcallRetVal(NS_STATUS_SUCCESS, curVcpup->nsVcpRepCount);
+	curp->nsFlushState.currentOwner = current; 
+	if (cpu_isset(current->vcpu_id, vcpuMask)) {
+		curp->nsFlushState.cpuCount = 1;
+		curVcpup->nsVcpFlushPending = 
+		curVcpup->nsVcpFlushRequest;
+		mb();
+#ifdef NS_STATS
+		if (curVcpup->nsVcpFlushRequest == NS_FLUSH_TLB) {
+			NS_STATS_COLLECT(NS_FLUSH_VA_POSTED, &curVcpup->nsVcpStats);
+		} else {
+			NS_STATS_COLLECT(NS_FLUSH_RANGE_POSTED, &curVcpup->nsVcpStats);
+		}
+#endif
+			
+		cpu_clear(current->vcpu_id, vcpuMask);
+	}
+	if (cpus_empty(vcpuMask)) {
+		/*
+		 * We are done.
+		 */
+		goto flushVaDone;
+	}
+	while (!cpus_empty(vcpuMask)) {
+		target = first_cpu(vcpuMask);
+        	vcpup = &curp->nsVcpuState[target];
+		cpu_clear(target, vcpuMask);
+		if (!(vcpup->nsVcpuFlags & NS_VCPU_UP)) {
+			continue;
+		}
+		if (!nsFlushPermitted(vcpup)) {
+			continue;
+		}
+		curp->nsFlushState.cpuCount++;
+		vcpup->nsVcpFlushPending = 
+		curVcpup->nsVcpFlushRequest;
+		mb();
+#ifdef NS_STATS
+		if (curVcpup->nsVcpFlushRequest == NS_FLUSH_TLB) {
+			NS_STATS_COLLECT(NS_FLUSH_VA_POSTED, &vcpup->nsVcpStats);
+		} else {
+			NS_STATS_COLLECT(NS_FLUSH_RANGE_POSTED, &vcpup->nsVcpStats);
+		}
+#endif
+
+		/*
+		 * We need to force these VCPUs into the hypervisor for
+		 * them to act on the pending request.
+		 */
+
+		vcpu_kick(vcpup->nsVcpXenVcpu);
+		if ( test_and_clear_bit(_VPF_blocked_in_xen,
+                             &vcpup->nsVcpXenVcpu->pause_flags) ) {
+			vcpu_wake(vcpup->nsVcpXenVcpu);
+		}
+
+	}
+	/*
+	 * Now that we have posted the state; wait for other CPUs to perform
+	 * flushes; we need to wait for all the CPUs to complete the flush
+	 * before returning. 
+	 */
+flushVaDone:
+	/*
+	 * If we are included in this round of tlb flush; we will wait for 
+	 * other CPUs in the tlb flush function; else we wait right here.
+	 */
+	if (!curVcpup->nsVcpFlushPending) {
+		nsReleaseGlobalFlushState(curp, curVcpup, 1);
+	}
+	return;
+}
+
+/*
+ * static int
+ * nsFlushVa(paddr_t input)
+ * Perform a TLB flush on the specified set of VCPUs.
+ *
+ * Calling/Exit State:
+ *	No locks can be held on entry and no locks will be held on return. 
+ *	The calling VCPU may relinquish the physical CPU.
+ */
+static int
+nsFlushVa(paddr_t input)
+{
+	nsPartition_t   *curp = nsGetCurrentPartition();
+	int		i;
+        nsVcpu_t	*curVcpup;
+
+	flushVa_t	*flushArgp;
+	cpumask_t	vcpuMask;
+	u64		asId, inputMask, retVal;
+	int		flushGlobal = 1;
+
+	curVcpup = &curp->nsVcpuState[nsGetCurrentVcpuIndex()];
+	flushArgp = curVcpup->nsVcpInputBuffer;
+
+	NS_ASSERT(curVcpup->nsVcplockDepth == 0);
+	NS_ASSERT(curVcpup->nsVcpFlushRequest == 0);
+	NS_ASSERT(curVcpup->nsVcpWaitingForCleanup == 0);
+	NS_ASSERT(curVcpup->nsVcpWaitingOnGFS == 0);
+
+	if (nsXenVector.extCopyFromGuestPhysical(flushArgp, input, 
+			sizeof(*flushArgp))) {
+		return (NS_STATUS_INVALID_ALIGNMENT);
+	}
+	inputMask = flushArgp->pMask;
+	asId = flushArgp->asHandle;
+	cpus_clear(vcpuMask);
+	/*
+	 * Deal with all trivial error conditions.
+	 */
+	if (flushArgp->flags != 0 && (!(flushArgp->flags & 
+			      (NS_FLUSH_ALL_PROCESSORS | 
+			       NS_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES |
+			       NS_FLUSH_NON_GLOBAL_MAPPINGS_ONLY)))) {
+		return (NS_STATUS_INVALID_PARAMETER);
+	}
+	if (((flushArgp->pMask) == 0) &&
+	   !(flushArgp->flags & NS_FLUSH_ALL_PROCESSORS)) {
+		return (NS_STATUS_INVALID_PARAMETER);
+	}
+				 
+	if (flushArgp->flags & NS_FLUSH_ALL_PROCESSORS) {
+		for (i=0; i< MAX_VIRT_CPUS; i++) {
+			if (current->domain->vcpu[i] != NULL) {
+				cpu_set(i, vcpuMask);
+			}
+		}
+	} else {
+		i = 0;
+		while (inputMask) {
+			if (inputMask &0x1) {
+				cpu_set(i, vcpuMask);
+			}
+			inputMask = (inputMask >> 1);
+			i++;
+		}
+	}
+		
+	if (flushArgp->flags & NS_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES) {
+		asId = NS_ALL_AS;
+	}
+	if (flushArgp->flags & NS_FLUSH_NON_GLOBAL_MAPPINGS_ONLY) {
+		flushGlobal = 0;
+	}
+	/*
+	 * Now operate on what we are given
+	 * XXXKYS: For now we are ignoring asId and fushGlobal flag.
+	 * May have to revisit this. But first stash away the processed 
+	 * parameters for subsequent use.
+	 */
+	flushArgp->asHandle = asId;
+	flushArgp->flags = flushGlobal;
+	flushArgp->vMask = vcpuMask;
+
+	curVcpup->nsVcpRepCount = 0;
+	curVcpup->nsVcpFlushRequest = NS_FLUSH_TLB;
+
+	retVal = nsBuildHcallRetVal(NS_STATUS_SUCCESS, 0);
+	nsSetSysCallRetVal(guest_cpu_user_regs(),
+                                   curp->nsLongModeGuest,
+                                   retVal);
+	NS_STATS_COLLECT(NS_FLUSH_VA_STAT, &curVcpup->nsVcpStats);
+	nsLockAcquire(curVcpup, &curp->nsLock);
+	nsAcquireGlobalFlushState(curp, curVcpup);
+	nsFlushPostProcess(curp, curVcpup);
+	nsLockRelease(curVcpup, &curp->nsLock);
+	return (NS_STATUS_SUCCESS);	
+}
+
+/*
+ * static int
+ * nsFlushVaRange(paddr_t input, unsigned short startIndex, 
+ * unsigned short repCount, unsigned short *repsDone)
+ * Perform a INVLPG flush on the specified set of VCPUs.
+ *
+ * Calling/Exit State:
+ *	No locks can be held on entry and no locks will be held on return. 
+ *	The calling VCPU may relinquish the physical CPU.
+ */
+static int
+nsFlushVaRange(paddr_t input, unsigned short startIndex, 
+unsigned short repCount, unsigned short *repsDone)
+{
+	nsVcpu_t        *curVcpup;
+	nsPartition_t   *curp = nsGetCurrentPartition();
+	flushVa_t  	*flushArgp;
+	cpumask_t	vcpuMask;
+	u64		asId, inputMask, retVal;
+	int		flushGlobal = 1;
+	int		flushAllProc = 0;
+	int		i;
+
+        curVcpup =  &curp->nsVcpuState[nsGetCurrentVcpuIndex()];
+	flushArgp = curVcpup->nsVcpInputBuffer;
+	NS_ASSERT(curVcpup->nsVcplockDepth == 0);
+	NS_ASSERT(curVcpup->nsVcpFlushRequest == 0);
+	NS_ASSERT(curVcpup->nsVcpWaitingForCleanup == 0);
+	NS_ASSERT(curVcpup->nsVcpWaitingOnGFS == 0);
+	NS_ASSERT(repCount >=1);
+	NS_ASSERT(((sizeof(*flushArgp)) + 8*(repCount -1)) <= PAGE_SIZE);
+	if (nsXenVector.extCopyFromGuestPhysical(flushArgp, input, 
+			((sizeof(*flushArgp)) + 8*(repCount -1)))) {
+		return (NS_STATUS_INVALID_ALIGNMENT);
+	}
+	*repsDone = repCount;
+	inputMask = flushArgp->pMask;
+	asId = flushArgp->asHandle;
+	cpus_clear(vcpuMask);
+	/*
+	 * Deal with all trivial error conditions.
+	 */
+	if (flushArgp->flags != 0 && (!(flushArgp->flags & 
+			      (NS_FLUSH_ALL_PROCESSORS | 
+			       NS_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES |
+			       NS_FLUSH_NON_GLOBAL_MAPPINGS_ONLY)))) {
+		return (NS_STATUS_INVALID_PARAMETER);
+	}
+	if ((flushArgp->pMask == 0) &&
+	   !(flushArgp->flags & NS_FLUSH_ALL_PROCESSORS)) {
+		return (NS_STATUS_INVALID_PARAMETER);
+	}
+				 
+	if (flushArgp->flags & NS_FLUSH_ALL_PROCESSORS) {
+		flushAllProc = 1;
+		for (i=0; i< MAX_VIRT_CPUS; i++) {
+			if (current->domain->vcpu[i] != NULL) {
+				cpu_set(i, vcpuMask);
+			}
+		}
+	} else {
+		i = 0;
+		/*
+		 * populate the vcpu mask based on the input.
+		 */
+		while (inputMask) {
+			if (inputMask & 0x1) {
+				cpu_set(i, vcpuMask);
+			}
+			inputMask = (inputMask >> 1);
+			i++;
+		}
+	}
+	if (flushArgp->flags & NS_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES) {
+		asId = NS_ALL_AS;
+	}
+	if (flushArgp->flags & NS_FLUSH_NON_GLOBAL_MAPPINGS_ONLY) {
+		flushGlobal = 0;
+	}
+	/*
+	 * Now operate on what we are given
+	 * XXXKYS: For now we are ignoring asId and fushGlobal flag.
+	 * May have to revisit this.
+	 * May have to revisit this. But first stash away the processed 
+	 * parameters for subsequent use.
+	 */
+	flushArgp->asHandle = asId;
+	flushArgp->flags = flushGlobal;
+	flushArgp->vMask = vcpuMask;
+	
+	curVcpup->nsVcpRepCount = repCount;
+	curVcpup->nsVcpFlushRequest = NS_FLUSH_INVLPG;
+
+	retVal = nsBuildHcallRetVal(NS_STATUS_SUCCESS, repCount);
+	nsSetSysCallRetVal(guest_cpu_user_regs(),
+                                   curp->nsLongModeGuest,
+                                   retVal);
+
+
+	NS_STATS_COLLECT(NS_FLUSH_RANGE, &curVcpup->nsVcpStats);
+	nsLockAcquire(curVcpup, &curp->nsLock);
+	nsAcquireGlobalFlushState(curp, curVcpup);
+	nsFlushPostProcess(curp, curVcpup);
+	nsLockRelease(curVcpup, &curp->nsLock);
+	return (NS_STATUS_SUCCESS);	
+}
+
+/* void
+ * nsHandleHyperCall(u64 opcode, u64 input, u64 output,
+ *                u64 *retVal);
+ * Common entry point for handling all the extension hypercalls.
+ *
+ * Calling/Exit State:
+ *	Based on the hypercall; the caller may give up the CPU while
+ *	processing the hypercall. No locks should be held on entry and
+ *	no locks will be held on return.
+ *
+ */
+
+void
+nsHandleHyperCall(u64 opcode, u64 input, u64 output, 
+		  u64 *retVal)
+{
+	unsigned short	verb;
+	unsigned short	repCount;
+	unsigned short	repsDone =0;
+	unsigned short	startIndex;
+	nsPartition_t   *curp = nsGetCurrentPartition();
+	u64		partitionId;
+	int		value;
+	
+
+	verb = (short)(opcode & 0xffff);
+	repCount = (short)((opcode >>32) & 0xfff);
+	startIndex = (short)((opcode >> 48) & 0xfff);
+	switch (verb) {
+	case NS_CREATE_PARTITION:
+		/*
+		 * Xen only allows dom0 to create domains.
+		 */	
+		*retVal = nsBuildHcallRetVal(NS_STATUS_ACCESS_DENIED, 0);
+		return;
+	case NS_INITIALIZE_PARTITION:
+		/*
+		 * We don't support this.
+		 */
+		*retVal = nsBuildHcallRetVal(NS_STATUS_ACCESS_DENIED, 0);
+		return;
+	case NS_DELETE_PARTITION:
+		/*
+		 * We don't support this.
+		 */
+		*retVal = nsBuildHcallRetVal(NS_STATUS_ACCESS_DENIED, 0);
+		return;
+	case NS_GET_PARTITION_PROPERTY:
+		/*
+		 * We don't support this.
+		 */
+		*retVal = nsBuildHcallRetVal(NS_STATUS_ACCESS_DENIED, 0);
+		return;
+	case NS_SET_PARTITION_PROPERTY:
+		/*
+		 * We don't support this.
+		 */
+		*retVal = nsBuildHcallRetVal(NS_STATUS_ACCESS_DENIED, 0);
+		return;
+	case NS_GET_PARTITION_ID:
+		if (!nsPrivilegeCheck(curp, NS_ACCESS_PARTITION_ID)) {
+			*retVal = 
+			nsBuildHcallRetVal(NS_STATUS_ACCESS_DENIED, 0);
+			return;
+		}
+		partitionId = (u64)current->domain->domain_id;
+		if (nsXenVector.extCopyToGuestPhysical(output, 
+			&partitionId, 8)) {
+			/*
+			 * Invalid output area.
+			 */
+			*retVal = 
+			nsBuildHcallRetVal(NS_STATUS_ACCESS_DENIED, 0);
+			return;
+		}
+		*retVal = nsBuildHcallRetVal(NS_STATUS_SUCCESS, 0);
+		return;
+	case NS_GET_NEXT_CHILD_PARTITION:
+		/*
+		 * We don't support this.
+		 */
+		*retVal = nsBuildHcallRetVal(NS_STATUS_ACCESS_DENIED, 0);
+		return;
+	case NS_SET_LOGICAL_PROCESSOR_RUN_TIME_GROUP:
+		/*
+		 * We don't support this.
+		 */
+		*retVal = nsBuildHcallRetVal(NS_STATUS_ACCESS_DENIED, 0);
+		return;
+	case NS_CLEAR_LOGICAL_PROCESSOR_RUN_TIME_GROUP:
+		/*
+		 * We don't support this.
+		 */
+		*retVal = nsBuildHcallRetVal(NS_STATUS_ACCESS_DENIED, 0);
+		return;
+	case NS_NOTIFY_LOGICAL_PROCESSOR_POWER_STATE:
+		/*
+		 * We don't support this.
+		 */
+		*retVal = nsBuildHcallRetVal(NS_STATUS_ACCESS_DENIED, 0);
+		return;
+	case NS_GET_LOGICAL_PROCESSOR_RUN_TIME:
+		/*
+		 * We don't support this.
+		 */
+		*retVal = nsBuildHcallRetVal(NS_STATUS_ACCESS_DENIED, 0);
+		return;
+	case NS_DEPOSIT_MEMORY:
+		/*
+		 * We don't support this.
+		 */
+		*retVal = nsBuildHcallRetVal(NS_STATUS_ACCESS_DENIED, 0);
+		return;
+	case NS_WITHDRAW_MEMORY:
+		/*
+		 * We don't support this.
+		 */
+		*retVal = nsBuildHcallRetVal(NS_STATUS_ACCESS_DENIED, 0);
+		return;
+	case NS_GET_MEMORY_BALANCE:	
+		/*
+		 * We don't support this.
+		 */
+		*retVal = nsBuildHcallRetVal(NS_STATUS_ACCESS_DENIED, 0);
+		return;
+	case NS_MAP_GPA_PAGES:
+		/*
+		 * We don't support this.
+		 */
+		*retVal = nsBuildHcallRetVal(NS_STATUS_ACCESS_DENIED, 0);
+		return;
+	case NS_UNMAP_GPA_PAGES:	
+		/*
+		 * We don't support this.
+		 */
+		*retVal = nsBuildHcallRetVal(NS_STATUS_ACCESS_DENIED, 0);
+		return;
+	case NS_INSTALL_INTERCEPT:
+		/*
+		 * We don't support this.
+		 */
+		*retVal = nsBuildHcallRetVal(NS_STATUS_ACCESS_DENIED, 0);
+		return;
+	case NS_CREATE_VP:
+		/*
+		 * We don't support this.
+		 */
+		*retVal = nsBuildHcallRetVal(NS_STATUS_ACCESS_DENIED, 0);
+		return;
+	case NS_TERMINATE_VP:
+		/*
+		 * We don't support this.
+		 */
+		*retVal = nsBuildHcallRetVal(NS_STATUS_ACCESS_DENIED, 0);
+		return;
+	case NS_DELETE_VP:
+		/*
+		 * We don't support this.
+		 */
+		*retVal = nsBuildHcallRetVal(NS_STATUS_ACCESS_DENIED, 0);
+		return;
+	case NS_GET_NEXT_VP:
+		/*
+		 * We don't support this.
+		 */
+		*retVal = nsBuildHcallRetVal(NS_STATUS_ACCESS_DENIED, 0);
+		return;
+	case NS_GET_VP_REGISTERS:
+		*retVal = nsBuildHcallRetVal(
+		nsGetVpRegisters(input, output), 0);
+		return;
+	case NS_SET_VP_REGISTERS:
+		*retVal = nsBuildHcallRetVal(
+		nsSetVpRegisters(input, output), 0);
+	case NS_SWITCH_VA:
+		*retVal = 
+		nsBuildHcallRetVal(nsSwitchVa(input), 0);
+		return;
+	case NS_FLUSH_VA:
+		*retVal = 
+		nsBuildHcallRetVal(nsFlushVa(input), 0);
+		return;
+	case NS_FLUSH_VA_LIST:
+		value  = nsFlushVaRange(input, startIndex, 
+					repCount, &repsDone);
+		*retVal = nsBuildHcallRetVal(value, repsDone);  
+		return;
+		
+	case NS_TRASLATE_VA:	
+		/*
+		 * We don't support this.
+		 */
+		*retVal = nsBuildHcallRetVal(NS_STATUS_ACCESS_DENIED, 0);
+		return;
+	case NS_READ_GPA:
+		/*
+		 * We don't support this.
+		 */
+		*retVal = nsBuildHcallRetVal(NS_STATUS_ACCESS_DENIED, 0);
+		return;
+	case NS_WRITE_GPA:
+		/*
+		 * We don't support this.
+		 */
+		*retVal = nsBuildHcallRetVal(NS_STATUS_ACCESS_DENIED, 0);
+		return;
+	case NS_ASSERT_VIRTUAL_INTERRUPT:
+		/*
+		 * We don't support this.
+		 */
+		*retVal = nsBuildHcallRetVal(NS_STATUS_ACCESS_DENIED, 0);
+		return;
+	case NS_CLEAR_VIRTUAL_INTERRUPT:
+		/*
+		 * We don't support this.
+		 */
+		*retVal = nsBuildHcallRetVal(NS_STATUS_ACCESS_DENIED, 0);
+		return;
+	case NS_CREATE_PORT:
+		/*
+		 * We don't support this.
+		 */
+		*retVal = nsBuildHcallRetVal(NS_STATUS_ACCESS_DENIED, 0);
+		return;
+	case NS_DELETE_PORT:
+		/*
+		 * We don't support this.
+		 */
+		*retVal = nsBuildHcallRetVal(NS_STATUS_ACCESS_DENIED, 0);
+		return;
+	case NS_CONNECT_PORT:
+		/*
+		 * We don't support this.
+		 */
+		*retVal = nsBuildHcallRetVal(NS_STATUS_ACCESS_DENIED, 0);
+		return;
+	case NS_GET_PORT_PROPERTY:
+		/*
+		 * We don't support this.
+		 */
+		*retVal = nsBuildHcallRetVal(NS_STATUS_ACCESS_DENIED, 0);
+		return;
+	case NS_DISCONNECT_PORT:
+		/*
+		 * We don't support this.
+		 */
+		*retVal = nsBuildHcallRetVal(NS_STATUS_ACCESS_DENIED, 0);
+		return;
+	case NS_POST_MESSAGE:
+		/*
+		 * We don't support this.
+		 */
+		*retVal = nsBuildHcallRetVal(NS_STATUS_ACCESS_DENIED, 0);
+		return;
+	case NS_POST_EVENT:
+		/*
+		 * We don't support this.
+		 */
+		*retVal = nsBuildHcallRetVal(NS_STATUS_ACCESS_DENIED, 0);
+		return;
+	case 0:
+		/*
+		 * 32 bit longhorn invokes hypercall with verb == 0; need to 
+		 * check with Mike (XXXKYS). For now ignore it.
+		 */
+		*retVal = 
+		nsBuildHcallRetVal(NS_STATUS_INVALID_HYPERCALL_CODE, 0);
+		return;
+	default:
+		nsXenVector.extPrintk("Unkown hypercall: verb is: %d\n", verb); 
+		*retVal = 
+		nsBuildHcallRetVal(NS_STATUS_INVALID_HYPERCALL_CODE, 0);
+		return;
+	}
+}
Index: xen-unstable.hg/xen/arch/x86/hvm/hvm_ext/novell/nshypercall.h
===================================================================
--- /dev/null	1970-01-01 00:00:00.000000000 +0000
+++ xen-unstable.hg/xen/arch/x86/hvm/hvm_ext/novell/nshypercall.h	2008-03-03 14:03:49.000000000 -0500
@@ -0,0 +1,125 @@
+/****************************************************************************
+ |
+ | Copyright (c) [2007, 2008] Novell, Inc.
+ | All Rights Reserved.
+ |
+ | This program is free software; you can redistribute it and/or
+ | modify it under the terms of version 2 of the GNU General Public License as
+ | published by the Free Software Foundation.
+ |
+ | This program is distributed in the hope that it will be useful,
+ | but WITHOUT ANY WARRANTY; without even the implied warranty of
+ | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ | GNU General Public License for more details.
+ |
+ | You should have received a copy of the GNU General Public License
+ | along with this program; if not, contact Novell, Inc.
+ |
+ | To contact Novell about this file by physical or electronic mail,
+ | you may find current contact information at www.novell.com
+ |
+ |***************************************************************************
+*/
+
+/*
+ * nshypercall.h
+ * Memory layouts for the various hypercalls supported. 
+ *
+ * Engineering Contact: K. Y. Srinivasan
+ */
+
+#ifndef NS_HYPERCALL_H
+#define NS_HYPERCALL_H
+
+#include <xen/cpumask.h>
+
+
+typedef struct getVpRegistersInput {
+	u64	partitionId;
+	u64	vpIndex;
+	u32	regIndex;
+} getVpRegistersInput_t;
+
+typedef struct getVpRegistersOutput {
+	u64	lowValue;
+	u64	highValue;
+} getVpRegistersOutput_t;
+
+
+
+typedef struct setVpRegisterSpec {
+	u32	regName;
+	u32	pad;
+	u64	pad1;
+	u64	lowValue;
+	u64	highValue;
+} setVpRegisterSpec_t;
+typedef struct setVpRegistersInput {
+	u64	partitionId;
+	u64	vpIndex;
+	setVpRegisterSpec_t	regSpec;
+} setVpRegistersInput_t;
+
+
+typedef struct flushVa {
+	u64	asHandle;
+	u64	flags;
+	union  {
+		u64		processorMask;
+		cpumask_t 	vcpuMask;
+	} procMask;
+#define pMask 	procMask.processorMask
+#define vMask	procMask.vcpuMask
+	u64	gva;
+} flushVa_t;
+
+#define NS_FLUSH_ALL_PROCESSORS	0x00000001
+#define NS_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES 0x00000002
+#define NS_FLUSH_NON_GLOBAL_MAPPINGS_ONLY 0x00000004
+
+#define NS_ALL_AS	(-1)
+
+/*
+ * Hypercall verbs.
+ */
+
+#define NS_CREATE_PARTITION 	0x0010
+#define NS_INITIALIZE_PARTITION 0x0011
+#define NS_DELETE_PARTITION	0x0014
+#define NS_GET_PARTITION_PROPERTY 0x0017
+#define NS_SET_PARTITION_PROPERTY 0x0018
+#define NS_GET_PARTITION_ID	0x0015
+#define NS_GET_NEXT_CHILD_PARTITION 0x0016
+#define NS_SET_LOGICAL_PROCESSOR_RUN_TIME_GROUP 0x0005
+#define NS_CLEAR_LOGICAL_PROCESSOR_RUN_TIME_GROUP 0x0006
+#define NS_NOTIFY_LOGICAL_PROCESSOR_POWER_STATE	0x0007
+#define NS_GET_LOGICAL_PROCESSOR_RUN_TIME	0x0004
+#define NS_DEPOSIT_MEMORY	0x001C
+#define NS_WITHDRAW_MEMORY	0x001D
+#define NS_GET_MEMORY_BALANCE	0x001E
+#define NS_MAP_GPA_PAGES	0x001A
+#define NS_UNMAP_GPA_PAGES	0x001B
+#define NS_INSTALL_INTERCEPT	0x0019
+#define NS_CREATE_VP		0x001F
+#define NS_TERMINATE_VP		0x0020
+#define NS_DELETE_VP		0x0021
+#define NS_GET_NEXT_VP		0x0027
+#define NS_GET_VP_REGISTERS	0x0022
+#define NS_SET_VP_REGISTERS	0x0023
+#define NS_SWITCH_VA		0x0001
+#define NS_FLUSH_VA		0x0002
+#define NS_FLUSH_VA_LIST	0x0003
+#define NS_TRASLATE_VA		0x0024
+#define NS_READ_GPA		0x0025
+#define NS_WRITE_GPA		0x0026
+#define NS_ASSERT_VIRTUAL_INTERRUPT	0x002A
+#define NS_CLEAR_VIRTUAL_INTERRUPT	0x002C
+#define NS_CREATE_PORT			0x002D
+#define NS_DELETE_PORT			0x002E
+#define NS_CONNECT_PORT			0x002F
+#define NS_GET_PORT_PROPERTY		0x0031
+#define NS_DISCONNECT_PORT		0x0030
+#define NS_POST_MESSAGE			0x0032
+#define NS_POST_EVENT			0x0034
+
+#endif /* NS_HYPERCALL_H */
Index: xen-unstable.hg/xen/arch/x86/hvm/hvm_ext/novell/nsintercept.c
===================================================================
--- /dev/null	1970-01-01 00:00:00.000000000 +0000
+++ xen-unstable.hg/xen/arch/x86/hvm/hvm_ext/novell/nsintercept.c	2008-03-04 18:38:32.000000000 -0500
@@ -0,0 +1,2094 @@
+/****************************************************************************
+ |
+ | Copyright (c) [2007, 2008] Novell, Inc.
+ | All Rights Reserved.
+ |
+ | This program is free software; you can redistribute it and/or
+ | modify it under the terms of version 2 of the GNU General Public License as
+ | published by the Free Software Foundation.
+ |
+ | This program is distributed in the hope that it will be useful,
+ | but WITHOUT ANY WARRANTY; without even the implied warranty of
+ | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ | GNU General Public License for more details.
+ |
+ | You should have received a copy of the GNU General Public License
+ | along with this program; if not, contact Novell, Inc.
+ |
+ | To contact Novell about this file by physical or electronic mail,
+ | you may find current contact information at www.novell.com
+ |
+ |***************************************************************************
+*/
+
+/*
+ * nsintercept.c.
+ * This file implements the intercepts to support the  Novell Shim. 
+ *
+ * Engineering Contact: K. Y. Srinivasan
+ */
+
+#include <asm/hvm/hvm_extensions.h>
+
+
+#include <asm/config.h>
+#include <asm/hvm/io.h>
+#include <asm/processor.h>
+#include <asm/page.h>
+#include <asm/apicdef.h>
+#include <asm/regs.h>
+#include <asm/msr.h>
+
+#include <xen/string.h>
+#include <xen/init.h>
+#include <xen/compile.h>
+#include <xen/hvm/save.h>
+#include <public/sched.h>
+
+
+/*
+ * Local includes; extension specific.
+ */
+#include "ns_errno.h"
+#include "ns_shim.h"
+
+
+/*
+ * Implement Novell Shim.
+ */
+
+
+/*
+ * Hypervisor intercept vector.
+ */
+static int
+nsDomainCreate(struct domain *d);
+static void 
+nsDomainDestroy(struct domain *d);
+static int
+nsVcpuInitialize(struct vcpu *v);
+static void
+nsVcpuUp(struct vcpu *v);
+static void 
+nsVcpuDestroy(struct vcpu *v);
+static int
+nsDoCpuId(uint32_t input, struct cpu_user_regs *regs);
+static int
+nsDoRdMsr(uint32_t idx, struct cpu_user_regs *regs);
+static int
+nsDoWrMsr(uint32_t idx, struct cpu_user_regs *regs);
+static int
+nsDoHyperCall(struct cpu_user_regs *pregs);
+static void
+nsDoMigrateTimers(struct vcpu *v);
+
+extension_intercept_vector_t	nsExtensionVector = {
+	.domain_create = nsDomainCreate,
+	.domain_destroy = nsDomainDestroy,
+	.vcpu_initialize = nsVcpuInitialize,
+	.vcpu_destroy = nsVcpuDestroy,
+	.do_cpuid = nsDoCpuId,
+	.do_msr_read = nsDoRdMsr,
+	.do_msr_write = nsDoWrMsr,
+	.do_hypercall = nsDoHyperCall,
+	.do_continuation = nsDoTlbFlush, 
+	.do_migrate_timers = nsDoMigrateTimers,
+	.vcpu_up = nsVcpuUp 
+};
+
+/*
+ * Hooks into xen services; to be populated by our proxy in xen.
+ */
+
+xen_call_vector_t nsXenVector;
+
+static inline void
+nsInjectException(int trap);
+
+static inline void
+nsHypercallPageInitialize(void *hypercallPage,  nsPartition_t *curp);
+
+static inline void
+nsInitEventPage(void *siefPage);
+
+static inline void
+nsInitMessagePage(void *simPage);
+
+/*
+ * static int __init nsExtensionInit(void)
+ * Initialize the extensiom module.
+ *
+ * Calling/Exit State:
+ *	None.
+ */
+static int __init nsExtensionInit(void)
+{
+	int retVal;
+	retVal = hvm_ext_register(1, &nsExtensionVector, &nsXenVector);
+	NS_ASSERT(retVal == 0);
+	nsXenVector.extPrintk("NS Extension Initialized\n");
+	return 0;
+}
+__initcall(nsExtensionInit);
+
+/*
+ * Our lock primitives.
+ */
+/*
+ * void 
+ * nsLockAcquire(nsVcpu_t *vcpup, nsSpinLock_t *nsLock)
+ * Acquire the specified lock.
+ *
+ * Calling/Exit State:
+ *	None.
+ */
+void 
+nsLockAcquire(nsVcpu_t *vcpup, nsSpinLock_t *nsLock)
+{
+	NS_ASSERT(nsLock->owner != vcpup);
+	spin_lock_irqsave(&nsLock->spinLock, nsLock->flags);
+	nsLock->owner = vcpup;
+	nsLock->retAddr = __builtin_return_address(0);
+	vcpup->nsVcplockDepth++;
+}
+
+/*
+ * void 
+ * nsLockRelease(nsVcpu_t *vcpup, nsSpinLock_t *nsLock)
+ * Release the specified spin lock.
+ *
+ * Calling/Exit State:
+ *	None.
+ */
+void 
+nsLockRelease(nsVcpu_t *vcpup, nsSpinLock_t *nsLock)
+{
+	NS_ASSERT((nsLock->owner == vcpup));
+	nsLock->owner = NULL;
+	vcpup->nsVcplockDepth--;
+	NS_ASSERT(vcpup->nsVcplockDepth >= 0);
+	spin_unlock_irqrestore(&nsLock->spinLock, nsLock->flags);
+}
+
+/*
+ * void 
+ * nsLockInit(nsSpinLock_t *nsLock)
+ * Initialize the specified spin lock.
+ *
+ * Calling/Exit State:
+ *	None.
+ */
+void 
+nsLockInit(nsSpinLock_t *nsLock)
+{
+	spin_lock_init(&nsLock->spinLock);
+	nsLock->owner = NULL;
+	nsLock->retAddr = NULL;
+}
+
+/*
+ * static inline void nsWriteGuestIdMsr(nsPartition_t *curp, 
+ *					nsVcpu_t      *curVcpu,
+ *					u64 msrContent)
+ *	Write the guest ID.
+ *
+ * Calling/Exit State:
+ * 	None.
+ */
+static inline void 
+nsWriteGuestIdMsr(nsPartition_t *curp, nsVcpu_t *curVcpu, u64 msrContent)
+{
+	curp->nsGuestIdMsr = msrContent;
+	if (curp->nsGuestIdMsr == 0) {
+		/*
+		 * Guest has cleared the guest ID;
+		 * clear the hypercall page.
+		 */
+		if (curp->nsHypercallMsr)  {
+			curVcpu->nsVcpuFlags &= ~NS_VCPU_UP;
+		}
+	}
+}
+
+/*
+ * static inline void nsWriteHypercallMsr(nsPartition_t *curp,
+ *					  nsVcpu_t	*curVcpu,
+ *					  u64		msrContent)
+ *	Write hypercall msr.
+ *
+ * Calling/Exit State:
+ * 	None.
+ */
+
+static inline void 
+nsWriteHypercallMsr(nsPartition_t *curp,
+		  nsVcpu_t	*curVcpu,
+		  u64		msrContent)
+{
+	unsigned long gmfn;
+	void	*hypercallPage;
+	struct domain	*d = curVcpu->nsVcpXenVcpu->domain;
+
+	nsLockAcquire(curVcpu, &curp->nsLock);
+	gmfn = (msrContent >> 12);
+	if (curp->nsGuestIdMsr == 0) {
+		/* Nothing to do if the guest is not registered*/
+		nsLockRelease(curVcpu, &curp->nsLock);
+		return;
+	}
+	/*
+	 * Guest is registered; see if we can turn-on the 
+	 * hypercall page.
+	 * XXXKYS: Can the guest write the GPA in one call and 
+	 * subsequently enable it? Check. For now assume that all the
+	 * info is specified in one call.
+	 */
+	if (((u32)msrContent & (0x00000001)) == 0) {	
+		/*
+		 * The client is not enabling the hypercall; just
+		 * ignore everything. 
+		 */
+		nsLockRelease(curVcpu, &curp->nsLock);
+		return;
+	}
+	hypercallPage = nsXenVector.extGetVirtFromGmfn(d,gmfn);
+	if (hypercallPage == NULL) {
+		/*
+		 * The guest specified a bogus GPA; inject a GP fault
+		 * into the guest.
+		 */
+		nsInjectException(TRAP_gp_fault);
+		nsLockRelease(curVcpu, &curp->nsLock);
+		return;
+	}
+	nsHypercallPageInitialize(hypercallPage, curp);
+	curp->nsHypercallMfn = nsXenVector.extGetMfnFromGmfn(d, gmfn);
+#ifdef CONFIG_DOMAIN_PAGE
+	nsXenVector.extUnmapDomainPage(hypercallPage);
+#endif
+	curp->nsHypercallMsr = msrContent;
+	nsLockRelease(curVcpu, &curp->nsLock);
+	curVcpu->nsVcpuFlags |= NS_VCPU_UP;
+}
+
+/*
+ * static inline void nsWriteSxMsr(uint32_t idx, nsPartition_t *curp,
+ *					  nsVcpu_t	*curVcpu,
+ *					  u64		msrContent)
+ *	Write SIEFP or SIMP  msr.
+ *
+ * Calling/Exit State:
+ * 	None.
+ */
+
+static inline void nsWriteSxMsr(uint32_t idx, nsPartition_t *curp,
+				  nsVcpu_t	*curVcpu,
+				  u64		msrContent)
+{
+	unsigned long gmfn;
+	void            *sxPage;
+	struct domain	*d = curVcpu->nsVcpXenVcpu->domain;
+	gmfn = (msrContent >> 12);
+	/*
+	 * Can the client enable the siefp and specify 
+	 * the base address in two 
+	 * different calls? XXXKYS: For now assume 
+	 * that it is done in one call.
+	 */
+	if (!((u32)msrContent & (0x00000001))) {	
+		/*
+		 * The client is not enabling the sx page; just
+		 * ignore everything. 
+		 */
+		return;
+	}
+	sxPage = nsXenVector.extGetVirtFromGmfn(d, gmfn);
+	if (sxPage == NULL) {
+		/*
+		 * The guest specified a bogus GPA; inject a GP fault
+		 * into the guest.
+		 */
+		nsInjectException(TRAP_gp_fault);
+		return;
+	}
+	switch (idx) {
+		case NS_MSR_SIEFP:
+			nsInitEventPage(sxPage);
+			curVcpu->nsVcpSIefpMsr = msrContent; 
+			curVcpu->nsVcpSiefPage = sxPage; 
+			break;
+		case NS_MSR_SIMP:
+			nsInitMessagePage(sxPage);
+			curVcpu->nsVcpSimpMsr = msrContent;
+			curVcpu->nsVcpSimPage = sxPage;
+			break;
+	}
+
+}
+
+/*
+ * Time this domain booted.
+ */
+s_time_t nsDomainBootTime;
+
+/*
+ * static inline u64
+ * nsGetTimeSinceDomainBoot(void)
+ * Retrieve the time since boot in 100ns units.
+ *
+ * Calling/Exit State:
+ *	None.
+ */
+
+static inline u64
+nsGetTimeSinceDomainBoot(void)
+{
+	u64	curTime = nsXenVector.extGetTimeSinceBoot();
+	return ((curTime - nsDomainBootTime)/100) ;
+}
+
+/*
+ * static inline int
+ * nsCallFromBios(struct cpu_user_regs *regs)
+ * Check if the caller is in the right state to consumE the services of the 
+ * extension module.
+ *
+ * Calling/Exit State:
+ *	None.
+ */
+
+static inline int
+nsCallFromBios(struct cpu_user_regs *regs)
+{
+	if (hvm_paging_enabled(current)) {
+		return (0);
+	} else {
+		return (1);
+	}
+}
+
+/*
+ * static inline void
+ * nsInjectException(int trap)
+ * Injecct the specified exception into the invoking virtual CPU.
+ *
+ * Calling/Exit State:
+ *	None.
+ */
+ 
+static inline void
+nsInjectException(int trap)
+{
+	nsXenVector.hvmFuncTable->inject_exception(trap, 0, 0);
+}
+
+
+/*
+ * static inline int
+ * nsOsRegistered(void)
+ * Check to see if the guest has registered itself with the Novell Shim.
+ *
+ * Calling/Exit State:
+ *	None.
+ */
+ 
+static inline int
+nsOsRegistered(void)
+{
+	nsPartition_t	*curp = nsGetCurrentPartition();
+	return (curp->nsGuestIdMsr != 0?1:0);
+}
+
+
+/*
+ * static inline void 
+ * nsSetPartitionPrivileges(nsPartition_t *nspp)
+ * Set the partitionwide privileges. Currently it is harcoded.
+ * We could perhaps make this an attribute of the domain and have the
+ * configuration tools manage it.
+ *
+ * Calling/Exit State:
+ *	None.
+ */
+
+static inline void 
+nsSetPartitionPrivileges(nsPartition_t *nspp)
+{
+	/*
+	 * This is based on the hypervisor spec under section 5.2.3. 
+	 */
+	nspp->nsPrivileges = 0x000000020000007f;
+}
+
+/*
+ * static inline u32
+ * nsGetRecommendations(void)
+ * Get the recommendations.
+ *
+ * Calling/Exit State:
+ *	None.
+ */
+static inline u32
+nsGetRecommendations(void)
+{
+	/*
+	 *For now we recommend all the features. Need to validate.
+	 */
+	if ( paging_mode_hap(current->domain)) {
+		/*
+		 * If HAP is enabled; the guest should not use TLB flush
+		 * related enlightenments.
+		 */
+		return (0x19);
+	} else {
+		return (0x1f); 
+	}
+}
+
+/*
+ * static inline void 
+ * nsSetPartitionFeatures(nsPartition_t *nspp)
+ * Set the partitionwide features. Currently it is harcoded.
+ * We could perhaps make this an attribute of the domain and have the
+ * configuration tools manage it.
+ *
+ * Calling/Exit State:
+ *	None.
+ */
+
+static inline void 
+nsSetPartitionFeatures(nsPartition_t *nspp)
+{
+	nspp->nsSupportedFeatures = 0x1f;
+}
+
+static inline u16 
+nsGetGuestMajor(void)
+{
+	return (0);
+}
+static inline u16
+nsGetGuestMinor(void)
+{
+	return (0);
+}
+static inline u32
+nsGetGuestServicePack(void)
+{
+	return (0);
+}
+ 
+static inline u8 
+nsGetGuestServiceBranchInfo(void)
+{
+	return (0);
+}
+static inline u32 
+nsGetGuestServiceNumber(void)
+{
+	return (0);
+}
+
+/*
+ * static inline u32
+ * nsGetSupportedSyntheticMsrs(void)
+ * Get the synthetic MSRs supported by the Novell Shim. Currently
+ * it is hardcoded.
+ *
+ * Calling/Exit State:
+ *	None.
+ */
+static inline u32
+nsGetSupportedSyntheticMsrs(void)
+{
+	/*
+	 * All MSRS in the spec version 0.83 including RESET MSR. 
+	 */
+	return (0xff);
+}
+
+
+/*
+ * static inline u32
+ * nsGetMaxVcpusSupported(void)
+ * Retrieve the maximum vcpus supported.
+ *
+ * Calling/Exit State:
+ *	None.
+ */
+
+static inline u32
+nsGetMaxVcpusSupported(void)
+{
+	return MAX_VIRT_CPUS;
+}
+
+/*
+ * static inline u32
+ * nsGetMaxLcpusSupported(void)
+ * Retrieve the maximum physical cpus supported.
+ *
+ * Calling/Exit State:
+ *	None.
+ */
+static inline u32
+nsGetMaxLcpusSupported(void)
+{
+	return NR_CPUS;
+}
+
+
+/*
+ * static inline void
+ * nsReadIcr(u64 *icrContent)
+ * Read the ICR of the local APIC of the calling VCPU.
+ *
+ * Calling/Exit State:
+ *	None.
+ */
+static inline void
+nsReadIcr(u64 *icrContent)
+{
+	u32	icrLow, icrHigh;
+	u64	retVal;
+
+
+	icrLow = nsXenVector.mmIoHandler->read_handler(current, 
+		 (vlapic_base_address(vcpu_vlapic(current)) + 0x300), 4);
+	icrHigh = nsXenVector.mmIoHandler->read_handler(current, 
+		 (vlapic_base_address(vcpu_vlapic(current)) + 0x310), 4);
+	retVal = icrHigh;
+	*icrContent = ((retVal << 32) | icrLow);
+
+}
+
+/*
+ * static inline void
+ * nsReadTpr(u64 *tprContent)
+ * Read the TPR of the local APIC of the calling VCPU.
+ *
+ * Calling/Exit State:
+ *	None.
+ */
+static inline void
+nsReadTpr(u64 *tprContent)
+{
+	u32	tprLow;
+
+
+	tprLow = nsXenVector.mmIoHandler->read_handler(current, 
+		 (vlapic_base_address(vcpu_vlapic(current)) + 0x80), 4);
+	*tprContent = (u64)tprLow;
+
+}
+
+/*
+ * static inline void
+ * nsWriteEoi(u64 msrContent)
+ * Write the EOI register of the local APIC of the calling VCPU.
+ *
+ * Calling/Exit State:
+ *	None.
+ */
+static inline void
+nsWriteEoi(u64 msrContent)
+{
+	u32 eoi = (u32)msrContent;
+
+	nsXenVector.mmIoHandler->write_handler(current, 
+		 (vlapic_base_address(vcpu_vlapic(current)) + 0xb0), 4, eoi);
+
+}
+
+/*
+ * static inline void
+ * nsWriteIcr(u64 msrContent)
+ * Write the ICR register of the local APIC of the calling VCPU.
+ *
+ * Calling/Exit State:
+ *	None.
+ */
+static inline void
+nsWriteIcr(u64 msrContent)
+{
+	u32	icrLow, icrHigh;
+	icrLow = (u32)msrContent;
+	icrHigh = (u32)(msrContent >> 32);
+
+	if (icrHigh != 0) {
+		nsXenVector.mmIoHandler->write_handler(current, 
+		 (vlapic_base_address(vcpu_vlapic(current)) + 0x310), 4, 
+		icrHigh);
+	}
+	if (icrLow != 0) {
+		nsXenVector.mmIoHandler->write_handler(current, 
+		 (vlapic_base_address(vcpu_vlapic(current)) + 0x300), 4, 
+		icrLow);
+	}
+
+}
+
+/*
+ * static inline void
+ * nsWriteTpr(u64 msrContent)
+ * Write the TPR register of the local APIC of the calling VCPU.
+ *
+ * Calling/Exit State:
+ *	None.
+ */
+static inline void
+nsWriteTpr(u64 msrContent)
+{
+	u32 tpr = (u32)msrContent;
+
+
+	nsXenVector.mmIoHandler->write_handler(current, 
+		 (vlapic_base_address(vcpu_vlapic(current)) + 0x80), 4, tpr);
+
+}
+
+/*
+ * static inline void
+ * nsHypercallPageInitialize(void *hypercallPage,  nsPartition_t *curp)
+ * Initialize the hypercall page to support the Novell Shim Hypercalls.
+ *
+ * Calling/Exit State:
+ *	None.
+ */
+static inline void
+nsHypercallPageInitialize(void *hypercallPage, nsPartition_t *curp)
+{
+	char *p;
+
+	if (nsXenVector.hvmFuncTable->guest_x86_mode(current) == 8) {
+		curp->nsLongModeGuest = 1;
+	} else {
+		curp->nsLongModeGuest = 0;
+	}
+
+	memset(hypercallPage, 0, PAGE_SIZE);
+	p = (char *)(hypercallPage) ;
+	*(u8  *)(p + 0) = 0x0f; /* vmcall */
+       	*(u8  *)(p + 1) = 0x01;
+	if (nsXenVector.extCpuIsIntel()) {
+       		*(u8  *)(p + 2) = 0xc1;
+	} else { 
+       		*(u8  *)(p + 2) = 0xd9;
+	}
+       	*(u8  *)(p + 3) = 0xc3; /* ret */
+}
+
+/*
+ * static inline void
+ * nsInitEventPage(void *siefPage)
+ * Initialize the per-vcpu event page.
+ *
+ * Calling/Exit State:
+ *	None.
+ */
+static inline void
+nsInitEventPage(void *siefPage)
+{
+	memset(siefPage, 0, PAGE_SIZE);
+}
+
+/*
+ * static inline void
+ * nsInitMessagePage(void *siefPage)
+ * Initialize the per-vcpu message page.
+ *
+ * Calling/Exit State:
+ *	None.
+ */
+static inline void
+nsInitMessagePage(void *simPage)
+{
+	memset(simPage, 0, PAGE_SIZE);
+}
+
+
+/*
+ * static inline void
+ * nsProcessMessageQ(nsPartition_t *curp, nsVcpu_t *curVcpu)
+ * Process the message queue.
+ *
+ * Calling/Exit State:
+ *	None.
+ */
+static inline void
+nsProcessMessageQ(nsPartition_t *curp, nsVcpu_t *curVcpu)
+{
+	/*
+	 * XXXKYS: we currently do not support queued messages.
+	 */
+}
+
+/*
+ * static inline void
+ * nsScheduleTimeOut(nsVcpTimerState_t *timer) 
+ * Schedule a timeout based on the specified timer.
+ *
+ *
+ * Calling/Exit State:
+ *	None.
+ */
+static inline void
+nsScheduleTimeOut(nsVcpTimerState_t *timer) 
+{
+	/*
+	 * We maintain the count in the units of 100ns. Furthermore,
+	 * this is not relative to NOW() but rather absolute.
+	 */
+	nsXenVector.extSetTimer(&timer->vcpuTimer, (timer->count * 100));
+}
+
+/*
+ * static void
+ * nsTimeOutHandler(void *arg)
+ * The timeout handler for Novell Shim/Adaptor.
+ *
+ * Calling/Exit State:
+ *	None.
+ */
+
+static void
+nsTimeOutHandler(void *arg)
+{
+	nsVcpTimerState_t	*timerData = arg;
+	nsVcpu_t	*curVcpu = timerData->thisCpu;
+	int		sIntNum;
+	int		vector;
+	if (!(curVcpu->nsVcpSControlMsr & 0x9)) {
+		goto nsToPostProcess;
+	}
+	/*
+	 * SynIC is enabled; do further processing. Timeouts are posted as
+	 * messages; verify if the message page is enabled.
+	 */
+	if (!(curVcpu->nsVcpSimpMsr & 0x1)) {
+		goto nsToPostProcess;
+	}
+	sIntNum = (((u32)(timerData->config >> 16)) & 0x0000000f);
+	/*
+	 * First post the message and then optionally deal with the 
+	 * interrupt notification.
+	 */
+	if (curVcpu->nsVcpSimPage == NULL) {
+		NS_PANIC("Novell Shim: Sim page not setup\n");
+	}
+	if ((((nsMessage_t *)curVcpu->nsVcpSimPage)[sIntNum]).messageType !=
+		nsMessageTypeNone) {
+		/*
+		 * The message slot is not empty just silently return.
+		 */
+		goto nsToPostProcess;
+	}
+	/*
+	 * The slot is available; post the message.
+	 */
+	(((nsTimerMessage_t *)curVcpu->nsVcpSimPage)[sIntNum]).messageType = 
+	nsMessageTimerExpired;
+	(((nsTimerMessage_t *)curVcpu->nsVcpSimPage)[sIntNum]).messageSize = 
+	sizeof(nsTimerMessage_t);
+	(((nsTimerMessage_t *)curVcpu->nsVcpSimPage)[sIntNum]).timerIndex = 
+	timerData->timerIndex;
+	(((nsTimerMessage_t *)curVcpu->nsVcpSimPage)[sIntNum]).expirationTime = 
+	timerData->count;
+	if ((curVcpu->nsVcpSIntMsr[sIntNum] >> 16) &0x1) {
+		/*
+		 * The designated sintx register is masked; just return.
+		 */
+		goto nsToPostProcess;
+	}
+	vector = ((u32)curVcpu->nsVcpSIntMsr[sIntNum] &0xff);
+
+	/*
+	 * Now post the interrupt to the VCPU.
+	 * XXXKYS: What is the delivery mode for interrupts delivered here.
+	 * Check with Mike?
+	 */
+	nsXenVector.extPostInterrupt(current, vector, APIC_DM_FIXED);
+	
+	/*
+	 * If auto eoi is set; deal with that.
+	 */
+	if (((u32)(curVcpu->nsVcpSIntMsr[sIntNum] >> 16)) & 0x1) {
+		nsWriteEoi(0);
+	}
+
+nsToPostProcess:
+	/*
+	 * Prior to returning, deal with all the post timeout issues.
+	 */
+	if (((u32)(timerData->config))  & 0x00000002) {
+		NS_STATS_COLLECT(NS_TIMEOUTS, &curVcpu->nsVcpStats);
+		nsScheduleTimeOut(timerData);
+	}
+}
+
+/*
+ * static inline void
+ * nsTimerInit(nsVcpu_t *vcpup, int timer)
+ * Initialize the specified timer structure.
+ *
+ * Calling/Exit State:
+ *	None.
+ */
+
+static inline void
+nsTimerInit(nsVcpu_t *vcpup, int timer)
+{
+	vcpup->nsVcpTimers[timer].config = 0;
+	vcpup->nsVcpTimers[timer].count = 0;
+	vcpup->nsVcpTimers[timer].thisCpu = vcpup;
+	vcpup->nsVcpTimers[timer].timerIndex = timer;
+	init_timer(&vcpup->nsVcpTimers[timer].vcpuTimer, nsTimeOutHandler, 
+		&vcpup->nsVcpTimers[timer], current->processor);
+}
+
+/*
+ * static inline int
+ * nsAccessTimeRefCnt(nsPartition_t *curp, u64 *msrContent)
+ * Read the per-partition time base.
+ *
+ * Calling/Exit State:
+ *	None.
+ */
+
+static inline int
+nsAccessTimeRefCnt(nsPartition_t *curp, u64 *msrContent)
+{
+	if (!nsPrivilegeCheck(curp, NS_ACCESS_TIME_REF_CNT)) {
+		/*
+		 * The partition does not have the privilege to
+		 * read this; return error.
+		 */
+		return (0);
+	}
+	*msrContent = nsGetTimeSinceDomainBoot();
+	return (1);
+}
+
+/*
+ * static void
+ * nsDoMigrateTimers(struct vcpu *v)
+ * The binding between this vcpu and the physical cpu has changed; migrate 
+ * the timers for this vcpu.
+ *
+ * Calling/Exit State:
+ *	The new binding is already in place.
+ */
+
+static void
+nsDoMigrateTimers(struct vcpu *v)
+{
+	nsPartition_t	*curp = nsGetCurrentPartition();
+	nsVcpu_t        *vcpup;
+	int i;
+	vcpup  =  &curp->nsVcpuState[nsGetCurrentVcpuIndex()];
+
+	for (i=0; i<4; i++) {
+		nsXenVector.extMigrateTimer(&vcpup->nsVcpTimers[i].vcpuTimer,
+					    v->processor);
+	}
+}
+		
+/*
+ * static void
+ * nsVcpuUp(struct vcpu *v)
+ * A secondary processor has come on line; mark the processor as up.
+ *
+ * Calling/Exit State:
+ *	None.	
+ */
+
+static void
+nsVcpuUp(struct vcpu *v)
+{
+	nsPartition_t	*curp = nsGetCurrentPartition();
+	nsVcpu_t        *vcpup;
+	vcpup  =  &curp->nsVcpuState[v->vcpu_id];
+	vcpup->nsVcpuFlags |= NS_VCPU_UP;
+}
+		
+/*
+ * static int
+ * nsDoHyperCall(struct cpu_user_regs *pregs)
+ * Intercept for implementing Extension hypercalls.
+ *
+ * Calling/Exit State:
+ *	Based on the hypercall; the caller may give up the CPU while
+ *	processing the hypercall. No locks should be held on entry and
+ *	no locks will be held on return.
+ *
+ *
+ */
+
+static int
+nsDoHyperCall(struct cpu_user_regs *pregs)
+{
+	nsPartition_t	*curp = nsGetCurrentPartition();
+	nsVcpu_t        *vcpup;
+	int	longModeGuest = curp->nsLongModeGuest;
+	unsigned long hypercallMfn;
+	unsigned long gmfn;
+	gmfn = (curp->nsHypercallMsr >> 12);
+
+	hypercallMfn = nsXenVector.extGetMfnFromGva(pregs->eip);
+
+	if (hypercallMfn == curp->nsHypercallMfn) {
+		u64	opcode, input, output, retVal;
+		vcpup  =  &curp->nsVcpuState[nsGetCurrentVcpuIndex()];
+
+		/* 
+		 * This is an extension hypercall; process it; but first make
+		 * sure that the CPU is in the right state for invoking
+		 * the hypercall - protected mode at CPL 0.
+		 */
+		if (nsInvalidCpuState())  {
+			nsInjectException(TRAP_gp_fault);
+                	retVal = nsBuildHcallRetVal(NS_STATUS_INVALID_VP_STATE, 
+						  0);
+			nsSetSysCallRetVal(pregs, longModeGuest, retVal);
+			return (1);
+		}
+		if (longModeGuest) {
+			opcode = pregs->ecx;
+			input = pregs->edx;
+			output = pregs->r8;
+		} else {
+			opcode = 
+			((((u64)pregs->edx) << 32) | ((u64)pregs->eax));
+			input = 
+			((((u64)pregs->ebx) << 32) | ((u64)pregs->ecx));
+			output = 
+			((((u64)pregs->edi) << 32) | ((u64)pregs->esi));
+		}
+		NS_ASSERT(vcpup->nsVcplockDepth == 0);
+		nsHandleHyperCall(opcode, input, output, &retVal); 
+		nsSetSysCallRetVal(pregs, longModeGuest, retVal);
+		NS_ASSERT(vcpup->nsVcplockDepth == 0);
+		return (1);
+	}
+	/*
+	 * This hypercall page is not the page for extension.
+	 */
+	return (0);
+}
+
+/*
+ * static int 
+ * nsDomainCreate(struct domain *d)
+ * NS intercept for domain creation.
+ *
+ * Calling/Exit State:
+ *	None.
+ */
+ 
+	 
+static int 
+nsDomainCreate(struct domain *d)
+{
+	nsPartition_t	*nspp;
+	nspp = nsXenVector.extAllocMem(sizeof(nsPartition_t));
+	if (nspp == NULL) {
+		nsDebugPrint("Memory allocation failed\n");
+		return (1);
+	}
+	memset(nspp, 0, sizeof(*nspp));
+	nsLockInit(&nspp->nsLock);
+	/*
+	 * Set the partition wide privilege; We can start with no privileges 
+	 * and progressively turn on fancier hypervisor features.
+	 */
+	nsSetPartitionPrivileges(nspp);
+	nsSetPartitionFeatures(nspp);
+	/*
+	 * Stash away pointer to our state in the hvm domain structure.
+	 */
+	d->arch.hvm_domain.ext_handle = nspp;
+	nsDomainBootTime = nsXenVector.extGetTimeSinceBoot();
+	return (0);
+}
+
+
+
+/*
+ * static void 
+ * nsDomainDestroy(struct domain *d)
+ * NS intercept for the domain destruction.
+ *
+ * Calling/Exit State:
+ *	None.
+ */
+static void 
+nsDomainDestroy(struct domain *d)
+{
+	int i;
+	nsPartition_t *curp = d->arch.hvm_domain.ext_handle;
+	nsXenVector.extPrintk("NS Domain Being Destroyed\n");
+	NS_ASSERT(curp != NULL);
+	nsXenVector.extPrintk("DUMP STATS\n");
+	nsXenVector.extPrintk("GFS cpucount is %d\n", curp->nsFlushState.cpuCount);
+	if (curp->nsFlushState.currentOwner != NULL) {
+		nsXenVector.extPrintk("GFS owner  is %d\n", curp->nsFlushState.currentOwner->vcpu_id);
+	} else {
+		nsXenVector.extPrintk("GFS is free\n");
+	}
+	if (!cpus_empty(curp->nsFlushState.waiters)) {
+		nsXenVector.extPrintk("GFS: waiters not empty\n");
+	} else {
+		nsXenVector.extPrintk("GFS: waiters  empty\n");
+	}
+	for (i=0; i < MAX_VIRT_CPUS; i++) {
+		if (d->vcpu[i] != NULL) {
+			nsPrintStats(curp, i);
+		}
+	}
+	
+	nsXenVector.extFreeMem(d->arch.hvm_domain.ext_handle);	
+	d->arch.hvm_domain.ext_handle = NULL;
+}
+
+/*
+ * static int
+ * nsVcpuInitialize(struct vcpu *v)
+ * NS intercept for vcpu creation.
+ *
+ * Calling/Exit State:
+ *	None.
+ */
+ 
+static int
+nsVcpuInitialize(struct vcpu *v)
+{
+	nsVcpu_t	*vcpup;
+	nsPartition_t	*curp = v->domain->arch.hvm_domain.ext_handle;
+	int		i;
+	vcpup = &curp->nsVcpuState[v->vcpu_id];
+	atomic_inc(&curp->nsNumVcpusActive);
+	if (v->vcpu_id == 0) {
+		vcpup->nsVcpuFlags |= NS_VCPU_BOOT_CPU;
+	}
+	/*
+	 * Initialize all the synthetic MSRs corresponding to this VCPU. 
+	 * Note that all state is set to 0 to begin 
+	 * with.
+	 */
+	vcpup->nsVcpSVersionMsr = 0x00000001;
+	/*
+	 * Initialize the synthetic timet structures.
+	 */
+	for (i=0; i < 4; i++) {
+		nsTimerInit(vcpup, i);
+	}
+	/*
+	 * Setup the input page for handling hypercalls.
+	 *
+	 */
+	vcpup->nsVcpInputBufferPage = 	
+	nsXenVector.extAllocDomHeapPage();	
+	if (vcpup->nsVcpInputBufferPage == NULL) {
+		nsDebugPrint("Memory allocation failed\n");
+		return (1);
+	}
+	vcpup->nsVcpInputBuffer =
+	nsXenVector.extGetVirtFromPagePtr(vcpup->nsVcpInputBufferPage);	
+	if (vcpup->nsVcpInputBuffer == NULL) {
+		nsDebugPrint("Coud not get VA\n");
+		nsXenVector.extFreeDomHeapPage(vcpup->nsVcpInputBufferPage);	
+		return (1);
+	}
+	memset(vcpup->nsVcpInputBuffer, 0, PAGE_SIZE); 
+	vcpup->nsVcpOutputBufferPage = 	
+	nsXenVector.extAllocDomHeapPage();	
+	if (vcpup->nsVcpOutputBufferPage == NULL) {
+		nsDebugPrint("Memory allocation failed\n");
+#ifdef CONFIG_DOMAIN_PAGE
+		nsXenVector.extUnmapDomainPage(vcpup->nsVcpInputBuffer);
+#endif
+		nsXenVector.extFreeDomHeapPage(vcpup->nsVcpInputBufferPage);	
+		return (1);
+	}
+	vcpup->nsVcpOutputBuffer =
+	nsXenVector.extGetVirtFromPagePtr(vcpup->nsVcpOutputBufferPage);	
+	if (vcpup->nsVcpOutputBuffer == NULL) {
+		nsDebugPrint("Coud not get VA\n");
+		nsXenVector.extFreeDomHeapPage(vcpup->nsVcpOutputBufferPage);	
+#ifdef CONFIG_DOMAIN_PAGE
+		nsXenVector.extUnmapDomainPage(vcpup->nsVcpInputBuffer);
+#endif
+		nsXenVector.extFreeDomHeapPage(vcpup->nsVcpInputBufferPage);	
+		return (1);
+	}
+	vcpup->nsVcpXenVcpu = v; 
+	vcpup->nsVcpFlushRequest = 0;
+
+	return (0);
+}
+
+/*
+ * static void 
+ * nsVcpuDestroy(struct vcpu *v)
+ * NS intercept for domain destruction.
+ *
+ * Calling/Exit State:
+ *	None.
+ */
+static void 
+nsVcpuDestroy(struct vcpu *v)
+{
+	nsVcpu_t	*vcpup;
+	nsPartition_t	*curp = v->domain->arch.hvm_domain.ext_handle;
+	int 		i;
+
+	vcpup = &curp->nsVcpuState[v->vcpu_id];
+	atomic_dec(&curp->nsNumVcpusActive);
+	vcpup->nsVcpuFlags &= ~NS_VCPU_UP;
+	/*
+	 * Get rid of the pages we have allocated for this VCPU.
+	 */
+#ifdef CONFIG_DOMAIN_PAGE
+	nsXenVector.extUnmapDomainPage(vcpup->nsVcpSiefPage);
+	nsXenVector.extUnmapDomainPage(vcpup->nsVcpSimPage);
+	nsXenVector.extUnmapDomainPage(vcpup->nsVcpInputBuffer);
+	nsXenVector.extUnmapDomainPage(vcpup->nsVcpOutputBuffer);
+#endif
+
+	nsXenVector.extFreeDomHeapPage(vcpup->nsVcpInputBufferPage);	
+	nsXenVector.extFreeDomHeapPage(vcpup->nsVcpOutputBufferPage);	
+	/*
+	 * Kill the timers 
+	 */
+	for (i=0; i < 4; i++) {
+		nsXenVector.extKillTimer(&vcpup->nsVcpTimers[i].vcpuTimer);
+	}
+	return;
+}
+
+/*
+ * static int nsVcpuSave(struct domain *d, hvm_domain_context_t *h)
+ *	Save per-cpu shim state to support either migration or domain save.
+ *
+ * Calling exit state:
+ *	None.
+ */
+static int 
+nsVcpuSave(struct domain *d, hvm_domain_context_t *h)
+{
+	struct vcpu *v;
+	struct hvm_ns_veridian_cpu ctxt;
+
+	nsVcpu_t	*vcpup;
+	nsPartition_t	*curp = d->arch.hvm_domain.ext_handle;
+	int i;
+
+	if (curp == NULL) {
+		return 0;
+	}
+	for_each_vcpu(d, v) {
+		vcpup = &curp->nsVcpuState[v->vcpu_id];
+	
+		NS_ASSERT(vcpup->nsVcplockDepth == 0);
+		NS_ASSERT(vcpup->nsVcpFlushRequest == 0);
+		NS_ASSERT(vcpup->nsVcpWaitingOnGFS == 0);
+		NS_ASSERT(vcpup->nsVcpFlushPending == 0);
+		NS_ASSERT(vcpup->nsVcpWaitingForCleanup == 0);
+        	/* 
+	 	 * We don't need to save state for a 
+		 * vcpu that is down; the restore
+         	 * code will leave it down if there is nothing saved. 
+	 	 */
+        	if ( test_bit(_VPF_down, &v->pause_flags) )
+            		continue;
+		ctxt.control_msr = vcpup->nsVcpSControlMsr;
+		ctxt.version_msr = vcpup->nsVcpSVersionMsr;
+		ctxt.sief_msr = vcpup->nsVcpSIefpMsr;
+		ctxt.simp_msr = vcpup->nsVcpSimpMsr;
+		ctxt.eom_msr = vcpup->nsVcpEomMsr;
+		for (i=0; i < 16; i++)
+			ctxt.int_msr[i] = vcpup->nsVcpSIntMsr[i];
+		for (i=0; i < 4; i++) {
+			ctxt.timers[i].config = vcpup->nsVcpTimers[i].config;
+			/*
+			 * Save the count in units of 100ns relative to NOW()
+			 * When we restore we will add NOW() to properly
+			 * account for the elapsed time when the timer was
+			 * active.
+			 */ 
+			if (vcpup->nsVcpTimers[i].count > ((NOW())/100)) {
+				ctxt.timers[i].count = 
+				(vcpup->nsVcpTimers[i].count - ((NOW())/100));
+			} else {
+				ctxt.timers[i].count = 0;
+			} 
+		}
+		if ( hvm_save_entry(NS_VERIDIAN_CPU, 
+			v->vcpu_id, h, &ctxt) != 0 )
+			return 1;
+	}
+
+	return 0;
+}
+
+/*
+ * static int nsVcpuRestore(struct domain *d, hvm_domain_context_t *h)
+ *	Restore per-cpu shim state to support either migration or domain save.
+ *
+ * Calling exit state:
+ *	None.
+ */
+static int 
+nsVcpuRestore(struct domain *d, hvm_domain_context_t *h)
+{
+	int vcpuid, i;
+	struct hvm_ns_veridian_cpu ctxt;
+
+	nsVcpu_t	*vcpup;
+	nsPartition_t	*curp = d->arch.hvm_domain.ext_handle;
+
+	if (curp == NULL) {
+		return 0;
+	}
+	/* Which vcpu is this? */
+	vcpuid = hvm_load_instance(h);
+	vcpup = &curp->nsVcpuState[vcpuid];
+	NS_ASSERT(vcpup != NULL);
+	if ( hvm_load_entry(NS_VERIDIAN_CPU, h, &ctxt) != 0 )
+        	return -22;
+
+	vcpup->nsVcpSControlMsr = ctxt.control_msr;
+	vcpup->nsVcpSVersionMsr = ctxt.version_msr;
+
+	nsWriteSxMsr(NS_MSR_SIEFP, curp, vcpup, ctxt.sief_msr); 
+	nsWriteSxMsr(NS_MSR_SIMP, curp, vcpup, ctxt.simp_msr); 
+
+	vcpup->nsVcpEomMsr = ctxt.eom_msr;
+	for (i=0; i<16; i++)
+		vcpup->nsVcpSIntMsr[i] = ctxt.int_msr[i];
+	for (i=0; i < 4; i++) {
+		vcpup->nsVcpTimers[i].config = ctxt.timers[i].config;
+		vcpup->nsVcpTimers[i].count = 
+		(ctxt.timers[i].count + ((NOW())/100)); 
+		if ((vcpup->nsVcpTimers[i].config | 0x9)) {
+			/*
+			 * XXXKYS: Some issues with regards to time
+			 * management here:
+			 * 1) We will ignore the elapsed wall clock time
+			 *    when the domain was not running.
+			 * 2) Clearly we should account fot the time that 
+			 *    has elapsed when the domain was running with 
+			 *    respect to the timeouts that were scheduled
+			 *    prior to saving the domain.
+			 * We will deal with on the save side.
+			 */ 
+			nsScheduleTimeOut(&vcpup->nsVcpTimers[i]); 
+			NS_STATS_COLLECT(NS_TIMEOUTS, &vcpup->nsVcpStats);
+		}
+	}
+
+	vcpup->nsVcpuFlags |=  NS_VCPU_UP;
+	return 0;
+}
+
+
+
+/*
+ * static int nsDomSave(struct domain *d, hvm_domain_context_t *h)
+ *	Save per-domain shim state to support either migration or domain save.
+ *
+ * Calling exit state:
+ *	None.
+ */
+
+static int 
+nsDomSave(struct domain *d, hvm_domain_context_t *h)
+{
+	struct hvm_ns_veridian_dom ctxt;
+	nsPartition_t	*curp = d->arch.hvm_domain.ext_handle;
+
+	if (curp == NULL) {
+		return 0;
+	}
+
+	ctxt.guestid_msr = curp->nsGuestIdMsr;
+	ctxt.hypercall_msr = curp->nsHypercallMsr;
+	ctxt.long_mode = curp->nsLongModeGuest;
+	ctxt.pad0 = 0;
+	return (hvm_save_entry(NS_VERIDIAN_DOM, 0, h, &ctxt)); 
+}
+
+/*
+ * static int nsDomRestore(struct domain *d, hvm_domain_context_t *h)
+ *	Restore per-domain shim state to support either migration or domain save.
+ *
+ * Calling exit state:
+ *	None.
+ */
+
+static int 
+nsDomRestore(struct domain *d, hvm_domain_context_t *h)
+{
+	struct hvm_ns_veridian_dom ctxt;
+	nsPartition_t	*curp = d->arch.hvm_domain.ext_handle;
+
+	if (curp == NULL) {
+		return 0;
+	}
+
+	if ( hvm_load_entry(NS_VERIDIAN_DOM, h, &ctxt) != 0 )
+        	return -22;
+	curp->nsGuestIdMsr = ctxt.guestid_msr;
+	curp->nsHypercallMsr = ctxt.hypercall_msr;
+	curp->nsLongModeGuest = ctxt.long_mode;
+	curp->nsHypercallMfn =
+	nsXenVector.extGetMfnFromGmfn(d, (ctxt.hypercall_msr >> 12));
+	
+	return 0; 
+}
+
+HVM_REGISTER_SAVE_RESTORE(NS_VERIDIAN_DOM, nsDomSave, nsDomRestore,
+                          1, HVMSR_PER_DOM);
+	
+
+HVM_REGISTER_SAVE_RESTORE(NS_VERIDIAN_CPU, nsVcpuSave , nsVcpuRestore,
+                          1, HVMSR_PER_VCPU);
+
+
+/*
+ * static int
+ * nsPreProcessCpuIdLeaves(unsigned int input, struct cpu_user_regs *regs)
+ *
+ * Preprocess cpuid leaves. Both xen and Veridian use identical cpuid 
+ * leaves for getting info from the hypervisor. 
+ *
+ * Calling exit state:
+ *	None.
+ */
+static int
+nsPreProcessCpuIdLeaves(unsigned int input, struct cpu_user_regs *regs)
+{
+	uint32_t idx;
+	struct domain	*d = current->domain;
+	int	extid = d->arch.hvm_domain.params[HVM_PARAM_EXTEND_HYPERVISOR];
+
+	if (extid == 1) {
+		/*
+		 * Enlightened Windows guest; need to remap and handle 
+		 * leaves used by PV front-end drivers.
+		 */
+		if ((input >= 0x40000000) && (input <= 0x40000005)) {
+			return (0);
+		}
+		/*
+	 	 * PV drivers use cpuid to query the hypervisor for details. On
+	 	 * Windows we will use the following leaves for this:
+		 *
+		 * 4096: VMM Sinature (corresponds to 0x40000000 on Linux)
+		 * 4097: VMM Version (corresponds to 0x40000001 on Linux)
+		 * 4098: Hypercall details (corresponds to 0x40000002 on Linux)
+		 */
+		if ((input >= 0x40001000) && (input <= 0x40001002)) {
+			idx = (input - 0x40001000);
+			switch (idx) {
+			case 0:
+				regs->eax = 0x40000002; /* Largest leaf  */
+				regs->ebx = 0x566e6558;/*Signature 1: "XenV" */
+				regs->ecx = 0x65584d4d; /*Signature 2: "MMXe" */
+				regs->edx = 0x4d4d566e; /*Signature 3: "nVMM"*/
+				break;
+			case 1:
+				regs->eax = 
+				(XEN_VERSION << 16) | 
+				XEN_SUBVERSION;
+				regs->ebx = 0;          /* Reserved */
+				regs->ecx = 0;          /* Reserved */
+				regs->edx = 0;          /* Reserved */
+				break;
+
+			case 2:
+				regs->eax = 1; /*Number of hypercall-transfer pages*/
+				/*In linux this is 0x40000000 */
+				regs->ebx = 0x40001000; /* MSR base address */
+				regs->ecx = 0;          /* Features 1 */
+				regs->edx = 0;          /* Features 2 */
+				break;
+			}
+		}
+		return (1);
+	} else {
+		/*
+		 * For now this is all other "enlightened guests"
+		 */
+		if ((input >= 0x40000000) && (input <= 0x40000002)) {
+			/*
+			 * These leaves have already been correctly
+			 * processed; just return.
+			 */
+			return (1);
+		} 
+		return (0);
+	}
+}
+
+/*
+ * static int 
+ * nsDoCpuId(unsigned int input, struct cpu_user_regs *regs)
+ * NS intercept for cpuid instruction
+ *
+ * Calling/Exit State:
+ *	None.
+ */
+
+static int 
+nsDoCpuId(unsigned int input, struct cpu_user_regs *regs)
+{
+	uint32_t idx;
+
+	/*
+	 * hvmloader uses cpuid to set up a hypercall page; we don't want to
+	 * intercept calls coming from the bootstrap (bios) code in the HVM 
+	 * guest; we discriminate based on the instruction pointer.
+	 */
+	if (nsCallFromBios(regs)) {
+		/*
+		 * We don't intercept this.
+		 */
+		return (0);
+	}
+		
+	if (input == 0x00000001) { 
+		regs->ecx = (regs->ecx | 0x80000000);
+		return (1);
+	} 
+
+	if (nsPreProcessCpuIdLeaves(input, regs)) {
+		return (0);
+	}
+	idx = (input - 0x40000000);
+
+	switch (idx) {
+	case 0:
+		/*
+		 * 0x40000000: Hypervisor identification. 
+		 */
+		regs->eax = 0x40000005; /* For now clamp this */
+		regs->ebx = 0x65766f4e; /* "Nove" */ 
+		regs->ecx = 0x68536c6c; /* "llSh" */
+		regs->edx = 0x76486d69; /* "imHv" */ 
+		break;
+
+	case 1:
+		/*
+		 * 0x40000001: Hypervisor identification. 
+		 */
+		regs->eax = 0x31237648; /* "Hv#1*/
+		regs->ebx = 0; /* Reserved */ 
+		regs->ecx = 0; /* Reserved */
+		regs->edx = 0; /* Reserved */ 
+		break;
+	case 2:
+		/*
+		 * 0x40000002: Guest Info 
+		 */
+		if (nsOsRegistered()) {
+			regs->eax = nsGetGuestMajor();
+			regs->ebx = 
+			(nsGetGuestMajor() << 16) | nsGetGuestMinor();
+			regs->ecx = nsGetGuestServicePack();
+			regs->edx = 
+			(nsGetGuestServiceBranchInfo() << 24) |
+			nsGetGuestServiceNumber();
+		} else {
+			regs->eax = 0;
+			regs->ebx = 0;
+			regs->ecx = 0;
+			regs->edx = 0;
+		}
+		break;
+	case 3:
+		/*
+		 * 0x40000003: Feature identification.
+		 */
+		regs->eax = nsGetSupportedSyntheticMsrs();
+		/* We only support AcessSelfPartitionId bit 1 */
+		regs->ebx = 0x2; 
+		regs->ecx = 0; /* Reserved */
+		regs->edx = 0; /*No MWAIT (bit 0), No debugging (bit 1)*/
+		break;
+	case 4:
+		/*
+		 * 0x40000004: Imlementation recommendations.
+		 */
+		regs->eax = nsGetRecommendations();
+		regs->ebx = 0; /* Reserved */
+		regs->ecx = 0; /* Reserved */
+		regs->edx = 0; /* Reserved */
+		break;
+	case 5:
+		/*
+		 * 0x40000005: Implementation limits.
+		 * Currently we retrieve maximum number of vcpus and 
+		 * logical processors (hardware threads) supported.
+		 */
+		regs->eax = nsGetMaxVcpusSupported();
+		regs->ebx = nsGetMaxLcpusSupported();
+		regs->ecx = 0; /* Reserved */
+		regs->edx = 0; /* Reserved */
+		break;
+
+	default:
+		/*
+		 * We don't handle this leaf.
+		 */
+		return (0);
+
+	}
+	return (1);
+}
+
+/*
+ * static int
+ * nsDoRdMsr(uint32_t idx, struct cpu_user_regs *regs)
+ * NS intercept for reading MSRS.
+ *
+ * Calling/Exit State:
+ *	None.
+ */
+	
+static int
+nsDoRdMsr(uint32_t idx, struct cpu_user_regs *regs)
+{
+	nsPartition_t	*curp = nsGetCurrentPartition();
+	unsigned int	vcpuIndex = nsGetCurrentVcpuIndex();
+	u64 msrContent = 0;
+	nsVcpu_t	*curVcpu = &curp->nsVcpuState[vcpuIndex];
+	int		synInt, timer;
+	struct domain	*d = current->domain;
+	int	extid = d->arch.hvm_domain.params[HVM_PARAM_EXTEND_HYPERVISOR];
+	u64		timerCount;
+
+	/*
+	 * hvmloader uses rdmsr; we don't want to
+	 * intercept calls coming from the bootstrap (bios) code in the HVM 
+	 * guest; we descriminate based on the instruction pointer.
+	 */
+	if (nsCallFromBios(regs)) {
+		/*
+		 * We don't intercept this.
+		 */
+		return (0);
+	}
+	if (extid > 1) {
+		/*
+		 * For now this is all other "Enlightened" operating systems
+		 * other than Longhorn.
+		 */
+		if (idx == 0x40000000) {
+			/*
+			 * PV driver hypercall setup. Let xen handle this.
+			 */
+			return (0);
+		}
+		if (idx == 0x40001000) {
+			idx = 0x40000000;
+		}
+	}
+	switch (idx) {
+	case NS_MSR_GUEST_OS_ID: 
+		nsLockAcquire(curVcpu, &curp->nsLock);
+		regs->eax = (u32)(curp->nsGuestIdMsr & 0xFFFFFFFF);
+		regs->edx = (u32)(curp->nsGuestIdMsr >> 32);
+		nsLockRelease(curVcpu, &curp->nsLock);
+		break;
+	case NS_MSR_HYPERCALL:
+		nsLockAcquire(curVcpu, &curp->nsLock);
+		regs->eax = (u32)(curp->nsHypercallMsr & 0xFFFFFFFF);
+		regs->edx = (u32)(curp->nsHypercallMsr >> 32);
+		nsLockRelease(curVcpu, &curp->nsLock);
+		if ((((u32)curp->nsHypercallMsr) & (0x00000001)) != 0) {
+			curVcpu->nsVcpuFlags |= NS_VCPU_UP;
+		}
+		break;
+	case NS_MSR_VP_INDEX:
+		regs->eax = (u32)(vcpuIndex);
+		regs->edx = (u32)(0x0);
+		break;
+	case NS_MSR_ICR:
+		if (!nsPrivilegeCheck(curp, NS_ACCESS_APIC_MSRS)) {
+			goto msrReadError;
+		}
+		nsReadIcr(&msrContent);
+		NS_STATS_COLLECT(NS_ICR_READ, &curVcpu->nsVcpStats);
+		regs->eax = (u32)(msrContent & 0xFFFFFFFF);
+		regs->edx = (u32)(msrContent >> 32);
+		break;
+	case NS_MSR_TPR:
+		if (!nsPrivilegeCheck(curp, NS_ACCESS_APIC_MSRS)) {
+			goto msrReadError;
+		}
+		nsReadTpr(&msrContent);
+		NS_STATS_COLLECT(NS_TPR_READ, &curVcpu->nsVcpStats);
+		regs->eax = (u32)(msrContent & 0xFFFFFFFF);
+		regs->edx = (u32)(msrContent >> 32);
+		break;
+	/*
+	 * The following synthetic MSRs are implemented in the Novell Shim.
+	 */
+	case NS_MSR_SCONTROL:
+		if (!nsPrivilegeCheck(curp, NS_ACCESS_SYNC_MSRS)) {
+			goto msrReadError;
+		}
+		regs->eax = (u32)(curVcpu->nsVcpSControlMsr & 0xFFFFFFFF);
+		regs->edx = (u32)(curVcpu->nsVcpSControlMsr >> 32);
+		break;
+	case NS_MSR_SVERSION:
+		if (!nsPrivilegeCheck(curp, NS_ACCESS_SYNC_MSRS)) {
+			goto msrReadError;
+		}
+		regs->eax = (u32)(curVcpu->nsVcpSVersionMsr & 0xFFFFFFFF);
+		regs->edx = (u32)(curVcpu->nsVcpSVersionMsr >> 32);
+		break;
+	case NS_MSR_SIEFP:
+		if (!nsPrivilegeCheck(curp, NS_ACCESS_SYNC_MSRS)) {
+			goto msrReadError;
+		}
+		regs->eax = (u32)(curVcpu->nsVcpSIefpMsr & 0xFFFFFFFF);
+		regs->edx = (u32)(curVcpu->nsVcpSIefpMsr >> 32);
+		break;
+	case NS_MSR_SIMP:
+		if (!nsPrivilegeCheck(curp, NS_ACCESS_SYNC_MSRS)) {
+			goto msrReadError;
+		}
+		regs->eax = (u32)(curVcpu->nsVcpSimpMsr & 0xFFFFFFFF);
+		regs->edx = (u32)(curVcpu->nsVcpSimpMsr >> 32);
+		break;
+	case NS_MSR_SINT0:
+		synInt = 0;
+		goto	synIntReadProcess;
+	case NS_MSR_SINT1:
+		synInt = 1;
+		goto	synIntReadProcess;
+	case NS_MSR_SINT2:
+		synInt = 2;
+		goto	synIntReadProcess;
+	case NS_MSR_SINT3:
+		synInt = 3;
+		goto	synIntReadProcess;
+	case NS_MSR_SINT4:
+		synInt = 4;
+		goto	synIntReadProcess;
+	case NS_MSR_SINT5:
+		synInt = 5;
+		goto	synIntReadProcess;
+	case NS_MSR_SINT6:
+		synInt = 6;
+		goto	synIntReadProcess;
+	case NS_MSR_SINT7:
+		synInt = 7;
+		goto	synIntReadProcess;
+	case NS_MSR_SINT8:
+		synInt = 8;
+		goto	synIntReadProcess;
+	case NS_MSR_SINT9:
+		synInt = 9;
+		goto	synIntReadProcess;
+	case NS_MSR_SINT10:
+		synInt = 10;
+		goto	synIntReadProcess;
+	case NS_MSR_SINT11:
+		synInt = 11;
+		goto	synIntReadProcess;
+	case NS_MSR_SINT12:
+		synInt = 12;
+		goto	synIntReadProcess;
+	case NS_MSR_SINT13:
+		synInt = 13;
+		goto	synIntReadProcess;
+	case NS_MSR_SINT14:
+		synInt = 14;
+		goto	synIntReadProcess;
+	case NS_MSR_SINT15:
+		synInt = 15;
+synIntReadProcess:
+		if (!nsPrivilegeCheck(curp, NS_ACCESS_SYNC_MSRS)) {
+			goto msrReadError;
+		}
+		regs->eax = (u32)(curVcpu->nsVcpSIntMsr[synInt] & 0xFFFFFFFF);
+		regs->edx = (u32)(curVcpu->nsVcpSIntMsr[synInt] >> 32);
+		break;
+
+	case NS_MSR_SEOM:
+		/*
+		 * This is a write only register; reads return 0.
+		 */
+		regs->eax = 0;
+		regs->edx = 0;
+		break;
+	case NS_MSR_TIME_REF_COUNT:
+		if (!nsAccessTimeRefCnt(curp, &msrContent)) {
+			goto msrReadError;
+		}
+		regs->eax = (u32)(msrContent & 0xFFFFFFFF);
+		regs->edx = (u32)(msrContent >> 32);
+		break;
+	/*
+	 * Synthetic timer MSRs.
+	 */
+	case NS_MSR_TIMER0_CONFIG:
+		timer = 0;
+		goto	processTimerConfigRead;
+	case NS_MSR_TIMER1_CONFIG:
+		timer = 1;
+		goto	processTimerConfigRead;
+	case NS_MSR_TIMER2_CONFIG:
+		timer = 2;
+		goto	processTimerConfigRead;
+	case NS_MSR_TIMER3_CONFIG:
+		timer = 3;
+processTimerConfigRead:
+		if (!nsPrivilegeCheck(curp, NS_ACCESS_SYNC_TIMERS)) {
+			goto msrReadError;
+		}
+		regs->eax = 
+		(u32)(curVcpu->nsVcpTimers[timer].config & 0xFFFFFFFF);
+		regs->edx = 
+		(u32)(curVcpu->nsVcpTimers[timer].config >> 32);
+		break;
+	case NS_MSR_TIMER0_COUNT:
+		timer = 0;
+		goto processTimerCountRead;
+	case NS_MSR_TIMER1_COUNT:
+		timer = 1;
+		goto processTimerCountRead;
+	case NS_MSR_TIMER2_COUNT:
+		timer = 2;
+		goto processTimerCountRead;
+	case NS_MSR_TIMER3_COUNT:
+		timer = 3;
+processTimerCountRead:
+		if (!nsPrivilegeCheck(curp, NS_ACCESS_SYNC_TIMERS)) {
+			goto msrReadError;
+		}
+		timerCount = curVcpu->nsVcpTimers[timer].count;
+		if (timerCount > ((NOW())/100)) {
+			timerCount -= ((NOW())/100);
+		} else {
+			timerCount = 0;
+		}
+		regs->eax = 
+		(u32)(timerCount & 0xFFFFFFFF);
+		regs->edx = 
+		(u32)(timerCount >> 32);
+		break;
+	case NS_MSR_PVDRV_HCALL:
+		regs->eax = 0;
+		regs->edx = 0;
+		break; 
+	case NS_MSR_SYSTEM_RESET:
+		regs->eax = 0;
+		regs->edx = 0;
+		break; 
+	default:
+		/*
+		 * We did not handle the MSR address specified; 
+		 * let the caller figure out
+		 * What to do.
+		 */
+		return (0);
+	}
+	return (1);
+msrReadError:
+	/*
+	 * Have to inject #GP fault.
+	 */
+	nsInjectException(TRAP_gp_fault);
+	return (1);
+}
+
+/*
+ * static int
+ * nsDoWrMsr(uint32_t idx, struct cpu_user_regs *regs)
+ * NS intercept for writing MSRS.
+ *
+ * Calling/Exit State:
+ *	None.
+ */
+
+static int
+nsDoWrMsr(uint32_t idx, struct cpu_user_regs *regs)
+{
+	nsPartition_t	*curp = nsGetCurrentPartition();
+	unsigned int	vcpuIndex = nsGetCurrentVcpuIndex();
+	u64 msrContent = 0;
+	nsVcpu_t	*curVcpu = &curp->nsVcpuState[vcpuIndex];
+	int		synInt, timer;
+	struct domain	*d = current->domain;
+	int	extid = d->arch.hvm_domain.params[HVM_PARAM_EXTEND_HYPERVISOR];
+
+	/*
+	 * hvmloader uses wrmsr; we don't want to
+	 * intercept calls coming from the bootstrap (bios) code in the HVM 
+	 * guest; we descriminate based on the instruction pointer.
+	 */
+	if (nsCallFromBios(regs)) {
+		/*
+		 * We don't intercept this.
+		 */
+		return (0);
+	}
+	msrContent =
+	(u32)regs->eax | ((u64)regs->edx << 32);
+	if (extid > 1) {
+		/*
+		 * For now this is all other "Enlightened" operating systems
+		 * other than Longhorn.
+		 */
+		if (idx == 0x40000000) {
+			/*
+			 * PV driver hypercall setup. Let xen handle this.
+			 */
+			return (0);
+		}
+		if (idx == 0x40001000) {
+			idx = 0x40000000;
+		}
+	}
+
+	switch (idx) {
+	case NS_MSR_GUEST_OS_ID: 
+		nsWriteGuestIdMsr(curp, curVcpu,  msrContent);
+		break; 
+	case NS_MSR_HYPERCALL:
+		nsWriteHypercallMsr(curp, curVcpu, msrContent);
+		break;
+
+	case NS_MSR_VP_INDEX:
+		goto msrWriteError;
+		
+	case NS_MSR_EOI:
+		if (!nsPrivilegeCheck(curp, NS_ACCESS_APIC_MSRS)) {
+			goto msrWriteError;
+		}
+		nsWriteEoi(msrContent);
+		NS_STATS_COLLECT(NS_EOI_WRITE, &curVcpu->nsVcpStats);
+		break;
+	case NS_MSR_ICR:
+		if (!nsPrivilegeCheck(curp, NS_ACCESS_APIC_MSRS)) {
+			goto msrWriteError;
+		}
+		nsWriteIcr(msrContent);
+		NS_STATS_COLLECT(NS_ICR_WRITE, &curVcpu->nsVcpStats);
+		break;
+	case NS_MSR_TPR:
+		if (!nsPrivilegeCheck(curp, NS_ACCESS_APIC_MSRS)) {
+			goto msrWriteError;
+		}
+		nsWriteTpr(msrContent);
+		NS_STATS_COLLECT(NS_TPR_WRITE, &curVcpu->nsVcpStats);
+		break;
+
+	/*
+	 * The following MSRs are synthetic MSRs supported in the Novell Shim.
+	 */
+	case NS_MSR_SCONTROL:
+		if (!nsPrivilegeCheck(curp, NS_ACCESS_SYNC_MSRS)) {
+			goto msrWriteError;
+		}
+		curVcpu->nsVcpSControlMsr = msrContent; 
+		break;
+	case NS_MSR_SVERSION:
+		if (!nsPrivilegeCheck(curp, NS_ACCESS_SYNC_MSRS)) {
+			goto msrWriteError;
+		}
+		/*
+		 * This is a read-only MSR; generate #GP
+		 */
+		nsInjectException(TRAP_gp_fault);
+		break;
+	case NS_MSR_SIEFP:
+	case NS_MSR_SIMP:
+		if (!nsPrivilegeCheck(curp, NS_ACCESS_SYNC_MSRS)) {
+			goto msrWriteError;
+		} 
+		nsWriteSxMsr(idx, curp, curVcpu, msrContent);
+		break;
+	case NS_MSR_SINT0:
+		synInt = 0;
+		goto	synIntWrProcess;
+	case NS_MSR_SINT1:
+		synInt = 1;
+		goto	synIntWrProcess;
+	case NS_MSR_SINT2:
+		synInt = 2;
+		goto	synIntWrProcess;
+	case NS_MSR_SINT3:
+		synInt = 3;
+		goto	synIntWrProcess;
+	case NS_MSR_SINT4:
+		synInt = 4;
+		goto	synIntWrProcess;
+	case NS_MSR_SINT5:
+		synInt = 5;
+		goto	synIntWrProcess;
+	case NS_MSR_SINT6:
+		synInt = 6;
+		goto	synIntWrProcess;
+	case NS_MSR_SINT7:
+		synInt = 7;
+		goto	synIntWrProcess;
+	case NS_MSR_SINT8:
+		synInt = 8;
+		goto	synIntWrProcess;
+	case NS_MSR_SINT9:
+		synInt = 9;
+		goto	synIntWrProcess;
+	case NS_MSR_SINT10:
+		synInt = 10;
+		goto	synIntWrProcess;
+	case NS_MSR_SINT11:
+		synInt = 11;
+		goto	synIntWrProcess;
+	case NS_MSR_SINT12:
+		synInt = 12;
+		goto	synIntWrProcess;
+	case NS_MSR_SINT13:
+		synInt = 13;
+		goto	synIntWrProcess;
+	case NS_MSR_SINT14:
+		synInt = 14;
+		goto	synIntWrProcess;
+	case NS_MSR_SINT15:
+		synInt = 15;
+synIntWrProcess:
+		if (!nsPrivilegeCheck(curp, NS_ACCESS_SYNC_MSRS)) {
+			goto msrWriteError;
+		}
+		/*
+		 * XXXKYS: We assume that the synInt registers will be 
+		 * first written before the interrupt generation can occur.
+		 * Specifically if SINT is masked all interrupts that may have 
+		 * been generated will be lost. Also when SINT is disabled; 
+		 * its effects will be only felt for subsequent interrupts that 
+		 * may be posted. XXXKYS: CHECK
+		 */
+		curVcpu->nsVcpSIntMsr[synInt] = msrContent; 
+		break;
+
+	case NS_MSR_SEOM:
+		if (!nsPrivilegeCheck(curp, NS_ACCESS_SYNC_MSRS)) {
+			goto msrWriteError;
+		}
+		curVcpu->nsVcpEomMsr = msrContent; 
+		nsProcessMessageQ(curp, curVcpu);
+		break;
+	case NS_MSR_TIME_REF_COUNT: 
+		/*
+		 * This is a read-only msr.
+		 */
+		goto msrWriteError;
+	
+	/*
+	 * Synthetic timer MSRs.
+	 */
+	case NS_MSR_TIMER0_CONFIG:
+		timer = 0;
+		goto	processTimerConfig;
+	case NS_MSR_TIMER1_CONFIG:
+		timer = 1;
+		goto	processTimerConfig;
+	case NS_MSR_TIMER2_CONFIG:
+		timer = 2;
+		goto	processTimerConfig;
+	case NS_MSR_TIMER3_CONFIG:
+		timer = 3;
+processTimerConfig:
+		if (!nsPrivilegeCheck(curp, NS_ACCESS_SYNC_TIMERS)) {
+			goto msrWriteError;
+		}
+		/*
+		 * Assume that the client is going to write the whole msr. 
+		 */
+		if (!(msrContent & 0x9)) {
+			/*
+			 * We are neither setting Auto Enable or Enable; 
+			 * silently exit.
+			 * Should this be considered to turn off a 
+			 * timer that may be currently 
+			 * active; XXXKYS: Check. For now we are 
+			 * not doing anything here.
+			 */
+			break;
+		}
+		if (!(((u32)(msrContent >> 16)) & 0x0000000f)) {
+			/*
+			 * sintx is 0; clear the enable bit(s).
+			 */
+			msrContent &= ~(0x1);
+		}
+		curVcpu->nsVcpTimers[timer].config = msrContent;
+		/*
+		 * XXXKYS: Can any order be assumed here; 
+		 * should we just act on whatever is in the 
+		 * count register. For now act as if the count 
+		 * register is valid and act on it.
+		 */
+		if (msrContent & 0x1) {
+			nsScheduleTimeOut(&curVcpu->nsVcpTimers[timer]); 
+			NS_STATS_COLLECT(NS_TIMEOUTS, &curVcpu->nsVcpStats);
+		}
+		break;
+	case NS_MSR_TIMER0_COUNT:
+		timer = 0;
+		goto processTimerCount;
+	case NS_MSR_TIMER1_COUNT:
+		timer = 1;
+		goto processTimerCount;
+	case NS_MSR_TIMER2_COUNT:
+		timer = 2;
+		goto processTimerCount;
+	case NS_MSR_TIMER3_COUNT:
+		timer = 3;
+processTimerCount:
+		if (!nsPrivilegeCheck(curp, NS_ACCESS_SYNC_TIMERS)) {
+			goto msrWriteError;
+		}
+		curVcpu->nsVcpTimers[timer].count = 
+		(msrContent + ((NOW())/100));
+		if ((curVcpu->nsVcpTimers[timer].config | 0x9)) {
+			nsScheduleTimeOut(&curVcpu->nsVcpTimers[timer]); 
+			NS_STATS_COLLECT(NS_TIMEOUTS, &curVcpu->nsVcpStats);
+		}
+		
+		break;
+	case NS_MSR_PVDRV_HCALL:
+		/*
+		 * Establish the hypercall page for PV drivers.
+		 */
+		nsXenVector.extWrmsrHypervisorRegs(0x40000000, regs->eax,
+						   regs->edx);
+		break; 
+	case NS_MSR_SYSTEM_RESET:
+		/*
+		 * Shutdown the domain/partition.
+	 	 */
+		if (msrContent & 0x1) {
+			domain_shutdown(d, SHUTDOWN_reboot);
+		}
+		break; 
+		
+	default:
+		/*
+		 * We did not handle the MSR address; 
+		 * let the caller deal with this.
+		 */
+		return (0);
+	}
+	return (1);
+msrWriteError:
+	/*
+	 * Have to inject #GP fault.
+	 */
+	nsInjectException(TRAP_gp_fault);
+	return (1);
+}

[-- Attachment #5: Type: text/plain, Size: 138 bytes --]

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xensource.com
http://lists.xensource.com/xen-devel

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH][RFC] Supporting Enlightened Windows 2008 Server
  2008-03-05 22:15 [PATCH][RFC] Supporting Enlightened Windows 2008 Server Ky Srinivasan
@ 2008-03-05 22:28 ` Daniel P. Berrange
  2008-03-05 22:38   ` Daniel P. Berrange
  2008-03-07  1:05   ` Ky Srinivasan
  2008-03-06  7:28 ` Keir Fraser
  1 sibling, 2 replies; 15+ messages in thread
From: Daniel P. Berrange @ 2008-03-05 22:28 UTC (permalink / raw)
  To: Ky Srinivasan; +Cc: xen-devel

On Wed, Mar 05, 2008 at 03:15:19PM -0700, Ky Srinivasan wrote:
> I am attaching updated versions of the patches that I posted a couple 
> of weeks ago. These  have been merged up to the current unstable tip: 
> changeset 17186:854b0704962b
> 
> These patches have been tested on the unstable tip.

I'm not expert enough to comment on the HV extension implementation itself,
but in terms of the userspace  side, the user visible configuration file 
option 'extid=1' is pretty unpleasant.  It is akin to a 'magic constant'
in C code - no understandable meaning at all.

I'd like to see it accept a named extension type - if its possible to
have multiple extensions per guest, then using a list instead of a scalar
would be better. So how about something closer to

  extensions = [ "win2k8" ]

> +gopts.var('extid', val='EXTID',
> +          fn=set_int, default=0,
> +          use="Specify extention ID for a HVM domain.")
> +

And this help message could list the valid extension names


Regards,
Dan.
-- 
|=- Red Hat, Engineering, Emerging Technologies, Boston.  +1 978 392 2496 -=|
|=-           Perl modules: http://search.cpan.org/~danberr/              -=|
|=-               Projects: http://freshmeat.net/~danielpb/               -=|
|=-  GnuPG: 7D3B9505   F3C9 553F A1DA 4AC2 5648 23C1 B3DF F742 7D3B 9505  -=| 

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH][RFC] Supporting Enlightened Windows 2008 Server
  2008-03-05 22:28 ` Daniel P. Berrange
@ 2008-03-05 22:38   ` Daniel P. Berrange
  2008-03-07  1:06     ` Ky Srinivasan
  2008-03-07  1:05   ` Ky Srinivasan
  1 sibling, 1 reply; 15+ messages in thread
From: Daniel P. Berrange @ 2008-03-05 22:38 UTC (permalink / raw)
  To: Ky Srinivasan; +Cc: xen-devel

On Wed, Mar 05, 2008 at 10:28:28PM +0000, Daniel P. Berrange wrote:
> On Wed, Mar 05, 2008 at 03:15:19PM -0700, Ky Srinivasan wrote:
> > I am attaching updated versions of the patches that I posted a couple 
> > of weeks ago. These  have been merged up to the current unstable tip: 
> > changeset 17186:854b0704962b
> > 
> > These patches have been tested on the unstable tip.
> 
> I'm not expert enough to comment on the HV extension implementation itself,
> but in terms of the userspace  side, the user visible configuration file 
> option 'extid=1' is pretty unpleasant.  It is akin to a 'magic constant'
> in C code - no understandable meaning at all.
> 
> I'd like to see it accept a named extension type - if its possible to
> have multiple extensions per guest, then using a list instead of a scalar
> would be better. So how about something closer to
> 
>   extensions = [ "win2k8" ]

Or is there some way you can have some super light weight trap / hook
always loaded, so when Win2k8 makes it first paravirt call, you can
then automatically enable the full extension ? That could let Xen
just 'do the right thing' without needing a config param, and without
having to fully enable the full extension for non-Win2k8 guests.

>Dan.
-- 
|=- Red Hat, Engineering, Emerging Technologies, Boston.  +1 978 392 2496 -=|
|=-           Perl modules: http://search.cpan.org/~danberr/              -=|
|=-               Projects: http://freshmeat.net/~danielpb/               -=|
|=-  GnuPG: 7D3B9505   F3C9 553F A1DA 4AC2 5648 23C1 B3DF F742 7D3B 9505  -=| 

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH][RFC] Supporting Enlightened Windows 2008 Server
  2008-03-05 22:38   ` Daniel P. Berrange
@ 2008-03-07  1:06     ` Ky Srinivasan
  0 siblings, 0 replies; 15+ messages in thread
From: Ky Srinivasan @ 2008-03-07  1:06 UTC (permalink / raw)
  To: Daniel P. Berrange; +Cc: xen-devel



>>> On Wed, Mar 5, 2008 at  5:38 PM, in message
<20080305223827.GI19306@redhat.com>, "Daniel P. Berrange" <berrange@redhat.com>
wrote: 
> On Wed, Mar 05, 2008 at 10:28:28PM +0000, Daniel P. Berrange wrote:
>> On Wed, Mar 05, 2008 at 03:15:19PM -0700, Ky Srinivasan wrote:
>> > I am attaching updated versions of the patches that I posted a couple 
>> > of weeks ago. These  have been merged up to the current unstable tip: 
>> > changeset 17186:854b0704962b
>> > 
>> > These patches have been tested on the unstable tip.
>> 
>> I'm not expert enough to comment on the HV extension implementation itself,
>> but in terms of the userspace  side, the user visible configuration file 
>> option 'extid=1' is pretty unpleasant.  It is akin to a 'magic constant'
>> in C code - no understandable meaning at all.
>> 
>> I'd like to see it accept a named extension type - if its possible to
>> have multiple extensions per guest, then using a list instead of a scalar
>> would be better. So how about something closer to
>> 
>>   extensions = [ "win2k8" ]
> 
> Or is there some way you can have some super light weight trap / hook
> always loaded, so when Win2k8 makes it first paravirt call, you can
> then automatically enable the full extension ? That could let Xen
> just 'do the right thing' without needing a config param, and without
> having to fully enable the full extension for non-Win2k8 guests.

I considered this. Unfortunately, we have no control on the mechanisms used by windows to discover the hypervisor. Furthermore, CPUID leaves used by longhorn collide with CPUID leaves used by Xen for supporting hypervisor discovery for PV drivers. Since we have no control on what Microsoft may do here, I felt the best way might be to tag the guest so that we can interpret the hypercalls, CPUID calls and MSR (read/write) calls within the guest namespace.

Regards,

K. Y

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH][RFC] Supporting Enlightened Windows 2008 Server
  2008-03-05 22:28 ` Daniel P. Berrange
  2008-03-05 22:38   ` Daniel P. Berrange
@ 2008-03-07  1:05   ` Ky Srinivasan
  1 sibling, 0 replies; 15+ messages in thread
From: Ky Srinivasan @ 2008-03-07  1:05 UTC (permalink / raw)
  To: Daniel P. Berrange; +Cc: xen-devel



>>> On Wed, Mar 5, 2008 at  5:28 PM, in message
<20080305222828.GH19306@redhat.com>, "Daniel P. Berrange" <berrange@redhat.com>
wrote: 
> On Wed, Mar 05, 2008 at 03:15:19PM -0700, Ky Srinivasan wrote:
>> I am attaching updated versions of the patches that I posted a couple 
>> of weeks ago. These  have been merged up to the current unstable tip: 
>> changeset 17186:854b0704962b
>> 
>> These patches have been tested on the unstable tip.
> 
> I'm not expert enough to comment on the HV extension implementation itself,
> but in terms of the userspace  side, the user visible configuration file 
> option 'extid=1' is pretty unpleasant.  It is akin to a 'magic constant'
> in C code - no understandable meaning at all.

Agreed; I could make the extension tag more descriptive.

Regards,

K. Y

> 
> I'd like to see it accept a named extension type - if its possible to
> have multiple extensions per guest, then using a list instead of a scalar
> would be better. So how about something closer to
> 
>   extensions = [ "win2k8" ]
> 
>> +gopts.var('extid', val='EXTID',
>> +          fn=set_int, default=0,
>> +          use="Specify extention ID for a HVM domain.")
>> +
> 
> And this help message could list the valid extension names
> 
> 
> Regards,
> Dan.

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH][RFC] Supporting Enlightened Windows 2008 Server
  2008-03-05 22:15 [PATCH][RFC] Supporting Enlightened Windows 2008 Server Ky Srinivasan
  2008-03-05 22:28 ` Daniel P. Berrange
@ 2008-03-06  7:28 ` Keir Fraser
  2008-03-06 10:15   ` Tim Deegan
  2008-03-07  1:08   ` Ky Srinivasan
  1 sibling, 2 replies; 15+ messages in thread
From: Keir Fraser @ 2008-03-06  7:28 UTC (permalink / raw)
  To: Ky Srinivasan, xen-devel

Personally I think the approach is ugly, and also you have not yet presented
evidence that supporting the Viridian paravirtualisations improves
performance. If it doesn't then it's a waste of time; if it does then it
raises the question of which hypercalls provide the benefit, and do we get a
smaller neater patch by supporting just those? One final comment is that the
TLB management code that this slaps on top of the core hypervisor looks a
bit scary to me. Tim Deegan may care to comment more on that.

 -- Keir

On 5/3/08 22:15, "Ky Srinivasan" <ksrinivasan@novell.com> wrote:

> I am attaching updated versions of the patches that I posted a couple of weeks
> ago. These  have been merged up to the current unstable tip: changeset
> 17186:854b0704962b
> 
> These patches have been tested on the unstable tip.
> 
> Kier, what are your thoughts on accepting these patches.
> 
> Signed-off by K. Y. Srinivasan (ksrinivasan@novell.com)
> 
> Regards,
> 
> K. Y 
> 
> 
> 
> 
> 
> 
> 
> _______________________________________________
> Xen-devel mailing list
> Xen-devel@lists.xensource.com
> http://lists.xensource.com/xen-devel

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH][RFC] Supporting Enlightened Windows 2008 Server
  2008-03-06  7:28 ` Keir Fraser
@ 2008-03-06 10:15   ` Tim Deegan
  2008-03-07  1:10     ` [PATCH][RFC] Supporting Enlightened Windows 2008Server Ky Srinivasan
  2008-03-07  1:08   ` Ky Srinivasan
  1 sibling, 1 reply; 15+ messages in thread
From: Tim Deegan @ 2008-03-06 10:15 UTC (permalink / raw)
  To: Keir Fraser; +Cc: Ky Srinivasan, xen-devel

At 07:28 +0000 on 06 Mar (1204788507), Keir Fraser wrote:
> Personally I think the approach is ugly, and also you have not yet presented
> evidence that supporting the Viridian paravirtualisations improves
> performance. If it doesn't then it's a waste of time; if it does then it
> raises the question of which hypercalls provide the benefit, and do we get a
> smaller neater patch by supporting just those? One final comment is that the
> TLB management code that this slaps on top of the core hypervisor looks a
> bit scary to me. Tim Deegan may care to comment more on that.

Some blame lies with the mismatch between the Viridian interface and
Xen's; there needs to be a way for the TLB flush hypercall to block
indefinitely.  But I can't see how that turns into more than an atomic_t
for TlbFlushInhibit and a block-and-schedule operation.  In the current
patches, there's quite a lot of locking and ownership going on as well.
I'm confused by the use of wait_on_xen_event_channel(0, xyz); event
channels don't seem to come into it.

I'll mention now, since I have the patch in front of me, that I dislike
the addition of an "ext_id" field to the HVM save format header and
associated special treatment in the save/restore code; you should be
able to figure out that this is a w2k8 domain from the presence of your
other records in the save file.

More generally, I agree that the approach is very heavyweight.  I don't
see the need for a framework here, since there's no other proposed user
of it that would want the same interface.  It seems to duplicate a lot
of things (does it really need its own spinlock implementation?)

It's certainly not in Xen coding style, even in the framework
implementation.  (The MS habit of encoding scope and type information in
variable names annoys the heck out of me.  Why does a lock field in an
nsPartition_t need to be called "nsLock"?)

The naming in general could do with a kicking -- calling everything
"Novell Shim" is understandable for historical reasons but not really
descriptive of its function.   But maybe that can wait.

Tim

-- 
Tim Deegan <Tim.Deegan@citrix.com>
Principal Software Engineer, Citrix Systems (R&D) Ltd.
[Company #02300071, SL9 0DZ, UK.]

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH][RFC] Supporting Enlightened Windows 2008Server
  2008-03-06 10:15   ` Tim Deegan
@ 2008-03-07  1:10     ` Ky Srinivasan
  2008-03-07 11:57       ` Tim Deegan
                         ` (2 more replies)
  0 siblings, 3 replies; 15+ messages in thread
From: Ky Srinivasan @ 2008-03-07  1:10 UTC (permalink / raw)
  To: Tim Deegan, Keir Fraser; +Cc: xen-devel

>>> On Thu, Mar 6, 2008 at  5:15 AM, in message
<20080306101542.GA22422@york.uk.xensource.com>, Tim Deegan
<Tim.Deegan@citrix.com> wrote: 
> At 07:28 +0000 on 06 Mar (1204788507), Keir Fraser wrote:
>> Personally I think the approach is ugly, and also you have not yet presented
>> evidence that supporting the Viridian paravirtualisations improves
>> performance. If it doesn't then it's a waste of time; if it does then it
>> raises the question of which hypercalls provide the benefit, and do we get a
>> smaller neater patch by supporting just those? One final comment is that the
>> TLB management code that this slaps on top of the core hypervisor looks a
>> bit scary to me. Tim Deegan may care to comment more on that.
> 
> Some blame lies with the mismatch between the Viridian interface and
> Xen's; there needs to be a way for the TLB flush hypercall to block
> indefinitely.  But I can't see how that turns into more than an atomic_t
> for TlbFlushInhibit and a block-and-schedule operation.  In the current
> patches, there's quite a lot of locking and ownership going on as well.
> I'm confused by the use of wait_on_xen_event_channel(0, xyz); event
> channels don't seem to come into it.

The Veridian API allows the guest to pass in a variable list of arguments to the TLB flush call ( HvFlushVirtualAddressList). Furthermore, both forms of the flush APIs (HvFlushVirtualAddressSpace and HvFlushVirtualAdressList) can specify a list of vcpus that should be involved in the flush process. So, as you have noted we will need a mechanism to co-ordinate the flush operation amongst the set of vcpus involved which means we need to be able give up the physical CPU in the hypervisor waiting for the flush to complete. I have used wait_on_xen_event_channel() to implement this synchronization. Since we don't preserve the stack state when we block in the hypervisor, I have used a seperate per-vcpu page for dealing with hypercall input parameters for calls that can potentially block in the hypervisor. From what I have seen, win2k8 server mostly specifies  all the processors in ProcessorMask. So, I chose to implement TLB flush operations using a single serialization object that keeps track of both the set of vcpus involved in the flush operation as well as the list of pages to be flushed.

> 
> I'll mention now, since I have the patch in front of me, that I dislike
> the addition of an "ext_id" field to the HVM save format header and
> associated special treatment in the save/restore code; you should be
> able to figure out that this is a w2k8 domain from the presence of your
> other records in the save file.

I can fix this.

> 
> 
> 
> More generally, I agree that the approach is very heavyweight.  I don't
> see the need for a framework here, since there's no other proposed user
> of it that would want the same interface.

I agree that there is no need to isolate the shim's dependence on the base Xen code (xen_call_vector_t). I implemented this shim a year ago and at that point it was not clear what Microsoft might do with the Veridian specification.  So, clearly some of the design choices that I made a year ago may not be the right choice today. However, I still think that having an intercept framework where  one can implement Veridian specific functionality without cluttering up the base Xen code is still the right approach. 

> It seems to duplicate a lot
> of things (does it really need its own spinlock implementation?)

Clearly not! As I noted in an earlier email to Kier, I will be the first to admit that these patches require significant cleanup and I am willing to clean them up. A lot of what you see has historical baggage and I wanted to get some feedback before I invested the time to clean things up.

> 
> It's certainly not in Xen coding style, even in the framework
> implementation.  (The MS habit of encoding scope and type information in
> variable names annoys the heck out of me.  Why does a lock field in an
> nsPartition_t need to be called "nsLock"?)

Agreed. 

> 
> The naming in general could do with a kicking -- calling everything
> "Novell Shim" is understandable for historical reasons but not really
> descriptive of its function.   But maybe that can wait.
Agreed.

Regards,

K. Y
> 
> Tim

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH][RFC] Supporting Enlightened Windows 2008Server
  2008-03-07  1:10     ` [PATCH][RFC] Supporting Enlightened Windows 2008Server Ky Srinivasan
@ 2008-03-07 11:57       ` Tim Deegan
  2008-03-07 13:19       ` Keir Fraser
  2008-03-07 13:30       ` Keir Fraser
  2 siblings, 0 replies; 15+ messages in thread
From: Tim Deegan @ 2008-03-07 11:57 UTC (permalink / raw)
  To: Ky Srinivasan; +Cc: xen-devel, Keir Fraser

At 18:10 -0700 on 06 Mar (1204827047), Ky Srinivasan wrote:

> The Veridian API allows the guest to pass in a variable list of
> arguments to the TLB flush call
> (HvFlushVirtualAddressList). Furthermore, both forms of the flush APIs
> (HvFlushVirtualAddressSpace and HvFlushVirtualAdressList) can specify
> a list of vcpus that should be involved in the flush process.

I expect that the best way to implement the list-of-addresses feature on
Xen is to do a full TLB flush if there's more than one entry in the list
(that's different from the Hyper-V shadow pagetable design, where
explicit lists of addresses to flush make much more sense).

Then all you need are the existing Xen TLB flush operations, and some
means of gating them.  If you use a generation counter with each vcpu's
inhibit bit, you can probably do that without the need for any locks.

> So, as
> you have noted we will need a mechanism to co-ordinate the flush
> operation amongst the set of vcpus involved which means we need to be
> able give up the physical CPU in the hypervisor waiting for the flush
> to complete. I have used wait_on_xen_event_channel() to implement this
> synchronization.

wait_on_xen_event_channel is more than you need, since you're not going
to wake on events.

> Since we don't preserve the stack state when we block
> in the hypervisor, I have used a seperate per-vcpu page for dealing
> with hypercall input parameters for calls that can potentially block
> in the hypervisor.

Xen already has a system of hypercall continuations that might help here.

Cheers,

Tim

-- 
Tim Deegan <Tim.Deegan@citrix.com>
Principal Software Engineer, Citrix Systems (R&D) Ltd.
[Company #02300071, SL9 0DZ, UK.]

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH][RFC] Supporting Enlightened Windows 2008Server
  2008-03-07  1:10     ` [PATCH][RFC] Supporting Enlightened Windows 2008Server Ky Srinivasan
  2008-03-07 11:57       ` Tim Deegan
@ 2008-03-07 13:19       ` Keir Fraser
  2008-03-07 13:30       ` Keir Fraser
  2 siblings, 0 replies; 15+ messages in thread
From: Keir Fraser @ 2008-03-07 13:19 UTC (permalink / raw)
  To: Ky Srinivasan, Tim Deegan; +Cc: xen-devel

On 7/3/08 01:10, "Ky Srinivasan" <ksrinivasan@novell.com> wrote:

> The Veridian API allows the guest to pass in a variable list of arguments to
> the TLB flush call ( HvFlushVirtualAddressList). Furthermore, both forms of
> the flush APIs (HvFlushVirtualAddressSpace and HvFlushVirtualAdressList) can
> specify a list of vcpus that should be involved in the flush process. So, as
> you have noted we will need a mechanism to co-ordinate the flush operation
> amongst the set of vcpus involved which means we need to be able give up the
> physical CPU in the hypervisor waiting for the flush to complete. I have used
> wait_on_xen_event_channel() to implement this synchronization. Since we don't
> preserve the stack state when we block in the hypervisor, I have used a
> seperate per-vcpu page for dealing with hypercall input parameters for calls
> that can potentially block in the hypervisor. From what I have seen, win2k8
> server mostly specifies  all the processors in ProcessorMask. So, I chose to
> implement TLB flush operations using a single serialization object that keeps
> track of both the set of vcpus involved in the flush operation as well as the
> list of pages to be flushed.

Clearly avoiding emulating IPI-to-all-CPUs is rather likely to be a win. But
is the very selective subset-of-CPUs and subset-of-addresses really that
useful? Do you get any significant win over just calling
hvmop_flush_tlb_all()?

Also we need to weigh up the likely penetration of NPT and EPT capable
processors by the time w2k8 is shipping in any volume. But even ignoring
that, I bet 95% of the benefit of this patch can be got with a much smaller
patch.

 -- Keir

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH][RFC] Supporting Enlightened Windows 2008Server
  2008-03-07  1:10     ` [PATCH][RFC] Supporting Enlightened Windows 2008Server Ky Srinivasan
  2008-03-07 11:57       ` Tim Deegan
  2008-03-07 13:19       ` Keir Fraser
@ 2008-03-07 13:30       ` Keir Fraser
  2 siblings, 0 replies; 15+ messages in thread
From: Keir Fraser @ 2008-03-07 13:30 UTC (permalink / raw)
  To: Ky Srinivasan, Tim Deegan; +Cc: xen-devel

On 7/3/08 01:10, "Ky Srinivasan" <ksrinivasan@novell.com> wrote:

> I agree that there is no need to isolate the shim's dependence on the base Xen
> code (xen_call_vector_t). I implemented this shim a year ago and at that point
> it was not clear what Microsoft might do with the Veridian specification.  So,
> clearly some of the design choices that I made a year ago may not be the right
> choice today. However, I still think that having an intercept framework where
> one can implement Veridian specific functionality without cluttering up the
> base Xen code is still the right approach.

Clearly putting the Viridian hypercall shims in a different file/directory
makes sense. But I think the shims would need to go on a diet. The TLB
flushing implementation is a good example -- the useful extra features of
the Viridian flush hypercall (if there are any, when partnered with Xen's
shadow code) should be pushed into core Xen HVM TLB-flush handling code.
Otherwise it sits out on the periphery with a correspndingly greater
tendency to rot, and for no benefit (certainly I would strongly argue it is
not cleaner!).

 -- Keir

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH][RFC] Supporting Enlightened Windows 2008Server
  2008-03-06  7:28 ` Keir Fraser
  2008-03-06 10:15   ` Tim Deegan
@ 2008-03-07  1:08   ` Ky Srinivasan
  1 sibling, 0 replies; 15+ messages in thread
From: Ky Srinivasan @ 2008-03-07  1:08 UTC (permalink / raw)
  To: Keir Fraser, xen-devel

>>> On Thu, Mar 6, 2008 at  2:28 AM, in message
<C3F54D9B.14C64%keir.fraser@eu.citrix.com>, Keir Fraser
<keir.fraser@eu.citrix.com> wrote: 
> Personally I think the approach is ugly, and also you have not yet presented
> evidence that supporting the Viridian paravirtualisations improves
> performance.

When I first implemented this (about a year ago), it was not clear if Microsoft would be open sourcing the Veridian specification. Given that, I wanted to have a narrow set of interfaces both to the adapter as well as from the adapter. I take it that you don't care much for this exercise in attempting to isolate the adapter. Now that Veridian specification has been open sourced, I agree there is no need to isolate the adapter from the base hypervisor the way it is currently done. However, given that:
(a) Veridian specification is evolving and Microsoft may define additional functionality to improve guest performance
(b) CPUID namespace, MSR namespace and hypercall namespace collisions between Xen and Veridian. This is the case today and it can only get worse over time.

I feel having a framework that allows you to implement these kinds of mapping layers in complete isolation from the base hypervisor  may in fact be cleaner than trying to implement the mapping code inline in the base Xen code.

With regards to performance, we have only run NetBench and on NetBench we have seen a 10% improvement (on a uniprocessor system). We have had some issues with SMP PV drivers and that is the reason I don't have SMP numbers (the adapter has been tested on SMP machines though). We are currently in the process of running a range of benchmarks and I will keep you posted on what we see. Our goal here is clearly to be competitive (as far as performance goes) with Veridian hosting an enlightened windows guest.  

> If it doesn't then it's a waste of time; if it does then it
> raises the question of which hypercalls provide the benefit, and do we get a
> smaller neater patch by supporting just those?

I think the only assumption we can make here is  that the enlightenments will improve the  guest performance. This has been confirmed with the minimal performance testing we have already done.  I am also going to assume that Microsoft will continue to evolve Veridian and the set of enlightenments visible to their guests to improve performance. The question that we need to answer, I think is how are we going to support these enlightenments and not if we are going to support Microsoft specific enlightenments. I will be the first one to admit the patches I submitted need to be cleaned up:

1) Fix coding style
2) Get rid of code that is not being exercised. Based on the Veridian specification I identified a set of functionality that I thought an enlightened guest may depend on. It looks like the current shipping windows server 2008 does not use all the functionality that is currently supported. I am somewhat hesitant to get rid of   unused functionality since I don't know what the next release of windows will use. In fact, the current shipping windows server 2008 (32 bit version)  is already using an undocumented hypercall! 

I do think however that having an environment in which we can implement and evolve the support for windows enlightenments without constantly churning the base Xen code  is the right approach.

Regards,

K. Y  

^ permalink raw reply	[flat|nested] 15+ messages in thread

* [PATCH][RFC] Supporting Enlightened Windows 2008 Server
@ 2008-02-19 22:11 Ky Srinivasan
  2008-02-20  9:44 ` Keir Fraser
  0 siblings, 1 reply; 15+ messages in thread
From: Ky Srinivasan @ 2008-02-19 22:11 UTC (permalink / raw)
  To: xen-devel

[-- Attachment #1: Type: text/plain, Size: 1426 bytes --]

During the spring Xen Summit in New York, I presented our work on hosting Enlightened windows 2008 server on  Xen based platforms. Now that Microsoft has published their Hypervisor specification, we can release our code to the community. The goal of this effort has been to host Enlightened windows 2008 server as a guest on a Xen based platform. We have addressed this requirement by :

A) Leveraging the I/O framework in the Xen based platform. Appropriate PV drivers will be loaded up in the guest to improve the I/O performance.
 
B) Implement an adapter that implements the required Hyper-V functionality.  

We  have implemented only a subset of Hyper-V functionality that is required for enlightened windows 2008 guest today. However, we have the framework in place to implement any additional functionality that the windows guests may leverage going forward. The framework is extensible and one can easily implement OS specific enlightenments.

I am enclosing three patches that implement our adapter for your consideration:

1) ns_tools.patch: Modifications to xen tools to support the the adapter
2) ns_xen_base.patch: Modifications to the base Xen code to support the adapter
3) ns_xen_extension.patch: New code implementing the adapter 

These patches have been tested on a xen 3.2 based system (SLES10 SP2).

Signed-off by K. Y. Srinivasan (ksrinivasan@novell.com) 

Regards,

K. Y






[-- Attachment #2: ns_tools.patch --]
[-- Type: text/plain, Size: 4939 bytes --]

Index: xen-3.2-testing/tools/python/xen/lowlevel/xc/xc.c
===================================================================
--- xen-3.2-testing.orig/tools/python/xen/lowlevel/xc/xc.c
+++ xen-3.2-testing/tools/python/xen/lowlevel/xc/xc.c
@@ -632,14 +632,14 @@ static PyObject *pyxc_hvm_build(XcObject
     int i;
 #endif
     char *image;
-    int memsize, vcpus = 1, acpi = 0, apic = 1;
+    int memsize, vcpus = 1, acpi = 0, apic = 1, extid = 0;
 
     static char *kwd_list[] = { "domid",
-				"memsize", "image", "vcpus", "acpi",
+				"memsize", "image", "vcpus", "extid", "acpi",
 				"apic", NULL };
-    if ( !PyArg_ParseTupleAndKeywords(args, kwds, "iis|iii", kwd_list,
+    if ( !PyArg_ParseTupleAndKeywords(args, kwds, "iis|iiii", kwd_list,
                                       &dom, &memsize,
-                                      &image, &vcpus, &acpi, &apic) )
+                                      &image, &vcpus, &extid, &acpi, &apic) )
         return NULL;
 
     if ( xc_hvm_build(self->xc_handle, dom, memsize, image) != 0 )
@@ -664,6 +664,7 @@ static PyObject *pyxc_hvm_build(XcObject
     va_hvm->checksum = -sum;
     munmap(va_map, XC_PAGE_SIZE);
 #endif
+    xc_set_hvm_param(self->xc_handle, dom, HVM_PARAM_EXTEND_HYPERVISOR, extid);
 
     return Py_BuildValue("{}");
 }
Index: xen-3.2-testing/tools/python/xen/xend/XendConfig.py
===================================================================
--- xen-3.2-testing.orig/tools/python/xen/xend/XendConfig.py
+++ xen-3.2-testing/tools/python/xen/xend/XendConfig.py
@@ -126,7 +126,7 @@ LEGACY_CFG_TO_XENAPI_CFG = reverse_dict(
 # Platform configuration keys.
 XENAPI_PLATFORM_CFG = [ 'acpi', 'apic', 'boot', 'device_model', 'display', 
                         'fda', 'fdb', 'keymap', 'isa', 'localtime', 'monitor', 
-                        'nographic', 'pae', 'rtc_timeoffset', 'serial', 'sdl',
+                        'nographic', 'pae', 'extid', 'rtc_timeoffset', 'serial', 'sdl',
                         'soundhw','stdvga', 'usb', 'usbdevice', 'vnc',
                         'vncconsole', 'vncdisplay', 'vnclisten', 'timer_mode',
                         'vncpasswd', 'vncunused', 'xauthority', 'pci', 'vhpt',
Index: xen-3.2-testing/tools/python/xen/xend/image.py
===================================================================
--- xen-3.2-testing.orig/tools/python/xen/xend/image.py
+++ xen-3.2-testing/tools/python/xen/xend/image.py
@@ -426,6 +426,7 @@ class HVMImageHandler(ImageHandler):
 
         self.apic = int(vmConfig['platform'].get('apic', 0))
         self.acpi = int(vmConfig['platform'].get('acpi', 0))
+	self.extid  = int(vmConfig['platform'].get('extid',  0))
         self.guest_os_type = vmConfig['platform'].get('guest_os_type')
 
     # Return a list of cmd line args to the device models based on the
@@ -516,6 +517,7 @@ class HVMImageHandler(ImageHandler):
         log.debug("store_evtchn   = %d", store_evtchn)
         log.debug("memsize        = %d", mem_mb)
         log.debug("vcpus          = %d", self.vm.getVCpuCount())
+	log.debug("extid          = %d", self.extid)
         log.debug("acpi           = %d", self.acpi)
         log.debug("apic           = %d", self.apic)
 
@@ -523,6 +525,7 @@ class HVMImageHandler(ImageHandler):
                           image          = self.kernel,
                           memsize        = mem_mb,
                           vcpus          = self.vm.getVCpuCount(),
+			  extid          = self.extid,
                           acpi           = self.acpi,
                           apic           = self.apic)
         rc['notes'] = { 'SUSPEND_CANCEL': 1 }
Index: xen-3.2-testing/tools/python/xen/xm/create.py
===================================================================
--- xen-3.2-testing.orig/tools/python/xen/xm/create.py
+++ xen-3.2-testing/tools/python/xen/xm/create.py
@@ -199,6 +199,10 @@ gopts.var('timer_mode', val='TIMER_MODE'
           use="""Timer mode (0=delay virtual time when ticks are missed;
           1=virtual time is always wallclock time.""")
 
+gopts.var('extid', val='EXTID',
+          fn=set_int, default=0,
+          use="Specify extention ID for a HVM domain.")
+
 gopts.var('acpi', val='ACPI',
           fn=set_int, default=1,
           use="Disable or enable ACPI of HVM domain.")
@@ -719,7 +723,7 @@ def configure_vifs(config_devs, vals):
 def configure_hvm(config_image, vals):
     """Create the config for HVM devices.
     """
-    args = [ 'device_model', 'pae', 'vcpus', 'boot', 'fda', 'fdb', 'timer_mode',
+    args = [ 'device_model', 'pae', 'extid', 'vcpus', 'boot', 'fda', 'fdb', 'timer_mode',
              'localtime', 'serial', 'stdvga', 'isa', 'nographic', 'soundhw',
              'vnc', 'vncdisplay', 'vncunused', 'vncconsole', 'vnclisten',
              'sdl', 'display', 'xauthority', 'rtc_timeoffset', 'monitor',

[-- Attachment #3: ns_xen_base.patch --]
[-- Type: text/plain, Size: 13115 bytes --]

%patch
Index: xen-3.2-testing/xen/arch/x86/hvm/svm/svm.c
===================================================================
--- xen-3.2-testing.orig/xen/arch/x86/hvm/svm/svm.c	2008-02-09 21:24:22.000000000 -0500
+++ xen-3.2-testing/xen/arch/x86/hvm/svm/svm.c	2008-02-09 21:24:32.000000000 -0500
@@ -50,6 +50,7 @@
 #include <asm/hvm/vpt.h>
 #include <asm/hvm/trace.h>
 #include <asm/hap.h>
+#include <asm/hvm/hvm_extensions.h>
 
 u32 svm_feature_flags;
 
@@ -73,6 +74,7 @@
 /* vmcb used for extended host state */
 static void *root_vmcb[NR_CPUS] __read_mostly;
 
+
 static void inline __update_guest_eip(
     struct cpu_user_regs *regs, unsigned int inst_len)
 {
@@ -882,7 +884,7 @@
     .set_tsc_offset       = svm_set_tsc_offset,
     .inject_exception     = svm_inject_exception,
     .init_hypercall_page  = svm_init_hypercall_page,
-    .event_pending        = svm_event_pending
+    .event_pending        = svm_event_pending,
 };
 
 int start_svm(struct cpuinfo_x86 *c)
@@ -1044,6 +1046,7 @@
     HVMTRACE_3D(CPUID, v, input,
                 ((uint64_t)eax << 32) | ebx, ((uint64_t)ecx << 32) | edx);
 
+    ext_intercept_do_cpuid(input, regs);
     inst_len = __get_instruction_length(v, INSTR_CPUID, NULL);
     __update_guest_eip(regs, inst_len);
 }
@@ -1739,6 +1742,11 @@
     /* is it a read? */
     if (vmcb->exitinfo1 == 0)
     {
+        if (ext_intercept_do_msr_read(ecx, regs))
+        {
+            goto done;
+        }
+
         switch (ecx) {
         case MSR_IA32_TSC:
             msr_content = hvm_get_guest_time(v);
@@ -1829,6 +1837,11 @@
     }
     else
     {
+        if (ext_intercept_do_msr_write(ecx, regs))
+        {
+            goto done_1;
+        }
+
         msr_content = (u32)regs->eax | ((u64)regs->edx << 32);
 
         hvmtrace_msr_write(v, ecx, msr_content);
@@ -1889,6 +1902,7 @@
             }
             break;
         }
+done_1:
 
         inst_len = __get_instruction_length(v, INSTR_WRMSR, NULL);
     }
Index: xen-3.2-testing/xen/arch/x86/hvm/vmx/vmx.c
===================================================================
--- xen-3.2-testing.orig/xen/arch/x86/hvm/vmx/vmx.c	2008-02-09 21:24:22.000000000 -0500
+++ xen-3.2-testing/xen/arch/x86/hvm/vmx/vmx.c	2008-02-09 21:24:32.000000000 -0500
@@ -50,6 +50,7 @@
 #include <asm/hvm/vpt.h>
 #include <public/hvm/save.h>
 #include <asm/hvm/trace.h>
+#include <asm/hvm/hvm_extensions.h>
 
 enum handler_return { HNDL_done, HNDL_unhandled, HNDL_exception_raised };
 
@@ -62,6 +63,7 @@
 static void vmx_update_guest_cr(struct vcpu *v, unsigned int cr);
 static void vmx_update_guest_efer(struct vcpu *v);
 
+
 static int vmx_domain_initialise(struct domain *d)
 {
     return vmx_alloc_vlapic_mapping(d);
@@ -1238,7 +1240,8 @@
     unsigned int count = *ecx;
 
 #ifdef VMXASSIST
-    if ( input == 0x40000003 )
+    if (( input == 0x40000003 ) &&
+       (vmx_guest_x86_mode(current) ==  0))
     {
         /*
          * NB. Unsupported interface for private use of VMXASSIST only.
@@ -1319,12 +1322,13 @@
 
 static void vmx_do_cpuid(struct cpu_user_regs *regs)
 {
-    unsigned int eax, ebx, ecx, edx;
+    unsigned int eax, ebx, ecx, edx, input;
 
     eax = regs->eax;
     ebx = regs->ebx;
     ecx = regs->ecx;
     edx = regs->edx;
+    input = eax;
 
     vmx_cpuid_intercept(&eax, &ebx, &ecx, &edx);
 
@@ -1332,6 +1336,7 @@
     regs->ebx = ebx;
     regs->ecx = ecx;
     regs->edx = edx;
+    ext_intercept_do_cpuid(input, regs);
 }
 
 #define CASE_GET_REG_P(REG, reg)    \
@@ -2316,6 +2321,9 @@
 
     HVM_DBG_LOG(DBG_LEVEL_1, "ecx=%x", ecx);
 
+    if (ext_intercept_do_msr_read(ecx, regs))
+        goto done;
+
     switch ( ecx )
     {
     case MSR_IA32_TSC:
@@ -2499,6 +2507,9 @@
     HVM_DBG_LOG(DBG_LEVEL_1, "ecx=%x, eax=%x, edx=%x",
                 ecx, (u32)regs->eax, (u32)regs->edx);
 
+    if (ext_intercept_do_msr_write(ecx, regs)) 
+        return 1;
+
     msr_content = (u32)regs->eax | ((u64)regs->edx << 32);
 
     hvmtrace_msr_write(v, ecx, msr_content);
Index: xen-3.2-testing/xen/include/asm-x86/hvm/domain.h
===================================================================
--- xen-3.2-testing.orig/xen/include/asm-x86/hvm/domain.h	2008-02-09 21:24:22.000000000 -0500
+++ xen-3.2-testing/xen/include/asm-x86/hvm/domain.h	2008-02-09 21:24:32.000000000 -0500
@@ -73,6 +73,10 @@
 
     /* Pass-through */
     struct hvm_iommu       hvm_iommu;
+    /* Hvm extension handle */
+    void                   *ext_handle; /* will be NULL on creation (memset)*/
+    struct extension_intercept_vector   *ext_vector;
+
 };
 
 #endif /* __ASM_X86_HVM_DOMAIN_H__ */
Index: xen-3.2-testing/xen/include/public/hvm/params.h
===================================================================
--- xen-3.2-testing.orig/xen/include/public/hvm/params.h	2008-02-09 21:24:22.000000000 -0500
+++ xen-3.2-testing/xen/include/public/hvm/params.h	2008-02-09 21:24:32.000000000 -0500
@@ -50,10 +50,12 @@
 
 #define HVM_PARAM_BUFIOREQ_PFN 6
 
+#define HVM_PARAM_EXTEND_HYPERVISOR 7
+
 #ifdef __ia64__
-#define HVM_PARAM_NVRAM_FD     7
-#define HVM_PARAM_VHPT_SIZE    8
-#define HVM_PARAM_BUFPIOREQ_PFN	9
+#define HVM_PARAM_NVRAM_FD     8 
+#define HVM_PARAM_VHPT_SIZE    9 
+#define HVM_PARAM_BUFPIOREQ_PFN 10	
 #endif
 
 /*
@@ -75,12 +77,13 @@
  *   Missed interrupts are collapsed together and delivered as one 'late tick'.
  *   Guest time always tracks wallclock (i.e., real) time.
  */
-#define HVM_PARAM_TIMER_MODE   10
+//KYS Check the modifications done to this file
+#define HVM_PARAM_TIMER_MODE   11
 #define HVMPTM_delay_for_missed_ticks    0
 #define HVMPTM_no_delay_for_missed_ticks 1
 #define HVMPTM_no_missed_ticks_pending   2
 #define HVMPTM_one_missed_tick_pending   3
 
-#define HVM_NR_PARAMS          11
+#define HVM_NR_PARAMS          12
 
 #endif /* __XEN_PUBLIC_HVM_PARAMS_H__ */
Index: xen-3.2-testing/xen/arch/x86/hvm/Makefile
===================================================================
--- xen-3.2-testing.orig/xen/arch/x86/hvm/Makefile	2008-02-09 21:24:22.000000000 -0500
+++ xen-3.2-testing/xen/arch/x86/hvm/Makefile	2008-02-09 21:24:32.000000000 -0500
@@ -1,5 +1,6 @@
 subdir-y += svm
 subdir-y += vmx
+subdir-y += hvm_ext 
 
 obj-y += hvm.o
 obj-y += i8254.o
Index: xen-3.2-testing/xen/arch/x86/hvm/hvm.c
===================================================================
--- xen-3.2-testing.orig/xen/arch/x86/hvm/hvm.c	2008-02-09 21:24:22.000000000 -0500
+++ xen-3.2-testing/xen/arch/x86/hvm/hvm.c	2008-02-09 21:37:20.000000000 -0500
@@ -42,6 +42,7 @@
 #include <asm/mc146818rtc.h>
 #include <asm/spinlock.h>
 #include <asm/hvm/hvm.h>
+#include <asm/hvm/hvm_extensions.h>
 #include <asm/hvm/vpt.h>
 #include <asm/hvm/support.h>
 #include <asm/hvm/cacheattr.h>
@@ -118,6 +119,7 @@
     rtc_migrate_timers(v);
     hpet_migrate_timers(v);
     pt_migrate(v);
+    ext_intercept_do_migrate_timers(v);
 }
 
 void hvm_do_resume(struct vcpu *v)
@@ -279,6 +281,7 @@
 
 void hvm_domain_destroy(struct domain *d)
 {
+    ext_intercept_domain_destroy(d);
     hvm_funcs.domain_destroy(d);
     vioapic_deinit(d);
     hvm_destroy_cacheattr_region_list(d);
@@ -447,8 +450,14 @@
 {
     int rc;
 
+    if ((rc = ext_intercept_vcpu_initialize(v)) != 0)
+        goto fail1;
+
     if ( (rc = vlapic_init(v)) != 0 )
+    {
+        ext_intercept_vcpu_destroy(v);
         goto fail1;
+    }
 
     if ( (rc = hvm_funcs.vcpu_initialise(v)) != 0 )
         goto fail2;
@@ -496,12 +505,14 @@
     hvm_funcs.vcpu_destroy(v);
  fail2:
     vlapic_destroy(v);
+    ext_intercept_vcpu_destroy(v);
  fail1:
     return rc;
 }
 
 void hvm_vcpu_destroy(struct vcpu *v)
 {
+    ext_intercept_vcpu_destroy(v);
     vlapic_destroy(v);
     hvm_funcs.vcpu_destroy(v);
 
@@ -1573,6 +1584,10 @@
     case 0:
         break;
     }
+    if (ext_intercept_do_hypercall(regs)) 
+    {
+        return HVM_HCALL_completed;
+    }
 
     if ( (eax >= NR_hypercalls) || !hvm_hypercall32_table[eax] )
     {
@@ -1964,6 +1979,9 @@
                 if ( a.value > HVMPTM_one_missed_tick_pending )
                     goto param_fail;
                 break;
+            case HVM_PARAM_EXTEND_HYPERVISOR:
+                if (hvm_ext_bind(d, (int)a.value)) 
+                    goto param_fail;
             }
             d->arch.hvm_domain.params[a.index] = a.value;
             rc = 0;
Index: xen-3.2-testing/xen/arch/x86/x86_64/asm-offsets.c
===================================================================
--- xen-3.2-testing.orig/xen/arch/x86/x86_64/asm-offsets.c	2008-02-09 21:24:22.000000000 -0500
+++ xen-3.2-testing/xen/arch/x86/x86_64/asm-offsets.c	2008-02-09 21:24:32.000000000 -0500
@@ -146,4 +146,7 @@
     BLANK();
 
     OFFSET(CPUINFO_ext_features, struct cpuinfo_x86, x86_capability[1]);
+    BLANK();
+
+    OFFSET(DOM_ext_vector, struct domain, arch.hvm_domain.ext_vector);
 }
Index: xen-3.2-testing/xen/arch/x86/hvm/vmx/x86_64/exits.S
===================================================================
--- xen-3.2-testing.orig/xen/arch/x86/hvm/vmx/x86_64/exits.S	2008-02-09 21:24:22.000000000 -0500
+++ xen-3.2-testing/xen/arch/x86/hvm/vmx/x86_64/exits.S	2008-02-09 21:24:32.000000000 -0500
@@ -112,6 +112,14 @@
         ALIGN
 ENTRY(vmx_asm_do_vmentry)
         GET_CURRENT(%rbx)
+        mov  VCPU_domain(%rbx),%rax
+        mov  DOM_ext_vector(%rax),%rdx
+        test  %rdx,%rdx
+        je    vmx_no_ext_vector
+        sti
+        callq *(%rdx)
+vmx_no_ext_vector:
+
         cli                             # tests must not race interrupts
 
         movl  VCPU_processor(%rbx),%eax
Index: xen-3.2-testing/xen/arch/x86/hvm/svm/x86_64/exits.S
===================================================================
--- xen-3.2-testing.orig/xen/arch/x86/hvm/svm/x86_64/exits.S	2008-02-09 21:24:22.000000000 -0500
+++ xen-3.2-testing/xen/arch/x86/hvm/svm/x86_64/exits.S	2008-02-09 21:24:32.000000000 -0500
@@ -37,6 +37,14 @@
 
 ENTRY(svm_asm_do_resume)
         GET_CURRENT(%rbx)
+        mov  VCPU_domain(%rbx),%rax
+        mov  DOM_ext_vector(%rax),%rdx
+        test  %rdx,%rdx
+        je    svm_no_ext_vector
+        sti
+        callq *(%rdx)
+svm_no_ext_vector:
+
         CLGI
 
         movl VCPU_processor(%rbx),%eax
Index: xen-3.2-testing/xen/arch/x86/hvm/save.c
===================================================================
--- xen-3.2-testing.orig/xen/arch/x86/hvm/save.c	2008-02-09 21:24:22.000000000 -0500
+++ xen-3.2-testing/xen/arch/x86/hvm/save.c	2008-02-09 21:24:32.000000000 -0500
@@ -23,6 +23,8 @@
 
 #include <asm/hvm/support.h>
 #include <public/hvm/save.h>
+#include <public/hvm/params.h>
+#include <asm/hvm/hvm_extensions.h>
 
 void arch_hvm_save(struct domain *d, struct hvm_save_header *hdr)
 {
@@ -31,8 +33,7 @@
     /* Save some CPUID bits */
     cpuid(1, &eax, &ebx, &ecx, &edx);
     hdr->cpuid = eax;
-
-    hdr->pad0 = 0;
+    hdr->ext_id = d->arch.hvm_domain.params[HVM_PARAM_EXTEND_HYPERVISOR];
 }
 
 int arch_hvm_load(struct domain *d, struct hvm_save_header *hdr)
@@ -61,6 +62,9 @@
 
     /* VGA state is not saved/restored, so we nobble the cache. */
     d->arch.hvm_domain.stdvga.cache = 0;
+    d->arch.hvm_domain.params[HVM_PARAM_EXTEND_HYPERVISOR] = hdr->ext_id;
+    if (hvm_ext_bind(d, hdr->ext_id))
+        return -1;
 
     return 0;
 }
Index: xen-3.2-testing/xen/include/public/arch-x86/hvm/save.h
===================================================================
--- xen-3.2-testing.orig/xen/include/public/arch-x86/hvm/save.h	2008-02-09 21:24:22.000000000 -0500
+++ xen-3.2-testing/xen/include/public/arch-x86/hvm/save.h	2008-02-09 21:24:32.000000000 -0500
@@ -38,7 +38,7 @@
     uint32_t version;           /* File format version */
     uint64_t changeset;         /* Version of Xen that saved this file */
     uint32_t cpuid;             /* CPUID[0x01][%eax] on the saving machine */
-    uint32_t pad0;
+    uint32_t ext_id;		/* extension ID */
 };
 
 DECLARE_HVM_SAVE_TYPE(HEADER, 1, struct hvm_save_header);
@@ -422,9 +422,30 @@
 
 DECLARE_HVM_SAVE_TYPE(MTRR, 14, struct hvm_hw_mtrr);
 
+struct hvm_ns_veridian_dom {
+    uint64_t guestid_msr;
+    uint64_t hypercall_msr;
+    uint32_t long_mode;
+    uint32_t pad0;
+};
+DECLARE_HVM_SAVE_TYPE(NS_VERIDIAN_DOM, 15, struct hvm_ns_veridian_dom);
+
+struct hvm_ns_veridian_cpu {
+    uint64_t control_msr;
+    uint64_t version_msr;
+    uint64_t sief_msr;
+    uint64_t simp_msr;
+    uint64_t eom_msr;
+    uint64_t int_msr[16];
+    struct {
+        uint64_t config;
+        uint64_t count;
+    } timers[4];
+};
+DECLARE_HVM_SAVE_TYPE(NS_VERIDIAN_CPU, 16, struct hvm_ns_veridian_cpu);
 /* 
  * Largest type-code in use
  */
-#define HVM_SAVE_CODE_MAX 14
+#define HVM_SAVE_CODE_MAX 16
 
 #endif /* __XEN_PUBLIC_HVM_SAVE_X86_H__ */

[-- Attachment #4: ns_xen_extension.patch --]
[-- Type: text/plain, Size: 125710 bytes --]

%patch
Index: xen-3.2-testing/xen/arch/x86/hvm/hvm_ext/Makefile
===================================================================
--- /dev/null	1970-01-01 00:00:00.000000000 +0000
+++ xen-3.2-testing/xen/arch/x86/hvm/hvm_ext/Makefile	2008-02-15 18:28:11.000000000 -0500
@@ -0,0 +1,3 @@
+subdir-y += novell 
+
+obj-y += hvm_ext.o
Index: xen-3.2-testing/xen/include/asm-x86/hvm/hvm_extensions.h
===================================================================
--- /dev/null	1970-01-01 00:00:00.000000000 +0000
+++ xen-3.2-testing/xen/include/asm-x86/hvm/hvm_extensions.h	2008-02-15 18:28:11.000000000 -0500
@@ -0,0 +1,239 @@
+/****************************************************************************
+ |
+ | Copyright (c) [2007, 2008] Novell, Inc.
+ | All Rights Reserved.
+ |
+ | This program is free software; you can redistribute it and/or
+ | modify it under the terms of version 2 of the GNU General Public License as
+ | published by the Free Software Foundation.
+ |
+ | This program is distributed in the hope that it will be useful,
+ | but WITHOUT ANY WARRANTY; without even the implied warranty of
+ | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ | GNU General Public License for more details.
+ |
+ | You should have received a copy of the GNU General Public License
+ | along with this program; if not, contact Novell, Inc.
+ |
+ | To contact Novell about this file by physical or electronic mail,
+ | you may find current contact information at www.novell.com
+ |
+ |***************************************************************************
+*/
+
+/*
+ * hvm_extensions.h  
+ * This file implements a framework for extending the hypervisor 
+ * functionality in a modular fashion. The framework is comprised of 
+ * two components: A) A set of intercepts that will allow the extension 
+ * module to implement its functionality by intercepting the corresponding
+ * code paths in Xen and B) A controlled runtime for the extension module.
+ * Initially the goal was to pacakage the extension  module as a boot-time
+ * loadable module. This may not be the way we wend up packaging it. 
+ *
+ * Engineering Contact: K. Y. Srinivasan
+ */
+
+#ifndef HVM_EXTENSION_H
+#define HVM_EXTENSION_H 
+
+#include <xen/sched.h>
+#include <asm/domain.h>
+#include <xen/timer.h>
+#include <xen/time.h>
+#include <asm/regs.h>
+#include <asm/types.h>
+#include <asm/hvm/io.h>
+#include <asm/hvm/hvm.h>
+#include <asm/hvm/domain.h>
+
+
+			
+/*
+ * Hypervisor extension hooks.
+ */
+typedef struct extension_intercept_vector {
+	/* Do not move the first field (do_continuation). Offset 
+	 * hardcoded in assembly files exits.S (VMX and SVM).
+	 */
+	void (*do_continuation)(void);
+	int (*domain_create)(struct domain *d);
+	void (*domain_destroy)(struct domain *d);
+	int (*vcpu_initialize)(struct vcpu *v);
+	void (*vcpu_destroy)(struct vcpu *v);
+	int (*do_cpuid)(uint32_t idx, struct cpu_user_regs *regs);
+	int (*do_msr_read)(uint32_t idx, struct cpu_user_regs *regs);
+	int (*do_msr_write)(uint32_t idx, struct cpu_user_regs *regs);
+	int (*do_hypercall)(struct cpu_user_regs *pregs);
+	void (*do_migrate_timers)(struct vcpu *v);
+} extension_intercept_vector_t;
+
+static inline int
+ext_intercept_domain_create(struct domain *d)
+{
+	if (d->arch.hvm_domain.ext_vector) {
+		return(d->arch.hvm_domain.ext_vector->domain_create(d));
+	}
+	return (0);
+}
+
+static inline void 
+ext_intercept_domain_destroy(struct domain *d)
+{
+	if (d->arch.hvm_domain.ext_vector) {
+		d->arch.hvm_domain.ext_vector->domain_destroy(d);
+	}
+}
+
+static inline int
+ext_intercept_vcpu_initialize(struct vcpu *v)
+{
+	struct domain *d = v->domain;
+	if (d->arch.hvm_domain.ext_vector) {
+		return(d->arch.hvm_domain.ext_vector->vcpu_initialize(v));
+	}
+	return (0);
+}
+	
+static inline void 
+ext_intercept_vcpu_destroy(struct vcpu *v)
+{
+	struct domain *d = v->domain;
+	if (d->arch.hvm_domain.ext_vector) {
+		d->arch.hvm_domain.ext_vector->vcpu_destroy(v);
+	}
+}
+
+static inline int
+ext_intercept_do_cpuid(uint32_t idx, struct cpu_user_regs *regs)
+{
+	struct domain *d = current->domain;
+	if (d->arch.hvm_domain.ext_vector) {
+		return(d->arch.hvm_domain.ext_vector->do_cpuid(
+								idx, regs));
+	}
+	return (0);
+}
+
+static inline int
+ext_intercept_do_msr_read(uint32_t idx, struct cpu_user_regs *regs)
+{
+	struct domain *d = current->domain;
+	if (d->arch.hvm_domain.ext_vector) {
+		return(d->arch.hvm_domain.
+		ext_vector->do_msr_read(idx, regs));
+	}
+	return (0);
+}
+static inline int
+ext_intercept_do_msr_write(uint32_t idx, struct cpu_user_regs *regs)
+{
+	struct domain *d = current->domain;
+	if (d->arch.hvm_domain.ext_vector) {
+		return(d->arch.hvm_domain.
+		ext_vector->do_msr_write(idx, regs));
+	}
+	return (0);
+}
+
+static inline int
+ext_intercept_do_hypercall(struct cpu_user_regs *regs)
+{
+	struct domain *d = current->domain;
+	if (d->arch.hvm_domain.ext_vector) {
+		return(d->arch.hvm_domain.
+		ext_vector->do_hypercall(regs));
+	}
+	return (0);
+}
+
+static inline void 
+ext_intercept_do_migrate_timers(struct vcpu *v)
+{
+	struct domain *d = current->domain;
+	if (d->arch.hvm_domain.ext_vector) {
+		return(d->arch.hvm_domain.
+		ext_vector->do_migrate_timers(v));
+	}
+}
+static inline void 
+ext_intercept_do_continuation(void)
+{
+	struct domain *d = current->domain;
+	if (d->arch.hvm_domain.ext_vector) {
+		d->arch.hvm_domain.
+		ext_vector->do_continuation();
+	}
+}
+
+/*
+ * Base hypervisor support available to extension modules.
+ * We may choose to do away with this level of indirection!
+ * It may still be useful to have a controlled environment for the
+ * extension modules.
+ */
+typedef struct xen_call_vector {
+	/*
+	 * We may want to embed version/compiler info here to avoid mismatches
+	 */
+	struct hvm_function_table *hvmFuncTable;
+	struct hvm_mmio_handler	*mmIoHandler;
+	void (*extPanic)(const char *s, ...);
+	void (*extPrintk)(const char *format, ...);
+	void (*extPostInterrupt)(struct vcpu *v, int vector, int type);
+	void (*extSetTimer)(struct timer *timer, s_time_t expires);
+	s_time_t  (*extGetTimeSinceBoot)(void);
+	void * (*extGetVirtFromGmfn)(struct domain *d, unsigned long gmfn);
+	unsigned long (*extGetMfnFromGmfn)(struct domain *d, unsigned long gmfn); 
+	unsigned long (*extGetMfnFromGva)(unsigned long va); 
+	void (*extUnmapDomainPage)(void *p);
+	void *(*extAllocMem)(size_t size);
+	void (*extFreeMem)(void *ptr);
+	enum hvm_copy_result (*extCopyToGuestPhysical)(paddr_t paddr, void *buf, int size);
+	enum hvm_copy_result (*extCopyFromGuestPhysical)(void *buf, paddr_t paddr, int size);
+	void *(*extAllocDomHeapPage)(void);
+	void (*extFreeDomHeapPage)(void *);
+	void * (*extGetVirtFromPagePtr)(void *);
+	void (*extVcpuPause)(struct vcpu *v);
+	void (*extVcpuUnPause)(struct vcpu *v);
+	void (*extArchGetDomainInfoCtxt)(struct vcpu *v, 
+		struct vcpu_guest_context *);
+	int (*extArchSetDomainInfoCtxt)(struct vcpu *v, 
+		struct vcpu_guest_context *);
+	int (*extCpuIsIntel)(void );
+	int (*extWrmsrHypervisorRegs)(uint32_t idx, uint32_t eax, 
+					uint32_t edx);
+	void (*extKillTimer)(struct timer *timer);
+	void (*extMigrateTimer)(struct timer *timer, unsigned int new_cpu);
+} xen_call_vector_t;
+
+#define MAX_EXTENSION_ID	1
+
+/*
+ * int hvm_ext_bind(struct domain *d, int ext_id)
+ * Bind the specified domain to the specified extension module.
+ *
+ * Calling/Exit State:
+ *	None.
+ *
+ * Remarks:
+ *	The goal is to support per-domain extension modules. Domain
+ * 	creating tools will have to specify the needed extension 
+ * 	module ID. For now it is hard coded.
+ */ 
+int hvm_ext_bind(struct domain *d, int ext_id);
+
+/*
+ * int hvm_ext_register(int ext_id, 
+ *			struct extension_intercept_vector *ext_vector,
+ *			struct xen_call_vector *xen_vector) 
+ * Register the extension module with the hypervisor
+ * Calling/Exit State:
+ *	None.
+ */
+
+int hvm_ext_register(int ext_id, struct extension_intercept_vector *ext_vector,
+			struct xen_call_vector *xen_vector); 
+
+
+#endif
Index: xen-3.2-testing/xen/arch/x86/hvm/hvm_ext/hvm_ext.c
===================================================================
--- /dev/null	1970-01-01 00:00:00.000000000 +0000
+++ xen-3.2-testing/xen/arch/x86/hvm/hvm_ext/hvm_ext.c	2008-02-15 18:28:11.000000000 -0500
@@ -0,0 +1,350 @@
+/****************************************************************************
+ |
+ | Copyright (c) [2007, 2008] Novell, Inc.
+ | All Rights Reserved.
+ |
+ | This program is free software; you can redistribute it and/or
+ | modify it under the terms of version 2 of the GNU General Public License as
+ | published by the Free Software Foundation.
+ |
+ | This program is distributed in the hope that it will be useful,
+ | but WITHOUT ANY WARRANTY; without even the implied warranty of
+ | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ | GNU General Public License for more details.
+ |
+ | You should have received a copy of the GNU General Public License
+ | along with this program; if not, contact Novell, Inc.
+ |
+ | To contact Novell about this file by physical or electronic mail,
+ | you may find current contact information at www.novell.com
+ |
+ |***************************************************************************
+*/
+
+/*
+ * hvm_ext.c 
+ * Glue code for implementing the extension module. 
+ *
+ * Engineering Contact: K. Y. Srinivasan
+ */
+
+#include <asm/hvm/hvm_extensions.h>
+#include <xen/lib.h>
+#include <asm/event.h>
+#include <asm/shadow.h>
+#include <asm/hvm/support.h>
+#include <xen/domain_page.h>
+#include <xen/domain.h>
+#include <xen/mm.h>
+#include <xen/event.h>
+#include <xen/sched.h>
+
+
+struct extension_intercept_vector *intercept_vector;
+
+/*
+ * static void
+ * hvm_ext_inject_interrupt(struct vcpu *v, int vector, int type)
+ * Inject the specified exception to the specified virtual cpu.
+ *
+ * Calling/Exit State:
+ *	None.
+ */
+static void
+hvm_ext_inject_interrupt(struct vcpu *v, int vector, int type)
+{
+	struct vlapic *vlapic = vcpu_vlapic(v);
+
+	/*
+	 * XXXKYS: Check the trigger mode.
+	 */
+	if (vlapic_set_irq(vlapic, vector, 1)) {
+		vcpu_kick(v);
+	}
+}
+
+/*
+ * static void
+ * hvm_ext_set_timer(struct timer *timer, s_time_t expires)
+ * Set a timeout.
+ *
+ * Calling/Exit State:
+ *	None.
+ */
+static void
+hvm_ext_set_timer(struct timer *timer, s_time_t expires)
+{
+	set_timer(timer, expires);
+}
+
+/*
+ * static void
+ * hvm_ext_kill_timer(struct timer *timer)
+ * Kill the specified timer.
+ *
+ * Calling/Exit State:
+ *	None.
+ */
+static void
+hvm_ext_kill_timer(struct timer *timer)
+{
+	kill_timer(timer);
+}
+
+/*
+ * static void
+ * hvm_ext_migrate_timer(struct timer *timer, unsigned int new_cpu)
+ * Migrate the  timer to the new cpu.
+ *
+ * Calling/Exit State:
+ *	None.
+ */
+static void
+hvm_ext_migrate_timer(struct timer *timer, unsigned int new_cpu)
+{
+	migrate_timer(timer, new_cpu);
+}
+
+
+/*
+ * static void *
+ * hvm_ext_get_virt_from_gmfn(struct domain *d, unsigned long gmfn)
+ * Given a guest frame number return a virtual address at which 
+ * the specified page can be accessed in the hypervisor.
+ *
+ * Calling/Exit State:
+ * 	None.
+ */
+static void *
+hvm_ext_get_virt_from_gmfn(struct domain *d, unsigned long gmfn)
+{
+	unsigned long mfn = gmfn_to_mfn(d, gmfn);
+	if (mfn == INVALID_MFN) {
+		return (NULL);
+	}
+	return (map_domain_page_global(mfn));
+}
+
+/*
+ * static unsigned long
+ * hvm_ext_get_mfn_from_gmfn(struct domain *d, unsigned long gmfn)
+ * Get the machine frame number given the guest frame number.
+ *
+ * Calling/Exit State:
+ *	None.
+ */
+static unsigned long
+hvm_ext_get_mfn_from_gmfn(struct domain *d, unsigned long gmfn)
+{
+	return (gmfn_to_mfn(d, gmfn));
+}
+
+/*
+ * static unsigned long
+ * hvm_ext_get_mfn_from_gva(unsigned long va)
+ * Given the guest virtual address return the machine frame number backing the 
+ * address.
+ *
+ * Calling/Exit State:
+ *	None.
+ */
+static unsigned long
+hvm_ext_get_mfn_from_gva(unsigned long va)
+{
+	uint32_t pfec = PFEC_page_present;
+	unsigned long gfn;
+	gfn = paging_gva_to_gfn(current, va, &pfec);
+	return (gmfn_to_mfn((current->domain), gfn));
+}
+
+/*
+ * static void *
+ * hvm_ext_alloc_mem(size_t size)
+ * Allocate specified bytes of memory.
+ *
+ * Calling/Exit State:
+ *	None.
+ */
+static void *
+hvm_ext_alloc_mem(size_t size)
+{
+	return (xmalloc_bytes(size));
+}
+
+/*
+ * static void *
+ * hvm_ext_alloc_domheap_page(void)
+ * Allocate a page from the per-domain heap.
+ *
+ * Calling/Exit State:
+ *	None.
+ */
+static void *
+hvm_ext_alloc_domheap_page(void)
+{
+	return (alloc_domheap_page(NULL));
+}
+
+/*
+ * static void
+ * hvm_ext_free_domheap_page(void *p)
+ * Free a dom heap page.
+ *
+ * Calling/Exit State:
+ *	None.
+ */
+static void
+hvm_ext_free_domheap_page(void *p)
+{
+	free_domheap_pages(p, 0);
+}
+
+/*
+ * static void *
+ * hvm_ext_get_virt_from_page_ptr(void *page)
+ * Map the specified page a return a hypervisor VA.
+ *
+ *
+ * Calling/Exit State:
+ *	None.
+ */
+static void *
+hvm_ext_get_virt_from_page_ptr(void *page)
+{
+	struct page_info *pg = page;
+	unsigned long mfn = page_to_mfn(pg);
+	return (map_domain_page_global(mfn));
+}
+
+extern struct cpuinfo_x86 boot_cpu_data;
+
+/*
+ * static int
+ * hvm_ext_cpu_is_intel(void)
+ * Check if the CPU vendor is Intel.
+ *
+ *
+ * Calling/Exit State:
+ *	None.
+ */
+static int
+hvm_ext_cpu_is_intel(void)
+{
+	if (boot_cpu_data.x86_vendor == 0) {
+		return (1);
+	}
+	return (0);
+}
+
+/*
+ * int 
+ * hvm_ext_bind(struct domain *d, int ext_id)
+ * Bind the specified domain with the specified extension module.
+ *
+ *
+ * Calling/Exit State:
+ *	None.
+ */
+int 
+hvm_ext_bind(struct domain *d, int ext_id)
+{
+	int i;
+	/*
+	 * XXXKYS: Assuming that this function will be called before the
+	 * new domain begins to run. It is critical that this be the case.
+	 */
+	if (ext_id == 0) {
+		/*
+		 * This is the default value for this parameter.
+		 */
+		return (0);
+	}
+	d->arch.hvm_domain.ext_vector = intercept_vector; 
+	/*
+	 * Let the extension initialize its state.
+	 */
+	if (intercept_vector->domain_create(d)) {
+		return (1);
+	}
+	for (i=0; i < MAX_VIRT_CPUS; i++) {
+		if (d->vcpu[i] != NULL) {
+			if (intercept_vector->vcpu_initialize(d->vcpu[i])) {
+				int j;
+				for (j= (i-1); j >=0; j--) {
+					intercept_vector->vcpu_destroy(
+					d->vcpu[j]);
+				}	
+				intercept_vector->domain_destroy(d);
+				return (1);
+			}
+		}
+	}
+	return (0);
+}
+
+
+void extPanic(const char *fmt, ...)
+{
+	domain_crash_synchronous();
+}
+
+/*
+ * For now we will support only one extension; id==1!
+ */
+
+extern struct hvm_function_table hvm_funcs;
+extern struct hvm_mmio_handler vlapic_mmio_handler;
+
+/*
+ * int 
+ * hvm_ext_register(int ext_id, struct extension_intercept_vector *ext_vector,
+ *
+ * Register the invoking extension module with the hypervisor.
+ *
+ *
+ * Calling/Exit State:
+ *	None.
+ */
+int 
+hvm_ext_register(int ext_id, struct extension_intercept_vector *ext_vector,
+                        struct xen_call_vector *xen_vector)
+{
+	ASSERT(ext_id == 1);
+	intercept_vector = ext_vector;
+	/*
+	 * Populate the vector of services from the xen side; ultimately
+	 * we may decide to get rid of this level of indirection; it may 
+	 * still be useful to limit the breadth of xen dependency here.
+	 */
+	xen_vector->hvmFuncTable = &hvm_funcs;
+	xen_vector->mmIoHandler = &vlapic_mmio_handler;
+	xen_vector->extPanic = extPanic;
+	xen_vector->extPrintk = printk;
+	xen_vector->extPostInterrupt = hvm_ext_inject_interrupt;
+	xen_vector->extSetTimer = hvm_ext_set_timer;
+	xen_vector->extKillTimer = hvm_ext_kill_timer;
+	xen_vector->extMigrateTimer = hvm_ext_migrate_timer;
+	xen_vector->extGetTimeSinceBoot = get_s_time;
+	xen_vector->extGetVirtFromGmfn = hvm_ext_get_virt_from_gmfn;
+	xen_vector->extGetMfnFromGmfn = hvm_ext_get_mfn_from_gmfn;
+ 
+	xen_vector->extGetMfnFromGva = hvm_ext_get_mfn_from_gva;
+#ifdef CONFIG_DOMAIN_PAGE
+	xen_vector->extUnmapDomainPage = unmap_domain_page_global;
+#endif
+	xen_vector->extAllocMem = hvm_ext_alloc_mem;
+	xen_vector->extFreeMem = xfree;
+	xen_vector->extCopyToGuestPhysical = hvm_copy_to_guest_phys;
+	xen_vector->extCopyFromGuestPhysical = hvm_copy_from_guest_phys;
+	xen_vector->extAllocDomHeapPage = hvm_ext_alloc_domheap_page;
+	xen_vector->extFreeDomHeapPage = hvm_ext_free_domheap_page;
+	xen_vector->extGetVirtFromPagePtr = hvm_ext_get_virt_from_page_ptr;
+	xen_vector->extVcpuPause = vcpu_pause;
+	xen_vector->extVcpuUnPause = vcpu_unpause;
+	xen_vector->extArchGetDomainInfoCtxt = arch_get_info_guest; 
+	xen_vector->extArchSetDomainInfoCtxt = arch_set_info_guest; 
+	xen_vector->extCpuIsIntel = hvm_ext_cpu_is_intel;
+	xen_vector->extWrmsrHypervisorRegs = wrmsr_hypervisor_regs; 
+	
+	return 0;
+}
Index: xen-3.2-testing/xen/arch/x86/hvm/hvm_ext/novell/Makefile
===================================================================
--- /dev/null	1970-01-01 00:00:00.000000000 +0000
+++ xen-3.2-testing/xen/arch/x86/hvm/hvm_ext/novell/Makefile	2008-02-15 18:28:11.000000000 -0500
@@ -0,0 +1,2 @@
+obj-y += nsintercept.o
+obj-y += nshypercall.o
Index: xen-3.2-testing/xen/arch/x86/hvm/hvm_ext/novell/ns_errno.h
===================================================================
--- /dev/null	1970-01-01 00:00:00.000000000 +0000
+++ xen-3.2-testing/xen/arch/x86/hvm/hvm_ext/novell/ns_errno.h	2008-02-15 18:28:11.000000000 -0500
@@ -0,0 +1,62 @@
+/****************************************************************************
+ |
+ | Copyright (c) [2007, 2008] Novell, Inc.
+ | All Rights Reserved.
+ |
+ | This program is free software; you can redistribute it and/or
+ | modify it under the terms of version 2 of the GNU General Public License as
+ | published by the Free Software Foundation.
+ |
+ | This program is distributed in the hope that it will be useful,
+ | but WITHOUT ANY WARRANTY; without even the implied warranty of
+ | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ | GNU General Public License for more details.
+ |
+ | You should have received a copy of the GNU General Public License
+ | along with this program; if not, contact Novell, Inc.
+ |
+ | To contact Novell about this file by physical or electronic mail,
+ | you may find current contact information at www.novell.com
+ |
+ |***************************************************************************
+*/
+
+/*
+ * ns_errno.h
+ * Error codes for the  Novell Shim.
+ *
+ * Engineering Contact: K. Y. Srinivasan
+ */
+
+#ifndef NS_ERRNO_H
+#define NS_ERRNO_H
+
+#define NS_STATUS_SUCCESS			0x0000
+#define NS_STATUS_INVALID_HYPERCALL_CODE	0x0002
+#define NS_STATUS_INVALID_HYPERCALL_INPUT	0x0003
+#define NS_STATUS_INVALID_ALIGNMENT		0x0004
+#define NS_STATUS_INVALID_PARAMETER		0x0005
+#define NS_STATUS_ACCESS_DENIED			0x0006
+#define NS_STATUS_INVALID_PARTITION_STATE	0x0007
+#define NS_STATUS_OPERATION_DENIED		0x0008
+#define NS_STATUS_UNKNOWN_PROPERTY		0x0009
+#define NS_STATUS_PROPERTY_VALUE_OUT_OF_RANGE	0x000A
+#define NS_STATUS_INSUFFICIENT_MEMORY		0x000B
+#define NS_STATUS_PARTITION_TOO_DEEP		0x000C
+#define NS_STATUS_INVALID_PARTITION_ID		0x000D
+#define NS_STATUS_INVALID_VP_INDEX		0x000E
+#define NS_STATUS_UNABLE_TO_RESTORE_STATE	0x000F
+#define NS_STATUS_NOT_FOUND			0x0010
+#define NS_STATUS_INVALID_PORT_ID		0x0011
+#define NS_STATUS_INVALID_CONNECTION_ID		0x0012
+#define NS_STATUS_INSUFFICIENT_BUFFERS		0x0013
+#define NS_STATUS_NOT_ACKNOWLEDGED		0x0014
+#define NS_STATUS_INVALID_VP_STATE		0x0015
+#define NS_STATUS_ACKNOWLEDGED			0x0016
+#define NS_STATUS_INVALID_SAVE_RESTORE_STATE	0x0017
+#define	NS_STATUS_NO_MEMORY_4PAGES		0x0100
+#define	NS_STATUS_NO_MEMORY_16PAGES		0x0101
+#define	NS_STATUS_NO_MEMORY_64PAGES		0x0102
+#define	NS_STATUS_NO_MEMORY_256PAGES		0x0103
+#define	NS_STATUS_NO_MEMORY_1024PAGES		0x0104
+#endif 	
Index: xen-3.2-testing/xen/arch/x86/hvm/hvm_ext/novell/ns_shim.h
===================================================================
--- /dev/null	1970-01-01 00:00:00.000000000 +0000
+++ xen-3.2-testing/xen/arch/x86/hvm/hvm_ext/novell/ns_shim.h	2008-02-15 18:28:11.000000000 -0500
@@ -0,0 +1,480 @@
+/****************************************************************************
+ |
+ | Copyright (c) [2007, 2008] Novell, Inc.
+ | All Rights Reserved.
+ |
+ | This program is free software; you can redistribute it and/or
+ | modify it under the terms of version 2 of the GNU General Public License as
+ | published by the Free Software Foundation.
+ |
+ | This program is distributed in the hope that it will be useful,
+ | but WITHOUT ANY WARRANTY; without even the implied warranty of
+ | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ | GNU General Public License for more details.
+ |
+ | You should have received a copy of the GNU General Public License
+ | along with this program; if not, contact Novell, Inc.
+ |
+ | To contact Novell about this file by physical or electronic mail,
+ | you may find current contact information at www.novell.com
+ |
+ |***************************************************************************
+*/
+
+/*
+ * Novell Shim Implementation.
+ *
+ * Engineering Contact: K. Y. Srinivasan
+ */
+
+#ifndef NS_SHIM_H
+#define NS_SHIM_H
+
+#include <xen/sched.h>
+#include <xen/types.h>
+#include <xen/timer.h>
+#include <asm/current.h>
+#include <asm/domain.h>
+#include <asm/shadow.h>
+#include <public/xen.h>
+
+#include "nshypercall.h"
+
+/*
+ * Synthetic MSR addresses
+ */
+#define NS_MSR_GUEST_OS_ID	0x40000000
+#define NS_MSR_HYPERCALL	0x40000001
+#define NS_MSR_VP_INDEX		0x40000002
+#define NS_MSR_SYSTEM_RESET	0x40000003
+#define NS_MSR_TIME_REF_COUNT	0x40000020
+#define NS_MSR_EOI		0x40000070
+#define NS_MSR_ICR		0x40000071
+#define NS_MSR_TPR		0x40000072
+
+#define NS_MSR_SCONTROL		0x40000080
+#define NS_MSR_SVERSION		0x40000081
+#define NS_MSR_SIEFP		0x40000082
+#define NS_MSR_SIMP		0x40000083
+#define NS_MSR_SEOM		0x40000084
+#define NS_MSR_SINT0		0x40000090
+#define NS_MSR_SINT1		0x40000091
+#define NS_MSR_SINT2		0x40000092
+#define NS_MSR_SINT3		0x40000093
+#define NS_MSR_SINT4		0x40000094
+#define NS_MSR_SINT5		0x40000095
+#define NS_MSR_SINT6		0x40000096
+#define NS_MSR_SINT7		0x40000097
+#define NS_MSR_SINT8		0x40000098
+#define NS_MSR_SINT9		0x40000099
+#define NS_MSR_SINT10		0x4000009A
+#define NS_MSR_SINT11		0x4000009B
+#define NS_MSR_SINT12		0x4000009C
+#define NS_MSR_SINT13		0x4000009D
+#define NS_MSR_SINT14		0x4000009E
+#define NS_MSR_SINT15		0x4000009F
+
+#define NS_MSR_TIMER0_CONFIG	0x400000B0
+#define NS_MSR_TIMER0_COUNT	0x400000B1
+#define NS_MSR_TIMER1_CONFIG	0x400000B2
+#define NS_MSR_TIMER1_COUNT	0x400000B3
+#define NS_MSR_TIMER2_CONFIG	0x400000B4
+#define NS_MSR_TIMER2_COUNT	0x400000B5
+#define NS_MSR_TIMER3_CONFIG	0x400000B6
+#define NS_MSR_TIMER3_COUNT	0x400000B7
+
+/*
+ * MSR for supporting PV drivers on longhorn.
+ */
+#define NS_MSR_PVDRV_HCALL	0x40001000
+
+/*
+ * MSR for supporting other enlightened oses.
+ */
+#define NS_MSR_NONLH_GUEST_OS_ID	0x40001000
+
+/*
+ * Novell Shim VCPU flags.
+ * A VCPU is considered up when it is capable of invoking hypercalls.
+ */
+#define NS_VCPU_BOOT_CPU	0x00000001
+#define NS_VCPU_UP		0x00000002
+
+/*
+ * Novell shim flush flags.
+ */
+
+#define NS_FLUSH_TLB		0X01
+#define NS_FLUSH_INVLPG		0X02
+
+/*
+ * We use the following global state to manage TLB flush requests from the 
+ * guest. At most only one flush can be active in the guest; we may have to
+ * revisit this if this is a bottleneck.
+ */
+typedef struct nsGlobalFlushState {
+	int	cpuCount; //0 unused; else #cpus participating
+	cpumask_t	waiters; //Cpus waiting for the flush block
+	struct vcpu	*currentOwner;
+	u64	retVal;
+	flushVa_t	*flushParam;
+	unsigned short	repCount;
+} nsGlobalFlushState_t;
+	
+typedef struct nsSpinLock {
+	unsigned long	flags;
+	spinlock_t	spinLock;
+	struct nsVcpu	*owner;
+	void		*retAddr;
+} nsSpinLock_t;
+
+/*
+ * Novell shim message structure.
+ */
+typedef enum {
+	/*
+	 * For now we only support timer messages
+	 */
+	nsMessageTypeNone = 0x00000000,
+	nsMessageTimerExpired = 0x80000010
+} nsMessageType;
+
+typedef struct nsTimerMessage {
+	nsMessageType	messageType;
+	u8		pad1[3];
+	u8		messageSize;
+	u32		timerIndex;
+	u32		pad2;
+	u64		expirationTime;
+} nsTimerMessage_t;
+
+typedef struct nsMessage {
+	nsMessageType	messageType;
+	uint8_t		messageSize;
+	uint8_t		flags;
+	uint8_t		reserved[2];
+	uint32_t		reserved1;
+	uint64_t		payLoad[30];
+} nsMessage_t;
+
+
+typedef struct nsVcpTimerState {
+	u64	config;
+	u64	count;	/*expiration time in 100ns units*/
+	int	timerIndex;
+	struct nsVcpu	*thisCpu;
+	struct timer	vcpuTimer;
+} nsVcpTimerState_t;
+
+/*
+ * Stats structure.
+ */
+
+typedef struct {
+	u64	numSwitches;
+	u64	numFlushes;
+	u64	numFlushesPosted;
+	u64	numFlushRanges;
+	u64	numFlushRangesPosted;
+
+	u64	numTprReads;
+	u64	numIcrReads;
+	u64	numEoiWrites;
+	u64	numTprWrites;
+	u64	numIcrWrites;
+
+	u64	numGFSAcquires;
+	u64	numGFSReleases;
+	u64	numTlbFlushes;
+	u64	numInvlPages;
+	u64	numTimeOuts;
+} nsVcpuStats_t;
+
+typedef struct nsVcpu {
+	/*
+	 * Per-vcpu state to support the Novell shim; 
+	 */
+	int		nsVcplockDepth; 
+	unsigned long	nsVcpuFlags;
+	unsigned char 	nsVcpFlushRequest;
+	unsigned char	nsVcpWaitingOnGFS;
+	unsigned char	nsVcpFlushPending;
+	unsigned char	nsVcpWaitingForCleanup;
+	unsigned short	nsVcpRepCount;
+	/*
+	 * Synthetic msrs.
+	 */
+	u64	nsVcpSControlMsr;
+	u64	nsVcpSVersionMsr;
+	u64	nsVcpSIefpMsr;
+	u64	nsVcpSimpMsr;
+	u64	nsVcpEomMsr;
+
+	u64	nsVcpSIntMsr[16];
+	/*
+	 * Timer MSRs.
+	 */
+	nsVcpTimerState_t	nsVcpTimers[4];
+	void	*nsVcpSiefPage;
+	void	*nsVcpSimPage;
+	/*
+	 * Hypercall input/output processing.
+	 * We keep these pages mapped in the hypervisor space.
+	 */
+	void	*nsVcpInputBuffer; /*input buffer virt address*/
+	void	*nsVcpInputBufferPage; /*input buffer struct page */
+	void	*nsVcpOutputBuffer; /*output buffer virt address*/
+	void	*nsVcpOutputBufferPage; /*output buffer struct page */
+	struct vcpu	*nsVcpXenVcpu; /*corresponding xen vcpu*/
+	nsVcpuStats_t	nsVcpStats;
+} nsVcpu_t;
+
+/*
+ * Events of interest for gathering stats.
+ */
+#define NS_CSWITCH	1
+#define NS_FLUSH_VA_STAT	2
+#define NS_FLUSH_RANGE	3
+#define NS_FLUSH_VA_POSTED 4
+#define NS_FLUSH_RANGE_POSTED 5
+#define NS_TPR_READ	6
+#define NS_ICR_READ	7
+#define NS_TPR_WRITE	8	
+#define NS_ICR_WRITE	9
+#define NS_EOI_WRITE	10
+
+#define NS_GFS_ACQUIRE	11	
+#define NS_GFS_RELEASE	12
+#define NS_TLB_FLUSH	13
+#define NS_INVL_PG	14	
+#define NS_TIMEOUTS	15	
+
+void nsCollectStats(int event, nsVcpuStats_t *ststp); 
+
+#define NS_STATS //KYS: Temporary
+
+#ifdef NS_STATS
+#define NS_STATS_COLLECT(event, statp) nsCollectStats(event, statp)
+#else
+define NS_STATS_COLLECT(event, statp)
+#endif
+
+typedef struct nsPartition {
+	/*
+	 * State maintained on a per guest basis to implement 
+	 * the Novell shim.
+	 */
+	nsSpinLock_t	nsLock;
+	atomic_t	nsNumVcpusActive;
+	u64		nsGuestIdMsr;
+	u64		nsHypercallMsr;
+	u64		nsPrivileges;
+	u64		nsSupportedFeatures;
+	unsigned long	nsHypercallMfn;
+	int		nsLongModeGuest;
+	/*
+	 * Each VCPU here corresponds to the vcpu in the underlying hypervisor;
+	 * they share the same ID.
+	 */
+	nsVcpu_t	nsVcpuState[MAX_VIRT_CPUS];
+	nsGlobalFlushState_t nsFlushState;
+} nsPartition_t;
+
+/*
+ * Max CPUID leaves supported.
+ */
+
+#define NX_MAX_CPUID_LEAVES	5
+
+/*
+ * We don't want to intercept instructions coming from the hvm bootstrap code.
+ *
+ */
+#define NS_BIOS_HIGH_ADDR 
+/*
+ * Privilege flags.
+ */
+
+#define NS_ACCESS_VP_RUNTIME	(1ULL << 0)
+#define NS_ACCESS_TIME_REF_CNT	(1ULL << 1)
+#define NS_ACCESS_SYNC_MSRS	(1ULL << 2)
+#define NS_ACCESS_SYNC_TIMERS	(1ULL << 3)
+#define NS_ACCESS_APIC_MSRS	(1ULL << 4)
+#define NS_ACCESS_PARTITION_ID	(1ULL << 33)
+	
+#define nsGetCurrentPartition() \
+((current)->domain->arch.hvm_domain.ext_handle)
+
+#define nsGetCurrentVcpuIndex() (current)->vcpu_id
+
+#define NS_PANIC(x) \
+do {\
+	nsXenVector.extPrintk("File is: %s\n", __FILE__);\
+	nsXenVector.extPrintk("Line is: %d\n", __LINE__);\
+	nsXenVector.extPanic((x));\
+} while (0);
+
+#define NS_ASSERT(x) \
+do {\
+	if (!(x)) \
+		NS_PANIC("ASSERTION FAILED\n")\
+} while (0);
+
+#define nsDebugPrint(x)	\
+do { \
+	nsXenVector.extPrintk("File is: %s\n", __FILE__);\
+	nsXenVector.extPrintk("Line is: %d\n", __LINE__);\
+	nsXenVector.extPrintk((x));\
+} while (0);
+
+/* Hooks into Xen */
+extern xen_call_vector_t nsXenVector;
+
+/*
+ * static inline int
+ * nsInvalidCpuState(void)
+ * Check to see if the calling CPU is in the "correct state" to invoke 
+ * the functionality implemented in the Novell Shim (Adaptor).
+ *
+ * Calling/Exit State:
+ *	None.
+ */
+
+static inline int
+nsInvalidCpuState(void)
+{
+	int cpuState;
+	cpuState = nsXenVector.hvmFuncTable->guest_x86_mode(current);
+	if ((cpuState == 4) || (cpuState == 8)) {
+		return (0);
+	}
+	return (1);
+}
+
+/*
+ * inline u64
+ * nsBuildHcallRetVal(int code, int reps)
+ *
+ * Given the return code and the number of successfully completed count, 
+ * compose a return value compliant with the Viridian specification.
+ *
+ * Calling/Exit State:
+ *	None.
+ */
+ 
+static inline u64
+nsBuildHcallRetVal(int code, int reps)
+{
+	u64	retVal=0;
+	retVal |= (code & 0xff);
+	retVal |= (((long long)(reps & 0xfff)) << 32);
+	return (retVal);
+}
+
+
+/*
+ * static inline void  nsSetSysCallRetVal(struct cpu_user_regs *pregs, 
+ *				int longModeGuest, u64 retVal)
+ * Set the return value in the saved guest registers
+ *
+ * Calling/Exit State:
+ *	None.
+ */
+
+static inline void  nsSetSysCallRetVal(struct cpu_user_regs *pregs, 
+				int longModeGuest, u64 retVal)
+{
+	if (longModeGuest) {
+		pregs->eax = retVal;
+	} else {
+		pregs->edx = (u32)(retVal >> 32);
+		pregs->eax = (u32)(retVal);
+	}
+}
+
+/*
+ * static inline int 
+ * nsPrivilegeCheck(nsPartition_t *curp, u64 flags)
+ * Check if the caller is privileged to perform the operation 
+ * specified by the flags argument.
+ *
+ * Calling/Exit State:
+ *	None.
+ */
+
+static inline int 
+nsPrivilegeCheck(nsPartition_t *curp, u64 flags)
+{
+	return ((curp->nsPrivileges & flags)? 1: 0);
+}
+
+/* void
+ * nsHandleHyperCall(u64 opcode, u64 input, u64 output, 
+ *		  u64 *retVal);
+ * Common entry point for handling all the extension hypercalls.
+ *
+ * Calling/Exit State:
+ *	Based on the hypercall; the caller may give up the CPU while 
+ * 	processing the hypercall. No locks should be held on entry and 
+ *	no locks will be held on return.
+ *		
+ */
+void
+nsHandleHyperCall(u64 opcode, u64 input, u64 output, 
+		  u64 *retVal);
+
+/*
+ * void nsDoTlbFlush(void);
+ * Perform TLB flush on the invoking virtual CPU.
+ *
+ * Calling/Exit State:
+ *	None.
+ */
+void nsDoTlbFlush(void);
+
+/*
+ * void
+ * nsLockAcquire(nsVcpu_t *vcpup, nsSpinLock_t *nsLock)
+ * Acquire the specified lock.
+ *
+ * Calling/Exit State:
+ *      None.
+ */
+
+void nsLockAcquire(nsVcpu_t *vcpup, nsSpinLock_t *lock);
+
+/*
+ * void
+ * nsLockRelease(nsVcpu_t *vcpup, nsSpinLock_t *nsLock)
+ * Release the specified spin lock.
+ *
+ * Calling/Exit State:
+ *      None.
+ */
+
+void nsLockRelease(nsVcpu_t *vcpup, nsSpinLock_t *lock);
+
+/*
+ * void
+ * nsLockInit(nsSpinLock_t *nsLock)
+ * Initialize the specified spin lock.
+ *
+ * Calling/Exit State:
+ *      None.
+ */
+
+void nsLockInit(nsSpinLock_t *lock); 
+
+/*
+ * void nsPrintStats(nsPartition_t *curp, int i)
+ * Print the per-vcpu stats for the specified partition.
+ *
+ * Calling/Exit State:
+ *      None.
+ */
+
+void nsPrintStats(nsPartition_t *curp, int i);
+
+#define NS_LOCK_OWNED(v, l) \
+((l)->owner == (v))
+#endif /*NS_SHIM_H */
Index: xen-3.2-testing/xen/arch/x86/hvm/hvm_ext/novell/nshypercall.c
===================================================================
--- /dev/null	1970-01-01 00:00:00.000000000 +0000
+++ xen-3.2-testing/xen/arch/x86/hvm/hvm_ext/novell/nshypercall.c	2008-02-15 18:28:11.000000000 -0500
@@ -0,0 +1,1220 @@
+/****************************************************************************
+ |
+ | Copyright (c) [2007, 2008] Novell, Inc.
+ | All Rights Reserved.
+ |
+ | This program is free software; you can redistribute it and/or
+ | modify it under the terms of version 2 of the GNU General Public License as
+ | published by the Free Software Foundation.
+ |
+ | This program is distributed in the hope that it will be useful,
+ | but WITHOUT ANY WARRANTY; without even the implied warranty of
+ | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ | GNU General Public License for more details.
+ |
+ | You should have received a copy of the GNU General Public License
+ | along with this program; if not, contact Novell, Inc.
+ |
+ | To contact Novell about this file by physical or electronic mail,
+ | you may find current contact information at www.novell.com
+ |
+ |***************************************************************************
+*/
+
+/*
+ * nshypercall.c.
+ * This file implements the hypercall component of the Novell Shim. Hopefully
+ * we can host this component either as a driver in the guest or an extension 
+ * to the Xen hypervisor.
+ *
+ * Engineering Contact: K. Y. Srinivasan
+ */
+
+#include <asm/page.h>
+#include <asm/processor.h>
+#include <asm/hvm/support.h>
+#include <xen/cpumask.h>
+#include <xen/event.h>
+
+#include <asm/hvm/hvm_extensions.h>
+#include "ns_shim.h"
+#include "ns_errno.h"
+#include "nshypercall.h"
+
+
+
+void nsDoTlbFlush(void);
+static void 
+nsFlushPostProcess(nsPartition_t *curp, nsVcpu_t *curVcpup);
+
+
+
+/*
+ * void nsCollectStats(int event, nsVcpuStats_t *statsp)
+ * Collect stats.
+ *
+ * Calling/Exit State:
+ * 	None.
+ */
+
+void nsCollectStats(int event, nsVcpuStats_t *statsp)
+{
+	switch (event) {
+	case NS_CSWITCH:
+		statsp->numSwitches++;
+		return;
+	case NS_FLUSH_VA:
+		statsp->numFlushes++;
+		return;
+	case NS_FLUSH_RANGE:
+		statsp->numFlushRanges++;
+		return;
+	case NS_FLUSH_VA_POSTED: 
+		statsp->numFlushesPosted++;
+		return;
+	case NS_FLUSH_RANGE_POSTED:
+		statsp->numFlushRangesPosted++;
+		return;
+	case NS_TPR_READ:
+		statsp->numTprReads++;
+		return;
+	case NS_ICR_READ:
+		statsp->numIcrReads++;
+		return;
+	case NS_TPR_WRITE: 
+		statsp->numTprWrites++;
+		return;
+	case NS_ICR_WRITE:
+		statsp->numIcrWrites++;
+		return;
+	case NS_EOI_WRITE:
+		statsp->numEoiWrites++;
+		return;
+
+	case NS_GFS_ACQUIRE:
+		statsp->numGFSAcquires++;
+		return;
+	case NS_GFS_RELEASE:
+		statsp->numGFSReleases++;
+		return;
+	case NS_TLB_FLUSH:
+		statsp->numTlbFlushes++;
+		return;
+	case NS_INVL_PG:
+		statsp->numInvlPages++;
+		return;
+	}
+}
+
+/*
+ * void
+ * nsPrintStats(nsPartition_t *curp, int i)
+ * Print stats.
+ *
+ * Calling/Exit State:
+ * 	None.
+ */
+void
+nsPrintStats(nsPartition_t *curp, int i)
+{
+	nsVcpu_t *v;
+	v = &curp->nsVcpuState[i];
+	printk("Printing stats for vcpu ID: %d\n", i);
+	printk("Flush pending: %d\n", (int)v->nsVcpFlushPending);
+	printk("Flush Request: %d\n", (int)v->nsVcpFlushRequest);
+	printk("Waiting on GFS: %d\n", (int)v->nsVcpWaitingOnGFS);
+	printk("Waiting for cleanup: %d\n", (int)v->nsVcpWaitingForCleanup);
+
+	printk("Number of context switches: %lu\n", v->nsVcpStats.numSwitches);
+	printk("Number of flushes: %lu\n", v->nsVcpStats.numFlushes);
+	printk("Number of flushes posted: %lu\n", v->nsVcpStats.numFlushesPosted);
+	printk("Number of flush ranges: %lu\n", v->nsVcpStats.numFlushRanges);
+	printk("Number of flush ranges posted: %lu\n", v->nsVcpStats.numFlushRangesPosted);
+	printk("Number of TPR reads: %lu\n", v->nsVcpStats.numTprReads);
+	printk("Number of ICR reads: %lu\n", v->nsVcpStats.numIcrReads);
+	printk("Number of Eoi writes: %lu\n", v->nsVcpStats.numEoiWrites);
+	printk("Number of Tpr writes: %lu\n", v->nsVcpStats.numTprWrites);
+	printk("Number of Icr writes: %lu\n", v->nsVcpStats.numIcrWrites);
+	printk("Number of GFS acuires: %lu\n", v->nsVcpStats.numGFSAcquires);
+	printk("Number of GFS releases: %lu\n", v->nsVcpStats.numGFSReleases);
+	printk("Number of TLB flushes: %lu\n", v->nsVcpStats.numTlbFlushes);
+	printk("Number of INVLPG flushes: %lu\n", v->nsVcpStats.numInvlPages);
+	printk("Number of TIMEOUTS: %lu\n", v->nsVcpStats.numTimeOuts);
+
+}
+
+/*
+ * static inline void nsWakeupWaiters(nsPartition_t *curp)
+ * Wakeup all the VCPUs that may be blocked on the Global
+ * flush state waiting to exclusively own the global flush
+ * state.
+ *
+ * Calling/Exit State:
+ * 	The partition-wide spin lock nsLock is held on entry and 
+ *	this lock is held on exit.
+ */
+static inline void nsWakeupWaiters(nsPartition_t *curp)
+{
+	int i;
+	if (!cpus_empty(curp->nsFlushState.waiters)) {
+		/*
+ 	 	 * Need to wakeup potential waiters that 
+	 	 * are waiting for the 
+ 	 	 * flush block to become available.
+ 	 	 */
+		for (i=0; i < MAX_VIRT_CPUS; i++) {
+			struct vcpu	*curVcpu;
+			if (!cpu_isset(i, curp->nsFlushState.waiters))
+				continue;
+			curVcpu = 
+			curp->nsVcpuState[i].nsVcpXenVcpu;
+			NS_ASSERT(curVcpu != NULL);
+			if ( test_and_clear_bit(_VPF_blocked_in_xen,
+                          	     &curVcpu->pause_flags) ) {
+               			vcpu_wake(curVcpu);
+			}
+		}
+		cpus_clear(curp->nsFlushState.waiters);
+	}
+}
+
+/*
+ * static void nsAcquireGlobalFlushState(nsPartition_t *curp, nsVcpu_t *vcpup)
+ * Acquire the global flush state for exclusive use by the calling
+ * VCPU.
+ *
+ * Calling/Exit State:
+ * 	On entry nsLock is held and this lock is held on exit. If the calling
+ *	VCPU is required to give up the CPU, this lock will be dropped.
+ */
+static void nsAcquireGlobalFlushState(nsPartition_t *curp, nsVcpu_t *vcpup)
+{
+acquireGFSAgain:
+	NS_ASSERT(vcpup->nsVcpWaitingOnGFS == 0);
+	NS_ASSERT(vcpup->nsVcpWaitingForCleanup == 0);
+	NS_ASSERT(NS_LOCK_OWNED(vcpup, &curp->nsLock));
+	if (curp->nsFlushState.currentOwner != NULL) {
+		/*
+		 * Somebody is in the midst of flushing; deal with this 
+		 * situation.
+		 */
+		/*
+		 * We need to wait for the current flush sequence
+		 * to end.
+		 */
+		vcpup->nsVcpWaitingOnGFS = 0;
+		NS_ASSERT(curp->nsFlushState.currentOwner != current);
+		NS_ASSERT(vcpup->nsVcpWaitingForCleanup == 0);
+		if (vcpup->nsVcpFlushPending) {
+			nsLockRelease(vcpup, &curp->nsLock);
+			nsDoTlbFlush();
+			nsLockAcquire(vcpup, &curp->nsLock);
+		}
+		vcpup->nsVcpWaitingOnGFS = 1;
+		cpu_set(current->vcpu_id, curp->nsFlushState.waiters);
+		nsLockRelease(vcpup, &curp->nsLock);
+		wait_on_xen_event_channel(0, (curp->nsFlushState.currentOwner == NULL));
+		nsLockAcquire(vcpup, &curp->nsLock);
+		vcpup->nsVcpWaitingOnGFS = 0;
+		NS_ASSERT(curp->nsFlushState.currentOwner != current);
+		NS_ASSERT(vcpup->nsVcpWaitingForCleanup == 0);
+		if (vcpup->nsVcpFlushPending) {
+			nsLockRelease(vcpup, &curp->nsLock);
+			nsDoTlbFlush();
+			nsLockAcquire(vcpup, &curp->nsLock);
+		}
+		goto acquireGFSAgain;
+	}
+	vcpup->nsVcpWaitingOnGFS = 0;
+	curp->nsFlushState.repCount = vcpup->nsVcpRepCount; 
+	curp->nsFlushState.flushParam = 
+	vcpup->nsVcpInputBuffer;
+	NS_STATS_COLLECT(NS_GFS_ACQUIRE, &vcpup->nsVcpStats);
+}
+
+/*
+ * static void nsReleaseGlobalFlushState(nsPartition_t *curp, nsVcpu_t *vcpup,
+ *					int lockOwned)
+ * There can at most be one TLB flush event active in the system. All of the
+ * VCPUs that are part of the flush sequence need to relaese their hold
+ * on the global flush object before the global flush object can be freed.
+ * This function manages the release of the global flush object.
+ * If the "lockOwned" parameter is non-zero; on entry the nsLock is held.
+ *
+ * Calling/Exit State:
+ * 	The current owner of GFS may be forced to give up the CPU.
+ *	On exit nsLock is  held.
+ */
+static void nsReleaseGlobalFlushState(nsPartition_t *curp, nsVcpu_t *vcpup,
+					int lockOwned)
+{
+	if (!lockOwned) {
+		nsLockAcquire(vcpup, &curp->nsLock);
+	}
+	NS_ASSERT(curp->nsFlushState.cpuCount >= 0);
+	NS_ASSERT(curp->nsFlushState.currentOwner != NULL);
+
+	if (vcpup->nsVcpFlushPending) {
+		curp->nsFlushState.cpuCount--;
+		NS_ASSERT(curp->nsFlushState.cpuCount >= 0);
+		vcpup->nsVcpFlushPending = 0;
+	}
+	
+nsReleaseGFS:
+	if (curp->nsFlushState.cpuCount > 0) {
+		if (curp->nsFlushState.currentOwner == current)  {
+			/*
+			 * We are the initiator; need to wait for 
+			 * others to complete.
+	 		 */
+			nsWakeupWaiters(curp);
+			vcpup->nsVcpWaitingForCleanup = 1;
+			nsLockRelease(vcpup, &curp->nsLock);
+			wait_on_xen_event_channel(0,(curp->nsFlushState.cpuCount == 0));
+			nsLockAcquire(vcpup, &curp->nsLock);
+			vcpup->nsVcpWaitingForCleanup = 0;
+			goto nsReleaseGFS;
+		} else {
+			return;
+		}
+	}
+	NS_ASSERT(curp->nsFlushState.cpuCount == 0);
+	if (curp->nsFlushState.currentOwner == current) {
+		/* We are the current owner; do the final cleanup.
+		 * But first set the return value. This has been stashed
+		 * before we blocked.
+	 	 */
+		NS_STATS_COLLECT(NS_GFS_RELEASE, &vcpup->nsVcpStats);
+		vcpup->nsVcpFlushRequest = 0;
+		vcpup->nsVcpFlushPending = 0;
+		vcpup->nsVcpWaitingForCleanup = 0;
+		nsSetSysCallRetVal(guest_cpu_user_regs(), 
+				   curp->nsLongModeGuest, 
+				   curp->nsFlushState.retVal);
+		curp->nsFlushState.cpuCount = 0;
+		curp->nsFlushState.currentOwner = NULL;
+		curp->nsFlushState.retVal = 0;
+		curp->nsFlushState.flushParam = NULL;
+		curp->nsFlushState.repCount = 0;
+		nsWakeupWaiters(curp);
+	} else {
+		/*
+		 * We are not the owner; wakeup the owner.
+		 */
+		if ( test_and_clear_bit(_VPF_blocked_in_xen,
+                            &(curp->nsFlushState.currentOwner->pause_flags))){
+			vcpu_wake(curp->nsFlushState.currentOwner);
+		}
+	}
+}
+	
+
+/*
+ * static inline int nsFlushPermitted(nsVcpu_t *vcpup)
+ * Check to see if we can execute a TLB flush on the calling vcpu.
+ *
+ * Calling/Exit State:
+ * 	None.
+ */
+static inline int nsFlushPermitted(nsVcpu_t *vcpup)
+{
+	if (!hvm_paging_enabled(current)) {
+		return (0);
+	}
+	if (current->arch.hvm_vmx.vmxassist_enabled) {
+		return (0);
+	}
+	if (nsInvalidCpuState()) {
+		return (0);
+	}
+
+	return (1);
+}
+	
+/*
+ * void
+ * nsDoTlbFlush(void)
+ *	Perform flush operations based on the state of GFS. VCPUs may be
+ *	forced to relinquish the physical CPU while attempting to flush; in 
+ *	those events, thi is also the continuation point for execution.
+ *
+ * Calling/Exit State:
+ *	None.
+ */
+void
+nsDoTlbFlush(void)
+{
+	nsPartition_t   *curp = nsGetCurrentPartition();
+        nsVcpu_t	*vcpup = &curp->nsVcpuState[nsGetCurrentVcpuIndex()];
+	flushVa_t  	*flushArgp;
+	int 		i,j, numPages;
+	u64		*pgList;
+	long		baseVa;
+	unsigned short 	repCount;
+
+	NS_ASSERT(local_irq_is_enabled());
+
+	NS_ASSERT(vcpup->nsVcplockDepth == 0);
+
+	nsLockAcquire(vcpup, &curp->nsLock);
+	if (vcpup->nsVcpWaitingForCleanup) {
+		/*
+		 * This is the continuation point for us; cleanup
+		 * the global flush state.
+		 */
+		vcpup->nsVcpWaitingForCleanup =0;
+		NS_ASSERT(curp->nsFlushState.currentOwner == current);
+		nsReleaseGlobalFlushState(curp, vcpup, 1);
+	} else if (vcpup->nsVcpWaitingOnGFS)  { 
+		/*
+		 * This is the continuation point for us; acquire
+		 * GFS and proceed with our flush operation.
+		 */
+		vcpup->nsVcpWaitingOnGFS =0; 
+		nsAcquireGlobalFlushState(curp, vcpup);
+		/*
+		 * Now do the rest of the syscall processing
+		 */
+		nsFlushPostProcess(curp, vcpup);
+	}
+	if (!vcpup->nsVcpFlushPending) {
+		nsLockRelease(vcpup, &curp->nsLock);
+		return;
+	}
+	flushArgp = curp->nsFlushState.flushParam;
+	repCount  = curp->nsFlushState.repCount;
+	/*
+	 * At this point a flush has been posted; see if we can perform a
+	 * flush given our state.
+	 */
+	if (!nsFlushPermitted(vcpup)) {
+		nsReleaseGlobalFlushState(curp, vcpup, 1);
+		nsLockRelease(vcpup, &curp->nsLock);
+		NS_ASSERT(vcpup->nsVcplockDepth == 0);
+		return;
+	}
+	nsLockRelease(vcpup, &curp->nsLock);
+	if (vcpup->nsVcpFlushPending & NS_FLUSH_TLB) {
+		NS_STATS_COLLECT(NS_TLB_FLUSH, &vcpup->nsVcpStats);
+		paging_update_cr3(current);
+	} else {
+		pgList = &flushArgp->gva;
+		NS_ASSERT(vcpup->nsVcpFlushPending == NS_FLUSH_INVLPG);
+		NS_ASSERT(pgList != NULL);
+		NS_ASSERT(repCount >=1);
+		NS_STATS_COLLECT(NS_INVL_PG, &vcpup->nsVcpStats);
+		for (i = 0; i < repCount; i++) {
+			baseVa = (long)(pgList[i] & PAGE_MASK);
+			numPages = (int)(~baseVa & pgList[i]);
+			for (j = 0; j <= numPages; j++) {
+				if (paging_invlpg(current, 
+				    (baseVa + (j << PAGE_SHIFT)))) {
+					flush_tlb_one_local((baseVa + 
+					(j<< PAGE_SHIFT)));
+				}
+				//KYS: need to deal with ASIDS
+			}
+		}
+	}
+	/*
+	 * Do post processing on the global flush state. 
+	 */
+	nsReleaseGlobalFlushState(curp, vcpup, 0);
+	nsLockRelease(vcpup, &curp->nsLock);
+	NS_ASSERT(vcpup->nsVcplockDepth == 0);
+}				
+			
+/*
+ * static int
+ * nsGetVpRegisters(paddr_t input, paddr_t output)
+ * Get the VCP register state.
+ *
+ * Calling/Exit State:
+ *	None.
+ */
+
+static int
+nsGetVpRegisters(paddr_t input, paddr_t output)
+{
+	nsVcpu_t        *vcpup, *targetp;
+	nsPartition_t   *curp = nsGetCurrentPartition();
+	getVpRegistersInput_t	*inBuf;
+	getVpRegistersOutput_t	*outBuf;
+	struct vcpu_guest_context	*vcpuCtx;
+	u32		*regIndexp;
+	getVpRegistersOutput_t		*outRegp;
+	u32		numOutputBytes = 0;
+
+        vcpup = &curp->nsVcpuState[nsGetCurrentVcpuIndex()];
+	inBuf = vcpup->nsVcpInputBuffer;
+	outBuf = vcpup->nsVcpOutputBuffer;
+	outRegp = outBuf;
+	/*
+	 * Copy the input data to the per-cpu input buffer.
+	 * This may be an overkill; obviously it is better to only
+	 * copy what we need. XXXKYS: Check with Mike.
+	 */
+	if (nsXenVector.extCopyFromGuestPhysical(inBuf, input, PAGE_SIZE)) {
+		return (NS_STATUS_INVALID_ALIGNMENT);
+	}
+	/*
+	 * If the partition ID specified does not match with the current 
+	 * domain return appropriate error.
+	 */
+	if ((u64)current->domain->domain_id != inBuf-> partitionId) {
+		return (NS_STATUS_ACCESS_DENIED);
+	}
+	if (inBuf->vpIndex > MAX_VIRT_CPUS) { 
+		return (NS_STATUS_INVALID_VP_INDEX);
+	}
+	targetp = &curp->nsVcpuState[inBuf->vpIndex]; 
+	if (!(targetp->nsVcpuFlags & NS_VCPU_UP)) {
+		return (NS_STATUS_INVALID_VP_STATE);
+	}
+	if ((vcpuCtx = 
+	     nsXenVector.extAllocMem(sizeof(struct vcpu_guest_context))) 
+		== NULL) {
+		return (NS_STATUS_INSUFFICIENT_MEMORY);
+	}
+
+	/*
+	 * Get the register state of the specified vcp.
+	 */
+	if (current->vcpu_id != inBuf->vpIndex) {
+		nsXenVector.extVcpuPause(targetp->nsVcpXenVcpu);
+	}
+	nsXenVector.extArchGetDomainInfoCtxt(targetp->nsVcpXenVcpu, vcpuCtx);
+	if (current->vcpu_id != inBuf->vpIndex) {
+		nsXenVector.extVcpuUnPause(targetp->nsVcpXenVcpu);
+	}
+	/*
+	 * Now that we have the register state; select what we want and
+	 * populate the output buffer.
+	 */
+	regIndexp = &inBuf->regIndex;
+	while (*regIndexp != 0) {
+		switch	(*regIndexp) {
+			/*
+			 * XXXKYS: need mapping code here; populate
+			 * outBuf.
+			 */
+			NS_PANIC("nsGetVpRegisters not supported\n");
+		}
+		regIndexp++;
+		outRegp++ ;	/*128 bit registers */
+		numOutputBytes +=16;
+		if ((char *)regIndexp > ((char *)inBuf + PAGE_SIZE)) {
+			/*
+			 *input list not reminated correctly; bail out.
+			 */
+			NS_PANIC("nsGetVpRegisters:input list not terminated\n"); 
+			break;
+		}
+	}
+	if (nsXenVector.extCopyToGuestPhysical(output, outBuf, 
+		numOutputBytes)) {
+		/* Some problem copying data out*/
+		NS_PANIC("nsGetVpRegisters:copyout problem\n"); 
+	}
+	nsXenVector.extFreeMem(vcpuCtx);
+	return (NS_STATUS_SUCCESS);
+}
+		
+/*
+ * static int
+ * nsSetVpRegisters(paddr_t input, paddr_t output)
+ * Set the VCPU register state.
+ *
+ * Calling/Exit State:
+ *	None.
+ */
+
+static int
+nsSetVpRegisters(paddr_t input, paddr_t output)
+{
+	nsVcpu_t        *vcpup, *targetp;
+	nsPartition_t   *curp = nsGetCurrentPartition();
+	setVpRegistersInput_t	*inBuf;
+	struct vcpu_guest_context	*vcpuCtx;
+	setVpRegisterSpec_t		*regIndexp;
+	int		retVal = NS_STATUS_SUCCESS;
+
+        vcpup = &curp->nsVcpuState[nsGetCurrentVcpuIndex()];
+	inBuf = vcpup->nsVcpInputBuffer;
+	/*
+	 * Copy the input data to the per-cpu input buffer.
+	 * This may be an overkill; obviously it is better to only
+	 * copy what we need. XXXKYS: Check with Mike.
+	 */
+	if (nsXenVector.extCopyFromGuestPhysical(inBuf, input, PAGE_SIZE)) {
+		return (NS_STATUS_INVALID_ALIGNMENT);
+	}
+	/*
+	 * If the partition ID specified does not match with the current 
+	 * domain return appropriate error.
+	 */
+	if ((u64)current->domain->domain_id != inBuf-> partitionId) {
+		return (NS_STATUS_ACCESS_DENIED);
+	}
+	if (inBuf->vpIndex > MAX_VIRT_CPUS) { 
+		return (NS_STATUS_INVALID_VP_INDEX);
+	}
+	targetp = &curp->nsVcpuState[inBuf->vpIndex]; 
+	if (!(targetp->nsVcpuFlags & NS_VCPU_UP)) {
+		return (NS_STATUS_INVALID_VP_STATE);
+	}
+	if ((vcpuCtx = 
+	     nsXenVector.extAllocMem(sizeof(struct vcpu_guest_context))) 
+		== NULL) {
+		return (NS_STATUS_INSUFFICIENT_MEMORY);
+	}
+	/*
+	 * XXXKYS: Is it sufficient to just pause the target vcpu; on the 
+	 * xen side domain is paused for this call. CHECK.
+	 */
+	if (current->vcpu_id != inBuf->vpIndex) {
+		nsXenVector.extVcpuPause(targetp->nsVcpXenVcpu);
+	}
+
+	nsXenVector.extArchGetDomainInfoCtxt(targetp->nsVcpXenVcpu, vcpuCtx);
+	/*
+	 * Now that we have the register state; update the register state
+	 * based on what we are given. 
+	 */
+	regIndexp = &inBuf->regSpec;
+	/*
+	 * XXXKYS: Assuming the list is terminated by a regName that is 0.
+	 * Check with Mike.
+	 */
+	while (regIndexp->regName != 0) {
+		switch	(regIndexp->regName) {
+			/*
+			 * XXXKYS: need mapping code here; populate
+			 * vcpuCtx 
+			 */
+			NS_PANIC("nsSetVpRegisters not supported\n");
+		}
+		regIndexp++;
+		if ((char *)regIndexp > ((char *)inBuf + PAGE_SIZE)) {
+			/*
+			 *input list not reminated correctly; bail out.
+			 */
+			NS_PANIC("nsSetVpRegisters:input list not terminated\n"); 
+			break;
+		}
+	}
+	/*
+	 * Now set register state.
+	 *
+	 * XXXKYS: Is it sufficient to just pause the target vcpu; on the 
+	 * xen side domain is paused for this call. CHECK.
+	 */
+
+	if (nsXenVector.extArchSetDomainInfoCtxt(targetp->nsVcpXenVcpu, vcpuCtx)) { 
+		retVal = NS_STATUS_INVALID_PARAMETER;
+	}
+	if (current->vcpu_id != inBuf->vpIndex) {
+		nsXenVector.extVcpuUnPause(targetp->nsVcpXenVcpu);
+	}
+	nsXenVector.extFreeMem(vcpuCtx);
+	return (retVal);
+}
+
+/*
+ * static int
+ * nsSwitchVa(paddr_t input)
+ *
+ * Switch the page table base of the calling vcpu.
+ *
+ * Calling/Exit State:
+ *	None.
+ *
+ * Remarks:
+ *	The spec specifies that the input register is pointing to a guest 
+ * 	physical that has the new page table base. However it appears that the 
+ *	page table base is being passed in the input register.
+ */
+static int
+nsSwitchVa(paddr_t input)
+{
+	nsPartition_t   *curp = nsGetCurrentPartition();
+        nsVcpu_t *vcpup = &curp->nsVcpuState[nsGetCurrentVcpuIndex()];
+
+	/*
+	 * XXXKYS: the spec sys the asID is passed via memory at offset 0 of 
+	 * the page whose GPA is in the input register. However, it appears 
+	 * the current build of longhorn (longhorn-2007-02-06-x86_64-fv-02)
+	 * passes the asID in the input register instead. Need to check if 
+	 * future builds do this.
+	 */
+	hvm_set_cr3(input); 
+	NS_STATS_COLLECT(NS_CSWITCH, &vcpup->nsVcpStats);
+	return (NS_STATUS_SUCCESS);
+}
+
+/*
+ * static int 
+ * nsFlushPostProcess(nsPartition_t *curp, nsVcpu_t *curVcpup)
+ *
+ * Perform the flush operation once GFS is acquired.
+ *
+ * Calling/Exit State:
+ *	On entry nsLock is held; on exit this lock continues to be held.
+ */
+
+static void 
+nsFlushPostProcess(nsPartition_t *curp, nsVcpu_t *curVcpup)
+{
+	int		target; 
+	nsVcpu_t	*vcpup;
+	cpumask_t	vcpuMask;
+	struct flushVa	*flushArgp;
+
+	flushArgp = curVcpup->nsVcpInputBuffer;
+	vcpuMask = flushArgp->vMask;
+	/*
+	 * On entry we must own the global flush state.
+	 */
+	NS_ASSERT(NS_LOCK_OWNED(curVcpup, &curp->nsLock));
+	NS_ASSERT(curp->nsFlushState.cpuCount == 0); 
+	NS_ASSERT(curp->nsFlushState.currentOwner == NULL); 
+
+	curp->nsFlushState.retVal = 
+	nsBuildHcallRetVal(NS_STATUS_SUCCESS, curVcpup->nsVcpRepCount);
+	curp->nsFlushState.currentOwner = current; 
+	if (cpu_isset(current->vcpu_id, vcpuMask)) {
+		curp->nsFlushState.cpuCount = 1;
+		curVcpup->nsVcpFlushPending = 
+		curVcpup->nsVcpFlushRequest;
+#ifdef NS_STATS
+		if (curVcpup->nsVcpFlushRequest == NS_FLUSH_TLB) {
+			NS_STATS_COLLECT(NS_FLUSH_VA_POSTED, &curVcpup->nsVcpStats);
+		} else {
+			NS_STATS_COLLECT(NS_FLUSH_RANGE_POSTED, &curVcpup->nsVcpStats);
+		}
+#endif
+			
+		cpu_clear(current->vcpu_id, vcpuMask);
+	}
+	if (cpus_empty(vcpuMask)) {
+		/*
+		 * We are done.
+		 */
+		goto flushVaDone;
+	}
+	while (!cpus_empty(vcpuMask)) {
+		target = first_cpu(vcpuMask);
+        	vcpup = &curp->nsVcpuState[target];
+		cpu_clear(target, vcpuMask);
+		if (!(vcpup->nsVcpuFlags & NS_VCPU_UP)) {
+			continue;
+		}
+		if (!nsFlushPermitted(vcpup)) {
+			continue;
+		}
+		curp->nsFlushState.cpuCount++;
+		vcpup->nsVcpFlushPending = 
+		curVcpup->nsVcpFlushRequest;
+#ifdef NS_STATS
+		if (curVcpup->nsVcpFlushRequest == NS_FLUSH_TLB) {
+			NS_STATS_COLLECT(NS_FLUSH_VA_POSTED, &vcpup->nsVcpStats);
+		} else {
+			NS_STATS_COLLECT(NS_FLUSH_RANGE_POSTED, &vcpup->nsVcpStats);
+		}
+#endif
+
+		/*
+		 * We need to force these VCPUs into the hypervisor for
+		 * them to act on the pending request.
+		 */
+
+		vcpu_kick(vcpup->nsVcpXenVcpu);
+	}
+	/*
+	 * Now that we have posted the state; wait for other CPUs to perform
+	 * flushes; we need to wait for all the CPUs to complete the flush
+	 * before returning. 
+	 */
+flushVaDone:
+	/*
+	 * If we are included in this round of tlb flush; we will wait for 
+	 * other CPUs in the tlb flush function; else we wait right here.
+	 */
+	if (!curVcpup->nsVcpFlushPending) {
+		nsReleaseGlobalFlushState(curp, curVcpup, 1);
+	}
+	return;
+}
+
+/*
+ * static int
+ * nsFlushVa(paddr_t input)
+ * Perform a TLB flush on the specified set of VCPUs.
+ *
+ * Calling/Exit State:
+ *	No locks can be held on entry and no locks will be held on return. 
+ *	The calling VCPU may relinquish the physical CPU.
+ */
+static int
+nsFlushVa(paddr_t input)
+{
+	nsPartition_t   *curp = nsGetCurrentPartition();
+	int		i;
+        nsVcpu_t	*curVcpup;
+
+	flushVa_t	*flushArgp;
+	cpumask_t	vcpuMask;
+	u64		asId, inputMask, retVal;
+	int		flushGlobal = 1;
+
+	curVcpup = &curp->nsVcpuState[nsGetCurrentVcpuIndex()];
+	flushArgp = curVcpup->nsVcpInputBuffer;
+
+	NS_ASSERT(curVcpup->nsVcplockDepth == 0);
+	NS_ASSERT(curVcpup->nsVcpFlushRequest == 0);
+	NS_ASSERT(curVcpup->nsVcpWaitingForCleanup == 0);
+	NS_ASSERT(curVcpup->nsVcpWaitingOnGFS == 0);
+
+	if (nsXenVector.extCopyFromGuestPhysical(flushArgp, input, 
+			sizeof(*flushArgp))) {
+		return (NS_STATUS_INVALID_ALIGNMENT);
+	}
+	inputMask = flushArgp->pMask;
+	asId = flushArgp->asHandle;
+	cpus_clear(vcpuMask);
+	/*
+	 * Deal with all trivial error conditions.
+	 */
+	if (flushArgp->flags != 0 && (!(flushArgp->flags & 
+			      (NS_FLUSH_ALL_PROCESSORS | 
+			       NS_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES |
+			       NS_FLUSH_NON_GLOBAL_MAPPINGS_ONLY)))) {
+		return (NS_STATUS_INVALID_PARAMETER);
+	}
+	if (((flushArgp->pMask) == 0) &&
+	   !(flushArgp->flags & NS_FLUSH_ALL_PROCESSORS)) {
+		return (NS_STATUS_INVALID_PARAMETER);
+	}
+				 
+	if (flushArgp->flags & NS_FLUSH_ALL_PROCESSORS) {
+		for (i=0; i< MAX_VIRT_CPUS; i++) {
+			if (current->domain->vcpu[i] != NULL) {
+				cpu_set(i, vcpuMask);
+			}
+		}
+	} else {
+		i = 0;
+		while (inputMask) {
+			if (inputMask &0x1) {
+				cpu_set(i, vcpuMask);
+			}
+			inputMask = (inputMask >> 1);
+			i++;
+		}
+	}
+		
+	if (flushArgp->flags & NS_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES) {
+		asId = NS_ALL_AS;
+	}
+	if (flushArgp->flags & NS_FLUSH_NON_GLOBAL_MAPPINGS_ONLY) {
+		flushGlobal = 0;
+	}
+	/*
+	 * Now operate on what we are given
+	 * XXXKYS: For now we are ignoring asId and fushGlobal flag.
+	 * May have to revisit this. But first stash away the processed 
+	 * parameters for subsequent use.
+	 */
+	flushArgp->asHandle = asId;
+	flushArgp->flags = flushGlobal;
+	flushArgp->vMask = vcpuMask;
+
+	curVcpup->nsVcpRepCount = 0;
+	curVcpup->nsVcpFlushRequest = NS_FLUSH_TLB;
+
+	retVal = nsBuildHcallRetVal(NS_STATUS_SUCCESS, 0);
+	nsSetSysCallRetVal(guest_cpu_user_regs(),
+                                   curp->nsLongModeGuest,
+                                   retVal);
+	NS_STATS_COLLECT(NS_FLUSH_VA_STAT, &curVcpup->nsVcpStats);
+	nsLockAcquire(curVcpup, &curp->nsLock);
+	nsAcquireGlobalFlushState(curp, curVcpup);
+	nsFlushPostProcess(curp, curVcpup);
+	nsLockRelease(curVcpup, &curp->nsLock);
+	return (NS_STATUS_SUCCESS);	
+}
+
+/*
+ * static int
+ * nsFlushVaRange(paddr_t input, unsigned short startIndex, 
+ * unsigned short repCount, unsigned short *repsDone)
+ * Perform a INVLPG flush on the specified set of VCPUs.
+ *
+ * Calling/Exit State:
+ *	No locks can be held on entry and no locks will be held on return. 
+ *	The calling VCPU may relinquish the physical CPU.
+ */
+static int
+nsFlushVaRange(paddr_t input, unsigned short startIndex, 
+unsigned short repCount, unsigned short *repsDone)
+{
+	nsVcpu_t        *curVcpup;
+	nsPartition_t   *curp = nsGetCurrentPartition();
+	flushVa_t  	*flushArgp;
+	cpumask_t	vcpuMask;
+	u64		asId, inputMask, retVal;
+	int		flushGlobal = 1;
+	int		flushAllProc = 0;
+	int		i;
+
+        curVcpup =  &curp->nsVcpuState[nsGetCurrentVcpuIndex()];
+	flushArgp = curVcpup->nsVcpInputBuffer;
+	NS_ASSERT(curVcpup->nsVcplockDepth == 0);
+	NS_ASSERT(curVcpup->nsVcpFlushRequest == 0);
+	NS_ASSERT(curVcpup->nsVcpWaitingForCleanup == 0);
+	NS_ASSERT(curVcpup->nsVcpWaitingOnGFS == 0);
+	NS_ASSERT(repCount >=1);
+	NS_ASSERT(((sizeof(*flushArgp)) + 8*(repCount -1)) <= PAGE_SIZE);
+	if (nsXenVector.extCopyFromGuestPhysical(flushArgp, input, 
+			((sizeof(*flushArgp)) + 8*(repCount -1)))) {
+		return (NS_STATUS_INVALID_ALIGNMENT);
+	}
+	*repsDone = repCount;
+	inputMask = flushArgp->pMask;
+	asId = flushArgp->asHandle;
+	cpus_clear(vcpuMask);
+	/*
+	 * Deal with all trivial error conditions.
+	 */
+	if (flushArgp->flags != 0 && (!(flushArgp->flags & 
+			      (NS_FLUSH_ALL_PROCESSORS | 
+			       NS_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES |
+			       NS_FLUSH_NON_GLOBAL_MAPPINGS_ONLY)))) {
+		return (NS_STATUS_INVALID_PARAMETER);
+	}
+	if ((flushArgp->pMask == 0) &&
+	   !(flushArgp->flags & NS_FLUSH_ALL_PROCESSORS)) {
+		return (NS_STATUS_INVALID_PARAMETER);
+	}
+				 
+	if (flushArgp->flags & NS_FLUSH_ALL_PROCESSORS) {
+		flushAllProc = 1;
+		for (i=0; i< MAX_VIRT_CPUS; i++) {
+			if (current->domain->vcpu[i] != NULL) {
+				cpu_set(i, vcpuMask);
+			}
+		}
+	} else {
+		i = 0;
+		/*
+		 * populate the vcpu mask based on the input.
+		 */
+		while (inputMask) {
+			if (inputMask & 0x1) {
+				cpu_set(i, vcpuMask);
+			}
+			inputMask = (inputMask >> 1);
+			i++;
+		}
+	}
+	if (flushArgp->flags & NS_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES) {
+		asId = NS_ALL_AS;
+	}
+	if (flushArgp->flags & NS_FLUSH_NON_GLOBAL_MAPPINGS_ONLY) {
+		flushGlobal = 0;
+	}
+	/*
+	 * Now operate on what we are given
+	 * XXXKYS: For now we are ignoring asId and fushGlobal flag.
+	 * May have to revisit this.
+	 * May have to revisit this. But first stash away the processed 
+	 * parameters for subsequent use.
+	 */
+	flushArgp->asHandle = asId;
+	flushArgp->flags = flushGlobal;
+	flushArgp->vMask = vcpuMask;
+	
+	curVcpup->nsVcpRepCount = repCount;
+	curVcpup->nsVcpFlushRequest = NS_FLUSH_INVLPG;
+
+	retVal = nsBuildHcallRetVal(NS_STATUS_SUCCESS, repCount);
+	nsSetSysCallRetVal(guest_cpu_user_regs(),
+                                   curp->nsLongModeGuest,
+                                   retVal);
+
+
+	NS_STATS_COLLECT(NS_FLUSH_RANGE, &curVcpup->nsVcpStats);
+	nsLockAcquire(curVcpup, &curp->nsLock);
+	nsAcquireGlobalFlushState(curp, curVcpup);
+	nsFlushPostProcess(curp, curVcpup);
+	nsLockRelease(curVcpup, &curp->nsLock);
+	return (NS_STATUS_SUCCESS);	
+}
+
+/* void
+ * nsHandleHyperCall(u64 opcode, u64 input, u64 output,
+ *                u64 *retVal);
+ * Common entry point for handling all the extension hypercalls.
+ *
+ * Calling/Exit State:
+ *	Based on the hypercall; the caller may give up the CPU while
+ *	processing the hypercall. No locks should be held on entry and
+ *	no locks will be held on return.
+ *
+ */
+
+void
+nsHandleHyperCall(u64 opcode, u64 input, u64 output, 
+		  u64 *retVal)
+{
+	unsigned short	verb;
+	unsigned short	repCount;
+	unsigned short	repsDone =0;
+	unsigned short	startIndex;
+	nsPartition_t   *curp = nsGetCurrentPartition();
+	u64		partitionId;
+	int		value;
+	
+
+	verb = (short)(opcode & 0xffff);
+	repCount = (short)((opcode >>32) & 0xfff);
+	startIndex = (short)((opcode >> 48) & 0xfff);
+	switch (verb) {
+	case NS_CREATE_PARTITION:
+		/*
+		 * Xen only allows dom0 to create domains.
+		 */	
+		*retVal = nsBuildHcallRetVal(NS_STATUS_ACCESS_DENIED, 0);
+		return;
+	case NS_INITIALIZE_PARTITION:
+		/*
+		 * We don't support this.
+		 */
+		*retVal = nsBuildHcallRetVal(NS_STATUS_ACCESS_DENIED, 0);
+		return;
+	case NS_DELETE_PARTITION:
+		/*
+		 * We don't support this.
+		 */
+		*retVal = nsBuildHcallRetVal(NS_STATUS_ACCESS_DENIED, 0);
+		return;
+	case NS_GET_PARTITION_PROPERTY:
+		/*
+		 * We don't support this.
+		 */
+		*retVal = nsBuildHcallRetVal(NS_STATUS_ACCESS_DENIED, 0);
+		return;
+	case NS_SET_PARTITION_PROPERTY:
+		/*
+		 * We don't support this.
+		 */
+		*retVal = nsBuildHcallRetVal(NS_STATUS_ACCESS_DENIED, 0);
+		return;
+	case NS_GET_PARTITION_ID:
+		if (!nsPrivilegeCheck(curp, NS_ACCESS_PARTITION_ID)) {
+			*retVal = 
+			nsBuildHcallRetVal(NS_STATUS_ACCESS_DENIED, 0);
+			return;
+		}
+		partitionId = (u64)current->domain->domain_id;
+		if (nsXenVector.extCopyToGuestPhysical(output, 
+			&partitionId, 8)) {
+			/*
+			 * Invalid output area.
+			 */
+			*retVal = 
+			nsBuildHcallRetVal(NS_STATUS_ACCESS_DENIED, 0);
+			return;
+		}
+		*retVal = nsBuildHcallRetVal(NS_STATUS_SUCCESS, 0);
+		return;
+	case NS_GET_NEXT_CHILD_PARTITION:
+		/*
+		 * We don't support this.
+		 */
+		*retVal = nsBuildHcallRetVal(NS_STATUS_ACCESS_DENIED, 0);
+		return;
+	case NS_SET_LOGICAL_PROCESSOR_RUN_TIME_GROUP:
+		/*
+		 * We don't support this.
+		 */
+		*retVal = nsBuildHcallRetVal(NS_STATUS_ACCESS_DENIED, 0);
+		return;
+	case NS_CLEAR_LOGICAL_PROCESSOR_RUN_TIME_GROUP:
+		/*
+		 * We don't support this.
+		 */
+		*retVal = nsBuildHcallRetVal(NS_STATUS_ACCESS_DENIED, 0);
+		return;
+	case NS_NOTIFY_LOGICAL_PROCESSOR_POWER_STATE:
+		/*
+		 * We don't support this.
+		 */
+		*retVal = nsBuildHcallRetVal(NS_STATUS_ACCESS_DENIED, 0);
+		return;
+	case NS_GET_LOGICAL_PROCESSOR_RUN_TIME:
+		/*
+		 * We don't support this.
+		 */
+		*retVal = nsBuildHcallRetVal(NS_STATUS_ACCESS_DENIED, 0);
+		return;
+	case NS_DEPOSIT_MEMORY:
+		/*
+		 * We don't support this.
+		 */
+		*retVal = nsBuildHcallRetVal(NS_STATUS_ACCESS_DENIED, 0);
+		return;
+	case NS_WITHDRAW_MEMORY:
+		/*
+		 * We don't support this.
+		 */
+		*retVal = nsBuildHcallRetVal(NS_STATUS_ACCESS_DENIED, 0);
+		return;
+	case NS_GET_MEMORY_BALANCE:	
+		/*
+		 * We don't support this.
+		 */
+		*retVal = nsBuildHcallRetVal(NS_STATUS_ACCESS_DENIED, 0);
+		return;
+	case NS_MAP_GPA_PAGES:
+		/*
+		 * We don't support this.
+		 */
+		*retVal = nsBuildHcallRetVal(NS_STATUS_ACCESS_DENIED, 0);
+		return;
+	case NS_UNMAP_GPA_PAGES:	
+		/*
+		 * We don't support this.
+		 */
+		*retVal = nsBuildHcallRetVal(NS_STATUS_ACCESS_DENIED, 0);
+		return;
+	case NS_INSTALL_INTERCEPT:
+		/*
+		 * We don't support this.
+		 */
+		*retVal = nsBuildHcallRetVal(NS_STATUS_ACCESS_DENIED, 0);
+		return;
+	case NS_CREATE_VP:
+		/*
+		 * We don't support this.
+		 */
+		*retVal = nsBuildHcallRetVal(NS_STATUS_ACCESS_DENIED, 0);
+		return;
+	case NS_TERMINATE_VP:
+		/*
+		 * We don't support this.
+		 */
+		*retVal = nsBuildHcallRetVal(NS_STATUS_ACCESS_DENIED, 0);
+		return;
+	case NS_DELETE_VP:
+		/*
+		 * We don't support this.
+		 */
+		*retVal = nsBuildHcallRetVal(NS_STATUS_ACCESS_DENIED, 0);
+		return;
+	case NS_GET_NEXT_VP:
+		/*
+		 * We don't support this.
+		 */
+		*retVal = nsBuildHcallRetVal(NS_STATUS_ACCESS_DENIED, 0);
+		return;
+	case NS_GET_VP_REGISTERS:
+		*retVal = nsBuildHcallRetVal(
+		nsGetVpRegisters(input, output), 0);
+		return;
+	case NS_SET_VP_REGISTERS:
+		*retVal = nsBuildHcallRetVal(
+		nsSetVpRegisters(input, output), 0);
+	case NS_SWITCH_VA:
+		*retVal = 
+		nsBuildHcallRetVal(nsSwitchVa(input), 0);
+		return;
+	case NS_FLUSH_VA:
+		*retVal = 
+		nsBuildHcallRetVal(nsFlushVa(input), 0);
+		return;
+	case NS_FLUSH_VA_LIST:
+		value  = nsFlushVaRange(input, startIndex, 
+					repCount, &repsDone);
+		*retVal = nsBuildHcallRetVal(value, repsDone);  
+		return;
+		
+	case NS_TRASLATE_VA:	
+		/*
+		 * We don't support this.
+		 */
+		*retVal = nsBuildHcallRetVal(NS_STATUS_ACCESS_DENIED, 0);
+		return;
+	case NS_READ_GPA:
+		/*
+		 * We don't support this.
+		 */
+		*retVal = nsBuildHcallRetVal(NS_STATUS_ACCESS_DENIED, 0);
+		return;
+	case NS_WRITE_GPA:
+		/*
+		 * We don't support this.
+		 */
+		*retVal = nsBuildHcallRetVal(NS_STATUS_ACCESS_DENIED, 0);
+		return;
+	case NS_ASSERT_VIRTUAL_INTERRUPT:
+		/*
+		 * We don't support this.
+		 */
+		*retVal = nsBuildHcallRetVal(NS_STATUS_ACCESS_DENIED, 0);
+		return;
+	case NS_CLEAR_VIRTUAL_INTERRUPT:
+		/*
+		 * We don't support this.
+		 */
+		*retVal = nsBuildHcallRetVal(NS_STATUS_ACCESS_DENIED, 0);
+		return;
+	case NS_CREATE_PORT:
+		/*
+		 * We don't support this.
+		 */
+		*retVal = nsBuildHcallRetVal(NS_STATUS_ACCESS_DENIED, 0);
+		return;
+	case NS_DELETE_PORT:
+		/*
+		 * We don't support this.
+		 */
+		*retVal = nsBuildHcallRetVal(NS_STATUS_ACCESS_DENIED, 0);
+		return;
+	case NS_CONNECT_PORT:
+		/*
+		 * We don't support this.
+		 */
+		*retVal = nsBuildHcallRetVal(NS_STATUS_ACCESS_DENIED, 0);
+		return;
+	case NS_GET_PORT_PROPERTY:
+		/*
+		 * We don't support this.
+		 */
+		*retVal = nsBuildHcallRetVal(NS_STATUS_ACCESS_DENIED, 0);
+		return;
+	case NS_DISCONNECT_PORT:
+		/*
+		 * We don't support this.
+		 */
+		*retVal = nsBuildHcallRetVal(NS_STATUS_ACCESS_DENIED, 0);
+		return;
+	case NS_POST_MESSAGE:
+		/*
+		 * We don't support this.
+		 */
+		*retVal = nsBuildHcallRetVal(NS_STATUS_ACCESS_DENIED, 0);
+		return;
+	case NS_POST_EVENT:
+		/*
+		 * We don't support this.
+		 */
+		*retVal = nsBuildHcallRetVal(NS_STATUS_ACCESS_DENIED, 0);
+		return;
+	default:
+		nsXenVector.extPrintk("Unkown hypercall: verb is: %d\n", verb); 
+		*retVal = 
+		nsBuildHcallRetVal(NS_STATUS_INVALID_HYPERCALL_CODE, 0);
+		return;
+	}
+}
Index: xen-3.2-testing/xen/arch/x86/hvm/hvm_ext/novell/nshypercall.h
===================================================================
--- /dev/null	1970-01-01 00:00:00.000000000 +0000
+++ xen-3.2-testing/xen/arch/x86/hvm/hvm_ext/novell/nshypercall.h	2008-02-15 18:28:11.000000000 -0500
@@ -0,0 +1,125 @@
+/****************************************************************************
+ |
+ | Copyright (c) [2007, 2008] Novell, Inc.
+ | All Rights Reserved.
+ |
+ | This program is free software; you can redistribute it and/or
+ | modify it under the terms of version 2 of the GNU General Public License as
+ | published by the Free Software Foundation.
+ |
+ | This program is distributed in the hope that it will be useful,
+ | but WITHOUT ANY WARRANTY; without even the implied warranty of
+ | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ | GNU General Public License for more details.
+ |
+ | You should have received a copy of the GNU General Public License
+ | along with this program; if not, contact Novell, Inc.
+ |
+ | To contact Novell about this file by physical or electronic mail,
+ | you may find current contact information at www.novell.com
+ |
+ |***************************************************************************
+*/
+
+/*
+ * nshypercall.h
+ * Memory layouts for the various hypercalls supported. 
+ *
+ * Engineering Contact: K. Y. Srinivasan
+ */
+
+#ifndef NS_HYPERCALL_H
+#define NS_HYPERCALL_H
+
+#include <xen/cpumask.h>
+
+
+typedef struct getVpRegistersInput {
+	u64	partitionId;
+	u64	vpIndex;
+	u32	regIndex;
+} getVpRegistersInput_t;
+
+typedef struct getVpRegistersOutput {
+	u64	lowValue;
+	u64	highValue;
+} getVpRegistersOutput_t;
+
+
+
+typedef struct setVpRegisterSpec {
+	u32	regName;
+	u32	pad;
+	u64	pad1;
+	u64	lowValue;
+	u64	highValue;
+} setVpRegisterSpec_t;
+typedef struct setVpRegistersInput {
+	u64	partitionId;
+	u64	vpIndex;
+	setVpRegisterSpec_t	regSpec;
+} setVpRegistersInput_t;
+
+
+typedef struct flushVa {
+	u64	asHandle;
+	u64	flags;
+	union  {
+		u64		processorMask;
+		cpumask_t 	vcpuMask;
+	} procMask;
+#define pMask 	procMask.processorMask
+#define vMask	procMask.vcpuMask
+	u64	gva;
+} flushVa_t;
+
+#define NS_FLUSH_ALL_PROCESSORS	0x00000001
+#define NS_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES 0x00000002
+#define NS_FLUSH_NON_GLOBAL_MAPPINGS_ONLY 0x00000004
+
+#define NS_ALL_AS	(-1)
+
+/*
+ * Hypercall verbs.
+ */
+
+#define NS_CREATE_PARTITION 	0x0010
+#define NS_INITIALIZE_PARTITION 0x0011
+#define NS_DELETE_PARTITION	0x0014
+#define NS_GET_PARTITION_PROPERTY 0x0017
+#define NS_SET_PARTITION_PROPERTY 0x0018
+#define NS_GET_PARTITION_ID	0x0015
+#define NS_GET_NEXT_CHILD_PARTITION 0x0016
+#define NS_SET_LOGICAL_PROCESSOR_RUN_TIME_GROUP 0x0005
+#define NS_CLEAR_LOGICAL_PROCESSOR_RUN_TIME_GROUP 0x0006
+#define NS_NOTIFY_LOGICAL_PROCESSOR_POWER_STATE	0x0007
+#define NS_GET_LOGICAL_PROCESSOR_RUN_TIME	0x0004
+#define NS_DEPOSIT_MEMORY	0x001C
+#define NS_WITHDRAW_MEMORY	0x001D
+#define NS_GET_MEMORY_BALANCE	0x001E
+#define NS_MAP_GPA_PAGES	0x001A
+#define NS_UNMAP_GPA_PAGES	0x001B
+#define NS_INSTALL_INTERCEPT	0x0019
+#define NS_CREATE_VP		0x001F
+#define NS_TERMINATE_VP		0x0020
+#define NS_DELETE_VP		0x0021
+#define NS_GET_NEXT_VP		0x0027
+#define NS_GET_VP_REGISTERS	0x0022
+#define NS_SET_VP_REGISTERS	0x0023
+#define NS_SWITCH_VA		0x0001
+#define NS_FLUSH_VA		0x0002
+#define NS_FLUSH_VA_LIST	0x0003
+#define NS_TRASLATE_VA		0x0024
+#define NS_READ_GPA		0x0025
+#define NS_WRITE_GPA		0x0026
+#define NS_ASSERT_VIRTUAL_INTERRUPT	0x002A
+#define NS_CLEAR_VIRTUAL_INTERRUPT	0x002C
+#define NS_CREATE_PORT			0x002D
+#define NS_DELETE_PORT			0x002E
+#define NS_CONNECT_PORT			0x002F
+#define NS_GET_PORT_PROPERTY		0x0031
+#define NS_DISCONNECT_PORT		0x0030
+#define NS_POST_MESSAGE			0x0032
+#define NS_POST_EVENT			0x0034
+
+#endif /* NS_HYPERCALL_H */
Index: xen-3.2-testing/xen/arch/x86/hvm/hvm_ext/novell/nsintercept.c
===================================================================
--- /dev/null	1970-01-01 00:00:00.000000000 +0000
+++ xen-3.2-testing/xen/arch/x86/hvm/hvm_ext/novell/nsintercept.c	2008-02-15 18:28:34.000000000 -0500
@@ -0,0 +1,2077 @@
+/****************************************************************************
+ |
+ | Copyright (c) [2007, 2008] Novell, Inc.
+ | All Rights Reserved.
+ |
+ | This program is free software; you can redistribute it and/or
+ | modify it under the terms of version 2 of the GNU General Public License as
+ | published by the Free Software Foundation.
+ |
+ | This program is distributed in the hope that it will be useful,
+ | but WITHOUT ANY WARRANTY; without even the implied warranty of
+ | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ | GNU General Public License for more details.
+ |
+ | You should have received a copy of the GNU General Public License
+ | along with this program; if not, contact Novell, Inc.
+ |
+ | To contact Novell about this file by physical or electronic mail,
+ | you may find current contact information at www.novell.com
+ |
+ |***************************************************************************
+*/
+
+/*
+ * nsintercept.c.
+ * This file implements the intercepts to support the  Novell Shim. 
+ *
+ * Engineering Contact: K. Y. Srinivasan
+ */
+
+#include <asm/hvm/hvm_extensions.h>
+
+
+#include <asm/config.h>
+#include <asm/hvm/io.h>
+#include <asm/processor.h>
+#include <asm/page.h>
+#include <asm/apicdef.h>
+#include <asm/regs.h>
+#include <asm/msr.h>
+
+#include <xen/string.h>
+#include <xen/init.h>
+#include <xen/compile.h>
+#include <xen/hvm/save.h>
+#include <public/sched.h>
+
+
+/*
+ * Local includes; extension specific.
+ */
+#include "ns_errno.h"
+#include "ns_shim.h"
+
+
+/*
+ * Implement Novell Shim.
+ */
+
+
+/*
+ * Hypervisor intercept vector.
+ */
+static int
+nsDomainCreate(struct domain *d);
+static void 
+nsDomainDestroy(struct domain *d);
+static int
+nsVcpuInitialize(struct vcpu *v);
+static void 
+nsVcpuDestroy(struct vcpu *v);
+static int
+nsDoCpuId(uint32_t input, struct cpu_user_regs *regs);
+static int
+nsDoRdMsr(uint32_t idx, struct cpu_user_regs *regs);
+static int
+nsDoWrMsr(uint32_t idx, struct cpu_user_regs *regs);
+static int
+nsDoHyperCall(struct cpu_user_regs *pregs);
+static void
+nsDoMigrateTimers(struct vcpu *v);
+
+extension_intercept_vector_t	nsExtensionVector = {
+	.domain_create = nsDomainCreate,
+	.domain_destroy = nsDomainDestroy,
+	.vcpu_initialize = nsVcpuInitialize,
+	.vcpu_destroy = nsVcpuDestroy,
+	.do_cpuid = nsDoCpuId,
+	.do_msr_read = nsDoRdMsr,
+	.do_msr_write = nsDoWrMsr,
+	.do_hypercall = nsDoHyperCall,
+	.do_continuation = nsDoTlbFlush, 
+	.do_migrate_timers = nsDoMigrateTimers
+};
+
+/*
+ * Hooks into xen services; to be populated by our proxy in xen.
+ */
+
+xen_call_vector_t nsXenVector;
+
+static inline void
+nsInjectException(int trap);
+
+static inline void
+nsHypercallPageInitialize(void *hypercallPage,  nsPartition_t *curp);
+
+static inline void
+nsInitEventPage(void *siefPage);
+
+static inline void
+nsInitMessagePage(void *simPage);
+
+/*
+ * static int __init nsExtensionInit(void)
+ * Initialize the extensiom module.
+ *
+ * Calling/Exit State:
+ *	None.
+ */
+static int __init nsExtensionInit(void)
+{
+	int retVal;
+	retVal = hvm_ext_register(1, &nsExtensionVector, &nsXenVector);
+	NS_ASSERT(retVal == 0);
+	nsXenVector.extPrintk("NS Extension Initialized\n");
+	return 0;
+}
+__initcall(nsExtensionInit);
+
+/*
+ * Our lock primitives.
+ */
+/*
+ * void 
+ * nsLockAcquire(nsVcpu_t *vcpup, nsSpinLock_t *nsLock)
+ * Acquire the specified lock.
+ *
+ * Calling/Exit State:
+ *	None.
+ */
+void 
+nsLockAcquire(nsVcpu_t *vcpup, nsSpinLock_t *nsLock)
+{
+	NS_ASSERT(nsLock->owner != vcpup);
+	spin_lock_irqsave(&nsLock->spinLock, nsLock->flags);
+	nsLock->owner = vcpup;
+	nsLock->retAddr = __builtin_return_address(0);
+	vcpup->nsVcplockDepth++;
+}
+
+/*
+ * void 
+ * nsLockRelease(nsVcpu_t *vcpup, nsSpinLock_t *nsLock)
+ * Release the specified spin lock.
+ *
+ * Calling/Exit State:
+ *	None.
+ */
+void 
+nsLockRelease(nsVcpu_t *vcpup, nsSpinLock_t *nsLock)
+{
+	NS_ASSERT((nsLock->owner == vcpup));
+	nsLock->owner = NULL;
+	vcpup->nsVcplockDepth--;
+	NS_ASSERT(vcpup->nsVcplockDepth >= 0);
+	spin_unlock_irqrestore(&nsLock->spinLock, nsLock->flags);
+}
+
+/*
+ * void 
+ * nsLockInit(nsSpinLock_t *nsLock)
+ * Initialize the specified spin lock.
+ *
+ * Calling/Exit State:
+ *	None.
+ */
+void 
+nsLockInit(nsSpinLock_t *nsLock)
+{
+	spin_lock_init(&nsLock->spinLock);
+	nsLock->owner = NULL;
+	nsLock->retAddr = NULL;
+}
+
+/*
+ * static inline void nsWriteGuestIdMsr(nsPartition_t *curp, 
+ *					nsVcpu_t      *curVcpu,
+ *					u64 msrContent)
+ *	Write the guest ID.
+ *
+ * Calling/Exit State:
+ * 	None.
+ */
+static inline void 
+nsWriteGuestIdMsr(nsPartition_t *curp, nsVcpu_t *curVcpu, u64 msrContent)
+{
+	curp->nsGuestIdMsr = msrContent;
+	if (curp->nsGuestIdMsr == 0) {
+		/*
+		 * Guest has cleared the guest ID;
+		 * clear the hypercall page.
+		 */
+		if (curp->nsHypercallMsr)  {
+			curVcpu->nsVcpuFlags &= ~NS_VCPU_UP;
+		}
+	}
+}
+
+/*
+ * static inline void nsWriteHypercallMsr(nsPartition_t *curp,
+ *					  nsVcpu_t	*curVcpu,
+ *					  u64		msrContent)
+ *	Write hypercall msr.
+ *
+ * Calling/Exit State:
+ * 	None.
+ */
+
+static inline void 
+nsWriteHypercallMsr(nsPartition_t *curp,
+		  nsVcpu_t	*curVcpu,
+		  u64		msrContent)
+{
+	unsigned long gmfn;
+	void	*hypercallPage;
+	struct domain	*d = curVcpu->nsVcpXenVcpu->domain;
+
+	nsLockAcquire(curVcpu, &curp->nsLock);
+	gmfn = (msrContent >> 12);
+	if (curp->nsGuestIdMsr == 0) {
+		/* Nothing to do if the guest is not registered*/
+		nsLockRelease(curVcpu, &curp->nsLock);
+		return;
+	}
+	/*
+	 * Guest is registered; see if we can turn-on the 
+	 * hypercall page.
+	 * XXXKYS: Can the guest write the GPA in one call and 
+	 * subsequently enable it? Check. For now assume that all the
+	 * info is specified in one call.
+	 */
+	if (((u32)msrContent & (0x00000001)) == 0) {	
+		/*
+		 * The client is not enabling the hypercall; just
+		 * ignore everything. 
+		 */
+		nsLockRelease(curVcpu, &curp->nsLock);
+		return;
+	}
+	hypercallPage = nsXenVector.extGetVirtFromGmfn(d,gmfn);
+	if (hypercallPage == NULL) {
+		/*
+		 * The guest specified a bogus GPA; inject a GP fault
+		 * into the guest.
+		 */
+		nsInjectException(TRAP_gp_fault);
+		nsLockRelease(curVcpu, &curp->nsLock);
+		return;
+	}
+	nsHypercallPageInitialize(hypercallPage, curp);
+	curp->nsHypercallMfn = nsXenVector.extGetMfnFromGmfn(d, gmfn);
+#ifdef CONFIG_DOMAIN_PAGE
+	nsXenVector.extUnmapDomainPage(hypercallPage);
+#endif
+	curp->nsHypercallMsr = msrContent;
+	nsLockRelease(curVcpu, &curp->nsLock);
+	curVcpu->nsVcpuFlags |= NS_VCPU_UP;
+}
+
+/*
+ * static inline void nsWriteSxMsr(uint32_t idx, nsPartition_t *curp,
+ *					  nsVcpu_t	*curVcpu,
+ *					  u64		msrContent)
+ *	Write SIEFP or SIMP  msr.
+ *
+ * Calling/Exit State:
+ * 	None.
+ */
+
+static inline void nsWriteSxMsr(uint32_t idx, nsPartition_t *curp,
+				  nsVcpu_t	*curVcpu,
+				  u64		msrContent)
+{
+	unsigned long gmfn;
+	void            *sxPage;
+	struct domain	*d = curVcpu->nsVcpXenVcpu->domain;
+	gmfn = (msrContent >> 12);
+	/*
+	 * Can the client enable the siefp and specify 
+	 * the base address in two 
+	 * different calls? XXXKYS: For now assume 
+	 * that it is done in one call.
+	 */
+	if (!((u32)msrContent & (0x00000001))) {	
+		/*
+		 * The client is not enabling the sx page; just
+		 * ignore everything. 
+		 */
+		return;
+	}
+	sxPage = nsXenVector.extGetVirtFromGmfn(d, gmfn);
+	if (sxPage == NULL) {
+		/*
+		 * The guest specified a bogus GPA; inject a GP fault
+		 * into the guest.
+		 */
+		nsInjectException(TRAP_gp_fault);
+		return;
+	}
+	switch (idx) {
+		case NS_MSR_SIEFP:
+			nsInitEventPage(sxPage);
+			curVcpu->nsVcpSIefpMsr = msrContent; 
+			curVcpu->nsVcpSiefPage = sxPage; 
+			break;
+		case NS_MSR_SIMP:
+			nsInitMessagePage(sxPage);
+			curVcpu->nsVcpSimpMsr = msrContent;
+			curVcpu->nsVcpSimPage = sxPage;
+			break;
+	}
+
+}
+
+/*
+ * Time this domain booted.
+ */
+s_time_t nsDomainBootTime;
+
+/*
+ * static inline u64
+ * nsGetTimeSinceDomainBoot(void)
+ * Retrieve the time since boot in 100ns units.
+ *
+ * Calling/Exit State:
+ *	None.
+ */
+
+static inline u64
+nsGetTimeSinceDomainBoot(void)
+{
+	u64	curTime = nsXenVector.extGetTimeSinceBoot();
+	return ((curTime - nsDomainBootTime)/100) ;
+}
+
+/*
+ * static inline int
+ * nsCallFromBios(struct cpu_user_regs *regs)
+ * Check if the caller is in the right state to consumE the services of the 
+ * extension module.
+ *
+ * Calling/Exit State:
+ *	None.
+ */
+
+static inline int
+nsCallFromBios(struct cpu_user_regs *regs)
+{
+	if (hvm_paging_enabled(current)) {
+		return (0);
+	} else {
+		return (1);
+	}
+}
+
+/*
+ * static inline void
+ * nsInjectException(int trap)
+ * Injecct the specified exception into the invoking virtual CPU.
+ *
+ * Calling/Exit State:
+ *	None.
+ */
+ 
+static inline void
+nsInjectException(int trap)
+{
+	nsXenVector.hvmFuncTable->inject_exception(trap, 0, 0);
+}
+
+
+/*
+ * static inline int
+ * nsOsRegistered(void)
+ * Check to see if the guest has registered itself with the Novell Shim.
+ *
+ * Calling/Exit State:
+ *	None.
+ */
+ 
+static inline int
+nsOsRegistered(void)
+{
+	nsPartition_t	*curp = nsGetCurrentPartition();
+	return (curp->nsGuestIdMsr != 0?1:0);
+}
+
+
+/*
+ * static inline void 
+ * nsSetPartitionPrivileges(nsPartition_t *nspp)
+ * Set the partitionwide privileges. Currently it is harcoded.
+ * We could perhaps make this an attribute of the domain and have the
+ * configuration tools manage it.
+ *
+ * Calling/Exit State:
+ *	None.
+ */
+
+static inline void 
+nsSetPartitionPrivileges(nsPartition_t *nspp)
+{
+	/*
+	 * This is based on the hypervisor spec under section 5.2.3. 
+	 */
+	nspp->nsPrivileges = 0x000000020000007f;
+}
+
+/*
+ * static inline u32
+ * nsGetRecommendations(void)
+ * Get the recommendations.
+ *
+ * Calling/Exit State:
+ *	None.
+ */
+static inline u32
+nsGetRecommendations(void)
+{
+	/*
+	 *For now we recommend all the features. Need to validate.
+	 */
+	if ( paging_mode_hap(current->domain)) {
+		/*
+		 * If HAP is enabled; the guest should not use TLB flush
+		 * related enlightenments.
+		 */
+		return (0x19);
+	} else {
+		return (0x1f); 
+	}
+}
+
+/*
+ * static inline void 
+ * nsSetPartitionFeatures(nsPartition_t *nspp)
+ * Set the partitionwide features. Currently it is harcoded.
+ * We could perhaps make this an attribute of the domain and have the
+ * configuration tools manage it.
+ *
+ * Calling/Exit State:
+ *	None.
+ */
+
+static inline void 
+nsSetPartitionFeatures(nsPartition_t *nspp)
+{
+	nspp->nsSupportedFeatures = 0x1f;
+}
+
+static inline u16 
+nsGetGuestMajor(void)
+{
+	return (0);
+}
+static inline u16
+nsGetGuestMinor(void)
+{
+	return (0);
+}
+static inline u32
+nsGetGuestServicePack(void)
+{
+	return (0);
+}
+ 
+static inline u8 
+nsGetGuestServiceBranchInfo(void)
+{
+	return (0);
+}
+static inline u32 
+nsGetGuestServiceNumber(void)
+{
+	return (0);
+}
+
+/*
+ * static inline u32
+ * nsGetSupportedSyntheticMsrs(void)
+ * Get the synthetic MSRs supported by the Novell Shim. Currently
+ * it is hardcoded.
+ *
+ * Calling/Exit State:
+ *	None.
+ */
+static inline u32
+nsGetSupportedSyntheticMsrs(void)
+{
+	/*
+	 * All MSRS in the spec version 0.83 including RESET MSR. 
+	 */
+	return (0xff);
+}
+
+
+/*
+ * static inline u32
+ * nsGetMaxVcpusSupported(void)
+ * Retrieve the maximum vcpus supported.
+ *
+ * Calling/Exit State:
+ *	None.
+ */
+
+static inline u32
+nsGetMaxVcpusSupported(void)
+{
+	return MAX_VIRT_CPUS;
+}
+
+/*
+ * static inline u32
+ * nsGetMaxLcpusSupported(void)
+ * Retrieve the maximum physical cpus supported.
+ *
+ * Calling/Exit State:
+ *	None.
+ */
+static inline u32
+nsGetMaxLcpusSupported(void)
+{
+	return NR_CPUS;
+}
+
+
+/*
+ * static inline void
+ * nsReadIcr(u64 *icrContent)
+ * Read the ICR of the local APIC of the calling VCPU.
+ *
+ * Calling/Exit State:
+ *	None.
+ */
+static inline void
+nsReadIcr(u64 *icrContent)
+{
+	u32	icrLow, icrHigh;
+	u64	retVal;
+
+
+	icrLow = nsXenVector.mmIoHandler->read_handler(current, 
+		 (vlapic_base_address(vcpu_vlapic(current)) + 0x300), 4);
+	icrHigh = nsXenVector.mmIoHandler->read_handler(current, 
+		 (vlapic_base_address(vcpu_vlapic(current)) + 0x310), 4);
+	retVal = icrHigh;
+	*icrContent = ((retVal << 32) | icrLow);
+
+}
+
+/*
+ * static inline void
+ * nsReadTpr(u64 *tprContent)
+ * Read the TPR of the local APIC of the calling VCPU.
+ *
+ * Calling/Exit State:
+ *	None.
+ */
+static inline void
+nsReadTpr(u64 *tprContent)
+{
+	u32	tprLow;
+
+
+	tprLow = nsXenVector.mmIoHandler->read_handler(current, 
+		 (vlapic_base_address(vcpu_vlapic(current)) + 0x80), 4);
+	*tprContent = (u64)tprLow;
+
+}
+
+/*
+ * static inline void
+ * nsWriteEoi(u64 msrContent)
+ * Write the EOI register of the local APIC of the calling VCPU.
+ *
+ * Calling/Exit State:
+ *	None.
+ */
+static inline void
+nsWriteEoi(u64 msrContent)
+{
+	u32 eoi = (u32)msrContent;
+
+	nsXenVector.mmIoHandler->write_handler(current, 
+		 (vlapic_base_address(vcpu_vlapic(current)) + 0xb0), 4, eoi);
+
+}
+
+/*
+ * static inline void
+ * nsWriteIcr(u64 msrContent)
+ * Write the ICR register of the local APIC of the calling VCPU.
+ *
+ * Calling/Exit State:
+ *	None.
+ */
+static inline void
+nsWriteIcr(u64 msrContent)
+{
+	u32	icrLow, icrHigh;
+	icrLow = (u32)msrContent;
+	icrHigh = (u32)(msrContent >> 32);
+
+	if (icrHigh != 0) {
+		nsXenVector.mmIoHandler->write_handler(current, 
+		 (vlapic_base_address(vcpu_vlapic(current)) + 0x310), 4, 
+		icrHigh);
+	}
+	if (icrLow != 0) {
+		nsXenVector.mmIoHandler->write_handler(current, 
+		 (vlapic_base_address(vcpu_vlapic(current)) + 0x300), 4, 
+		icrLow);
+	}
+
+}
+
+/*
+ * static inline void
+ * nsWriteTpr(u64 msrContent)
+ * Write the TPR register of the local APIC of the calling VCPU.
+ *
+ * Calling/Exit State:
+ *	None.
+ */
+static inline void
+nsWriteTpr(u64 msrContent)
+{
+	u32 tpr = (u32)msrContent;
+
+
+	nsXenVector.mmIoHandler->write_handler(current, 
+		 (vlapic_base_address(vcpu_vlapic(current)) + 0x80), 4, tpr);
+
+}
+
+/*
+ * static inline void
+ * nsHypercallPageInitialize(void *hypercallPage,  nsPartition_t *curp)
+ * Initialize the hypercall page to support the Novell Shim Hypercalls.
+ *
+ * Calling/Exit State:
+ *	None.
+ */
+static inline void
+nsHypercallPageInitialize(void *hypercallPage, nsPartition_t *curp)
+{
+	char *p;
+
+	if (nsXenVector.hvmFuncTable->guest_x86_mode(current) == 8) {
+		curp->nsLongModeGuest = 1;
+	} else {
+		curp->nsLongModeGuest = 0;
+	}
+
+	memset(hypercallPage, 0, PAGE_SIZE);
+	p = (char *)(hypercallPage) ;
+	*(u8  *)(p + 0) = 0x0f; /* vmcall */
+       	*(u8  *)(p + 1) = 0x01;
+	if (nsXenVector.extCpuIsIntel()) {
+       		*(u8  *)(p + 2) = 0xc1;
+	} else { 
+       		*(u8  *)(p + 2) = 0xd9;
+	}
+       	*(u8  *)(p + 3) = 0xc3; /* ret */
+}
+
+/*
+ * static inline void
+ * nsInitEventPage(void *siefPage)
+ * Initialize the per-vcpu event page.
+ *
+ * Calling/Exit State:
+ *	None.
+ */
+static inline void
+nsInitEventPage(void *siefPage)
+{
+	memset(siefPage, 0, PAGE_SIZE);
+}
+
+/*
+ * static inline void
+ * nsInitMessagePage(void *siefPage)
+ * Initialize the per-vcpu message page.
+ *
+ * Calling/Exit State:
+ *	None.
+ */
+static inline void
+nsInitMessagePage(void *simPage)
+{
+	memset(simPage, 0, PAGE_SIZE);
+}
+
+
+/*
+ * static inline void
+ * nsProcessMessageQ(nsPartition_t *curp, nsVcpu_t *curVcpu)
+ * Process the message queue.
+ *
+ * Calling/Exit State:
+ *	None.
+ */
+static inline void
+nsProcessMessageQ(nsPartition_t *curp, nsVcpu_t *curVcpu)
+{
+	/*
+	 * XXXKYS: we currently do not support queued messages.
+	 */
+}
+
+/*
+ * static inline void
+ * nsScheduleTimeOut(nsVcpTimerState_t *timer) 
+ * Schedule a timeout based on the specified timer.
+ *
+ *
+ * Calling/Exit State:
+ *	None.
+ */
+static inline void
+nsScheduleTimeOut(nsVcpTimerState_t *timer) 
+{
+	/*
+	 * We maintain the count in the units of 100ns. Furthermore,
+	 * this is not relative to NOW() but rather absolute.
+	 */
+	nsXenVector.extSetTimer(&timer->vcpuTimer, (timer->count * 100));
+}
+
+/*
+ * static void
+ * nsTimeOutHandler(void *arg)
+ * The timeout handler for Novell Shim/Adaptor.
+ *
+ * Calling/Exit State:
+ *	None.
+ */
+
+static void
+nsTimeOutHandler(void *arg)
+{
+	nsVcpTimerState_t	*timerData = arg;
+	nsVcpu_t	*curVcpu = timerData->thisCpu;
+	int		sIntNum;
+	int		vector;
+	if (!(curVcpu->nsVcpSControlMsr & 0x9)) {
+		goto nsToPostProcess;
+	}
+	/*
+	 * SynIC is enabled; do further processing. Timeouts are posted as
+	 * messages; verify if the message page is enabled.
+	 */
+	if (!(curVcpu->nsVcpSimpMsr & 0x1)) {
+		goto nsToPostProcess;
+	}
+	sIntNum = (((u32)(timerData->config >> 16)) & 0x0000000f);
+	/*
+	 * First post the message and then optionally deal with the 
+	 * interrupt notification.
+	 */
+	if (curVcpu->nsVcpSimPage == NULL) {
+		NS_PANIC("Novell Shim: Sim page not setup\n");
+	}
+	if ((((nsMessage_t *)curVcpu->nsVcpSimPage)[sIntNum]).messageType !=
+		nsMessageTypeNone) {
+		/*
+		 * The message slot is not empty just silently return.
+		 */
+		goto nsToPostProcess;
+	}
+	/*
+	 * The slot is available; post the message.
+	 */
+	(((nsTimerMessage_t *)curVcpu->nsVcpSimPage)[sIntNum]).messageType = 
+	nsMessageTimerExpired;
+	(((nsTimerMessage_t *)curVcpu->nsVcpSimPage)[sIntNum]).messageSize = 
+	sizeof(nsTimerMessage_t);
+	(((nsTimerMessage_t *)curVcpu->nsVcpSimPage)[sIntNum]).timerIndex = 
+	timerData->timerIndex;
+	(((nsTimerMessage_t *)curVcpu->nsVcpSimPage)[sIntNum]).expirationTime = 
+	timerData->count;
+	if ((curVcpu->nsVcpSIntMsr[sIntNum] >> 16) &0x1) {
+		/*
+		 * The designated sintx register is masked; just return.
+		 */
+		goto nsToPostProcess;
+	}
+	vector = ((u32)curVcpu->nsVcpSIntMsr[sIntNum] &0xff);
+
+	/*
+	 * Now post the interrupt to the VCPU.
+	 * XXXKYS: What is the delivery mode for interrupts delivered here.
+	 * Check with Mike?
+	 */
+	nsXenVector.extPostInterrupt(current, vector, APIC_DM_FIXED);
+	
+	/*
+	 * If auto eoi is set; deal with that.
+	 */
+	if (((u32)(curVcpu->nsVcpSIntMsr[sIntNum] >> 16)) & 0x1) {
+		nsWriteEoi(0);
+	}
+
+nsToPostProcess:
+	/*
+	 * Prior to returning, deal with all the post timeout issues.
+	 */
+	if (((u32)(timerData->config))  & 0x00000002) {
+		NS_STATS_COLLECT(NS_TIMEOUTS, &curVcpu->nsVcpStats);
+		nsScheduleTimeOut(timerData);
+	}
+}
+
+/*
+ * static inline void
+ * nsTimerInit(nsVcpu_t *vcpup, int timer)
+ * Initialize the specified timer structure.
+ *
+ * Calling/Exit State:
+ *	None.
+ */
+
+static inline void
+nsTimerInit(nsVcpu_t *vcpup, int timer)
+{
+	vcpup->nsVcpTimers[timer].config = 0;
+	vcpup->nsVcpTimers[timer].count = 0;
+	vcpup->nsVcpTimers[timer].thisCpu = vcpup;
+	vcpup->nsVcpTimers[timer].timerIndex = timer;
+	/*
+	 * XXXKYS: if the binding between vcpu and physical processor
+	 * changes what is done about pending timeouts?
+	 */
+//KYS: Need to migrate timers when the vcpu->physical CPU binding changes.
+	init_timer(&vcpup->nsVcpTimers[timer].vcpuTimer, nsTimeOutHandler, 
+		&vcpup->nsVcpTimers[timer], current->processor);
+}
+
+/*
+ * static inline int
+ * nsAccessTimeRefCnt(nsPartition_t *curp, u64 *msrContent)
+ * Read the per-partition time base.
+ *
+ * Calling/Exit State:
+ *	None.
+ */
+
+static inline int
+nsAccessTimeRefCnt(nsPartition_t *curp, u64 *msrContent)
+{
+	if (!nsPrivilegeCheck(curp, NS_ACCESS_TIME_REF_CNT)) {
+		/*
+		 * The partition does not have the privilege to
+		 * read this; return error.
+		 */
+		return (0);
+	}
+	*msrContent = nsGetTimeSinceDomainBoot();
+	return (1);
+}
+
+/*
+ * static void
+ * nsDoMigrateTimers(struct vcpu *v)
+ * The binding between this vcpu and the physical cpu has changed; migrate 
+ * the timers for this vcpu.
+ *
+ * Calling/Exit State:
+ *	The new binding is already in place.
+ */
+
+static void
+nsDoMigrateTimers(struct vcpu *v)
+{
+	nsPartition_t	*curp = nsGetCurrentPartition();
+	nsVcpu_t        *vcpup;
+	int i;
+	vcpup  =  &curp->nsVcpuState[nsGetCurrentVcpuIndex()];
+
+	for (i=0; i<4; i++) {
+		nsXenVector.extMigrateTimer(&vcpup->nsVcpTimers[i].vcpuTimer,
+					    v->processor);
+	}
+}
+		
+/*
+ * static int
+ * nsDoHyperCall(struct cpu_user_regs *pregs)
+ * Intercept for implementing Extension hypercalls.
+ *
+ * Calling/Exit State:
+ *	Based on the hypercall; the caller may give up the CPU while
+ *	processing the hypercall. No locks should be held on entry and
+ *	no locks will be held on return.
+ *
+ *
+ */
+
+static int
+nsDoHyperCall(struct cpu_user_regs *pregs)
+{
+	nsPartition_t	*curp = nsGetCurrentPartition();
+	nsVcpu_t        *vcpup;
+	int	longModeGuest = curp->nsLongModeGuest;
+	unsigned long hypercallMfn;
+	unsigned long gmfn;
+	gmfn = (curp->nsHypercallMsr >> 12);
+
+	hypercallMfn = nsXenVector.extGetMfnFromGva(pregs->eip);
+
+	if (hypercallMfn == curp->nsHypercallMfn) {
+		u64	opcode, input, output, retVal;
+		vcpup  =  &curp->nsVcpuState[nsGetCurrentVcpuIndex()];
+
+		/* 
+		 * This is an extension hypercall; process it; but first make
+		 * sure that the CPU is in the right state for invoking
+		 * the hypercall - protected mode at CPL 0.
+		 */
+		if (nsInvalidCpuState())  {
+			nsInjectException(TRAP_gp_fault);
+                	retVal = nsBuildHcallRetVal(NS_STATUS_INVALID_VP_STATE, 
+						  0);
+			nsSetSysCallRetVal(pregs, longModeGuest, retVal);
+			return (1);
+		}
+		if (longModeGuest) {
+			opcode = pregs->ecx;
+			input = pregs->edx;
+			output = pregs->r8;
+		} else {
+			opcode = 
+			((((u64)pregs->edx) << 32) | ((u64)pregs->eax));
+			input = 
+			((((u64)pregs->ebx) << 32) | ((u64)pregs->ecx));
+			output = 
+			((((u64)pregs->edi) << 32) | ((u64)pregs->esi));
+		}
+		NS_ASSERT(vcpup->nsVcplockDepth == 0);
+		nsHandleHyperCall(opcode, input, output, &retVal); 
+		nsSetSysCallRetVal(pregs, longModeGuest, retVal);
+		NS_ASSERT(vcpup->nsVcplockDepth == 0);
+		return (1);
+	}
+	/*
+	 * This hypercall page is not the page for extension.
+	 */
+	return (0);
+}
+
+/*
+ * static int 
+ * nsDomainCreate(struct domain *d)
+ * NS intercept for domain creation.
+ *
+ * Calling/Exit State:
+ *	None.
+ */
+ 
+	 
+static int 
+nsDomainCreate(struct domain *d)
+{
+	nsPartition_t	*nspp;
+	nspp = nsXenVector.extAllocMem(sizeof(nsPartition_t));
+	if (nspp == NULL) {
+		nsDebugPrint("Memory allocation failed\n");
+		return (1);
+	}
+	memset(nspp, 0, sizeof(*nspp));
+	nsLockInit(&nspp->nsLock);
+	/*
+	 * Set the partition wide privilege; We can start with no privileges 
+	 * and progressively turn on fancier hypervisor features.
+	 */
+	nsSetPartitionPrivileges(nspp);
+	nsSetPartitionFeatures(nspp);
+	/*
+	 * Stash away pointer to our state in the hvm domain structure.
+	 */
+	d->arch.hvm_domain.ext_handle = nspp;
+	nsDomainBootTime = nsXenVector.extGetTimeSinceBoot();
+	return (0);
+}
+
+
+
+/*
+ * static void 
+ * nsDomainDestroy(struct domain *d)
+ * NS intercept for the domain destruction.
+ *
+ * Calling/Exit State:
+ *	None.
+ */
+static void 
+nsDomainDestroy(struct domain *d)
+{
+	int i;
+	nsPartition_t *curp = d->arch.hvm_domain.ext_handle;
+	nsXenVector.extPrintk("NS Domain Being Destroyed\n");
+	NS_ASSERT(curp != NULL);
+	nsXenVector.extPrintk("DUMP STATS\n");
+	nsXenVector.extPrintk("GFS cpucount is %d\n", curp->nsFlushState.cpuCount);
+	if (curp->nsFlushState.currentOwner != NULL) {
+		nsXenVector.extPrintk("GFS owner  is %d\n", curp->nsFlushState.currentOwner->vcpu_id);
+	} else {
+		nsXenVector.extPrintk("GFS is free\n");
+	}
+	if (!cpus_empty(curp->nsFlushState.waiters)) {
+		nsXenVector.extPrintk("GFS: waiters not empty\n");
+	} else {
+		nsXenVector.extPrintk("GFS: waiters  empty\n");
+	}
+	for (i=0; i < MAX_VIRT_CPUS; i++) {
+		if (d->vcpu[i] != NULL) {
+			nsPrintStats(curp, i);
+		}
+	}
+	
+	nsXenVector.extFreeMem(d->arch.hvm_domain.ext_handle);	
+	d->arch.hvm_domain.ext_handle = NULL;
+}
+
+/*
+ * static int
+ * nsVcpuInitialize(struct vcpu *v)
+ * NS intercept for vcpu creation.
+ *
+ * Calling/Exit State:
+ *	None.
+ */
+ 
+static int
+nsVcpuInitialize(struct vcpu *v)
+{
+	nsVcpu_t	*vcpup;
+	nsPartition_t	*curp = v->domain->arch.hvm_domain.ext_handle;
+	int		i;
+	vcpup = &curp->nsVcpuState[v->vcpu_id];
+	atomic_inc(&curp->nsNumVcpusActive);
+	if (v->vcpu_id == 0) {
+		vcpup->nsVcpuFlags |= NS_VCPU_BOOT_CPU;
+	}
+	/*
+	 * Initialize all the synthetic MSRs corresponding to this VCPU. 
+	 * Note that all state is set to 0 to begin 
+	 * with.
+	 */
+	vcpup->nsVcpSVersionMsr = 0x00000001;
+	/*
+	 * Initialize the synthetic timet structures.
+	 */
+	for (i=0; i < 4; i++) {
+		nsTimerInit(vcpup, i);
+	}
+	/*
+	 * Setup the input page for handling hypercalls.
+	 *
+	 */
+	vcpup->nsVcpInputBufferPage = 	
+	nsXenVector.extAllocDomHeapPage();	
+	if (vcpup->nsVcpInputBufferPage == NULL) {
+		nsDebugPrint("Memory allocation failed\n");
+		return (1);
+	}
+	vcpup->nsVcpInputBuffer =
+	nsXenVector.extGetVirtFromPagePtr(vcpup->nsVcpInputBufferPage);	
+	if (vcpup->nsVcpInputBuffer == NULL) {
+		nsDebugPrint("Coud not get VA\n");
+		nsXenVector.extFreeDomHeapPage(vcpup->nsVcpInputBufferPage);	
+		return (1);
+	}
+	memset(vcpup->nsVcpInputBuffer, 0, PAGE_SIZE); 
+	vcpup->nsVcpOutputBufferPage = 	
+	nsXenVector.extAllocDomHeapPage();	
+	if (vcpup->nsVcpOutputBufferPage == NULL) {
+		nsDebugPrint("Memory allocation failed\n");
+#ifdef CONFIG_DOMAIN_PAGE
+		nsXenVector.extUnmapDomainPage(vcpup->nsVcpInputBuffer);
+#endif
+		nsXenVector.extFreeDomHeapPage(vcpup->nsVcpInputBufferPage);	
+		return (1);
+	}
+	vcpup->nsVcpOutputBuffer =
+	nsXenVector.extGetVirtFromPagePtr(vcpup->nsVcpOutputBufferPage);	
+	if (vcpup->nsVcpOutputBuffer == NULL) {
+		nsDebugPrint("Coud not get VA\n");
+		nsXenVector.extFreeDomHeapPage(vcpup->nsVcpOutputBufferPage);	
+#ifdef CONFIG_DOMAIN_PAGE
+		nsXenVector.extUnmapDomainPage(vcpup->nsVcpInputBuffer);
+#endif
+		nsXenVector.extFreeDomHeapPage(vcpup->nsVcpInputBufferPage);	
+		return (1);
+	}
+	vcpup->nsVcpXenVcpu = v; 
+	vcpup->nsVcpFlushRequest = 0;
+
+	return (0);
+}
+
+/*
+ * static void 
+ * nsVcpuDestroy(struct vcpu *v)
+ * NS intercept for domain destruction.
+ *
+ * Calling/Exit State:
+ *	None.
+ */
+static void 
+nsVcpuDestroy(struct vcpu *v)
+{
+	nsVcpu_t	*vcpup;
+	nsPartition_t	*curp = v->domain->arch.hvm_domain.ext_handle;
+	int 		i;
+
+	vcpup = &curp->nsVcpuState[v->vcpu_id];
+	atomic_dec(&curp->nsNumVcpusActive);
+	vcpup->nsVcpuFlags &= ~NS_VCPU_UP;
+	/*
+	 * Get rid of the pages we have allocated for this VCPU.
+	 */
+#ifdef CONFIG_DOMAIN_PAGE
+	nsXenVector.extUnmapDomainPage(vcpup->nsVcpSiefPage);
+	nsXenVector.extUnmapDomainPage(vcpup->nsVcpSimPage);
+	nsXenVector.extUnmapDomainPage(vcpup->nsVcpInputBuffer);
+	nsXenVector.extUnmapDomainPage(vcpup->nsVcpOutputBuffer);
+#endif
+
+	nsXenVector.extFreeDomHeapPage(vcpup->nsVcpInputBufferPage);	
+	nsXenVector.extFreeDomHeapPage(vcpup->nsVcpOutputBufferPage);	
+	/*
+	 * Kill the timers 
+	 */
+	for (i=0; i < 4; i++) {
+		nsXenVector.extKillTimer(&vcpup->nsVcpTimers[i].vcpuTimer);
+	}
+	return;
+}
+
+/*
+ * static int nsVcpuSave(struct domain *d, hvm_domain_context_t *h)
+ *	Save per-cpu shim state to support either migration or domain save.
+ *
+ * Calling exit state:
+ *	None.
+ */
+static int 
+nsVcpuSave(struct domain *d, hvm_domain_context_t *h)
+{
+	struct vcpu *v;
+	struct hvm_ns_veridian_cpu ctxt;
+
+	nsVcpu_t	*vcpup;
+	nsPartition_t	*curp = d->arch.hvm_domain.ext_handle;
+	int i;
+
+	if (curp == NULL) {
+		return 0;
+	}
+	for_each_vcpu(d, v) {
+		vcpup = &curp->nsVcpuState[v->vcpu_id];
+	
+		NS_ASSERT(vcpup->nsVcplockDepth == 0);
+		NS_ASSERT(vcpup->nsVcpFlushRequest == 0);
+		NS_ASSERT(vcpup->nsVcpWaitingOnGFS == 0);
+		NS_ASSERT(vcpup->nsVcpFlushPending == 0);
+		NS_ASSERT(vcpup->nsVcpWaitingForCleanup == 0);
+        	/* 
+	 	 * We don't need to save state for a 
+		 * vcpu that is down; the restore
+         	 * code will leave it down if there is nothing saved. 
+	 	 */
+        	if ( test_bit(_VPF_down, &v->pause_flags) )
+            		continue;
+		ctxt.control_msr = vcpup->nsVcpSControlMsr;
+		ctxt.version_msr = vcpup->nsVcpSVersionMsr;
+		ctxt.sief_msr = vcpup->nsVcpSIefpMsr;
+		ctxt.simp_msr = vcpup->nsVcpSimpMsr;
+		ctxt.eom_msr = vcpup->nsVcpEomMsr;
+		for (i=0; i < 16; i++)
+			ctxt.int_msr[i] = vcpup->nsVcpSIntMsr[i];
+		for (i=0; i < 4; i++) {
+			ctxt.timers[i].config = vcpup->nsVcpTimers[i].config;
+			/*
+			 * Save the count in units of 100ns relative to NOW()
+			 * When we restore we will add NOW() to properly
+			 * account for the elapsed time when the timer was
+			 * active.
+			 */ 
+			if (vcpup->nsVcpTimers[i].count > ((NOW())/100)) {
+				ctxt.timers[i].count = 
+				(vcpup->nsVcpTimers[i].count - ((NOW())/100));
+			} else {
+				ctxt.timers[i].count = 0;
+			} 
+		}
+		if ( hvm_save_entry(NS_VERIDIAN_CPU, 
+			v->vcpu_id, h, &ctxt) != 0 )
+			return 1;
+	}
+
+	return 0;
+}
+
+/*
+ * static int nsVcpuRestore(struct domain *d, hvm_domain_context_t *h)
+ *	Restore per-cpu shim state to support either migration or domain save.
+ *
+ * Calling exit state:
+ *	None.
+ */
+static int 
+nsVcpuRestore(struct domain *d, hvm_domain_context_t *h)
+{
+	int vcpuid, i;
+	struct hvm_ns_veridian_cpu ctxt;
+
+	nsVcpu_t	*vcpup;
+	nsPartition_t	*curp = d->arch.hvm_domain.ext_handle;
+
+	if (curp == NULL) {
+		return 0;
+	}
+	/* Which vcpu is this? */
+	vcpuid = hvm_load_instance(h);
+	vcpup = &curp->nsVcpuState[vcpuid];
+	NS_ASSERT(vcpup != NULL);
+	if ( hvm_load_entry(NS_VERIDIAN_CPU, h, &ctxt) != 0 )
+        	return -22;
+
+	vcpup->nsVcpSControlMsr = ctxt.control_msr;
+	vcpup->nsVcpSVersionMsr = ctxt.version_msr;
+
+	nsWriteSxMsr(NS_MSR_SIEFP, curp, vcpup, ctxt.sief_msr); 
+	nsWriteSxMsr(NS_MSR_SIMP, curp, vcpup, ctxt.simp_msr); 
+
+	vcpup->nsVcpEomMsr = ctxt.eom_msr;
+	for (i=0; i<16; i++)
+		vcpup->nsVcpSIntMsr[i] = ctxt.int_msr[i];
+	for (i=0; i < 4; i++) {
+		vcpup->nsVcpTimers[i].config = ctxt.timers[i].config;
+		vcpup->nsVcpTimers[i].count = 
+		(ctxt.timers[i].count + ((NOW())/100)); 
+		if ((vcpup->nsVcpTimers[i].config | 0x9)) {
+			/*
+			 * XXXKYS: Some issues with regards to time
+			 * management here:
+			 * 1) We will ignore the elapsed wall clock time
+			 *    when the domain was not running.
+			 * 2) Clearly we should account fot the time that 
+			 *    has elapsed when the domain was running with 
+			 *    respect to the timeouts that were scheduled
+			 *    prior to saving the domain.
+			 * We will deal with on the save side.
+			 */ 
+			nsScheduleTimeOut(&vcpup->nsVcpTimers[i]); 
+			NS_STATS_COLLECT(NS_TIMEOUTS, &vcpup->nsVcpStats);
+		}
+	}
+
+	vcpup->nsVcpuFlags |=  NS_VCPU_UP;
+	return 0;
+}
+
+
+
+/*
+ * static int nsDomSave(struct domain *d, hvm_domain_context_t *h)
+ *	Save per-domain shim state to support either migration or domain save.
+ *
+ * Calling exit state:
+ *	None.
+ */
+
+static int 
+nsDomSave(struct domain *d, hvm_domain_context_t *h)
+{
+	struct hvm_ns_veridian_dom ctxt;
+	nsPartition_t	*curp = d->arch.hvm_domain.ext_handle;
+
+	if (curp == NULL) {
+		return 0;
+	}
+
+	ctxt.guestid_msr = curp->nsGuestIdMsr;
+	ctxt.hypercall_msr = curp->nsHypercallMsr;
+	ctxt.long_mode = curp->nsLongModeGuest;
+	ctxt.pad0 = 0;
+	return (hvm_save_entry(NS_VERIDIAN_DOM, 0, h, &ctxt)); 
+}
+
+/*
+ * static int nsDomRestore(struct domain *d, hvm_domain_context_t *h)
+ *	Restore per-domain shim state to support either migration or domain save.
+ *
+ * Calling exit state:
+ *	None.
+ */
+
+static int 
+nsDomRestore(struct domain *d, hvm_domain_context_t *h)
+{
+	struct hvm_ns_veridian_dom ctxt;
+	nsPartition_t	*curp = d->arch.hvm_domain.ext_handle;
+
+	if (curp == NULL) {
+		return 0;
+	}
+
+	if ( hvm_load_entry(NS_VERIDIAN_DOM, h, &ctxt) != 0 )
+        	return -22;
+	curp->nsGuestIdMsr = ctxt.guestid_msr;
+	curp->nsHypercallMsr = ctxt.hypercall_msr;
+	curp->nsLongModeGuest = ctxt.long_mode;
+	curp->nsHypercallMfn =
+	nsXenVector.extGetMfnFromGmfn(d, (ctxt.hypercall_msr >> 12));
+	
+	return 0; 
+}
+
+HVM_REGISTER_SAVE_RESTORE(NS_VERIDIAN_DOM, nsDomSave, nsDomRestore,
+                          1, HVMSR_PER_DOM);
+	
+
+HVM_REGISTER_SAVE_RESTORE(NS_VERIDIAN_CPU, nsVcpuSave , nsVcpuRestore,
+                          1, HVMSR_PER_VCPU);
+
+
+/*
+ * static int
+ * nsPreProcessCpuIdLeaves(unsigned int input, struct cpu_user_regs *regs)
+ *
+ * Preprocess cpuid leaves. Both xen and Veridian use identical cpuid 
+ * leaves for getting info from the hypervisor. 
+ *
+ * Calling exit state:
+ *	None.
+ */
+static int
+nsPreProcessCpuIdLeaves(unsigned int input, struct cpu_user_regs *regs)
+{
+	uint32_t idx;
+	struct domain	*d = current->domain;
+	int	extid = d->arch.hvm_domain.params[HVM_PARAM_EXTEND_HYPERVISOR];
+
+	if (extid == 1) {
+		/*
+		 * Enlightened Windows guest; need to remap and handle 
+		 * leaves used by PV front-end drivers.
+		 */
+		if ((input >= 0x40000000) && (input <= 0x40000005)) {
+			return (0);
+		}
+		/*
+	 	 * PV drivers use cpuid to query the hypervisor for details. On
+	 	 * Windows we will use the following leaves for this:
+		 *
+		 * 4096: VMM Sinature (corresponds to 0x40000000 on Linux)
+		 * 4097: VMM Version (corresponds to 0x40000001 on Linux)
+		 * 4098: Hypercall details (corresponds to 0x40000002 on Linux)
+		 */
+		if ((input >= 0x40001000) && (input <= 0x40001002)) {
+			idx = (input - 0x40001000);
+			switch (idx) {
+			case 0:
+				regs->eax = 0x40000002; /* Largest leaf  */
+				regs->ebx = 0x566e6558;/*Signature 1: "XenV" */
+				regs->ecx = 0x65584d4d; /*Signature 2: "MMXe" */
+				regs->edx = 0x4d4d566e; /*Signature 3: "nVMM"*/
+				break;
+			case 1:
+				regs->eax = 
+				(XEN_VERSION << 16) | 
+				XEN_SUBVERSION;
+				regs->ebx = 0;          /* Reserved */
+				regs->ecx = 0;          /* Reserved */
+				regs->edx = 0;          /* Reserved */
+				break;
+
+			case 2:
+				regs->eax = 1; /*Number of hypercall-transfer pages*/
+				/*In linux this is 0x40000000 */
+				regs->ebx = 0x40001000; /* MSR base address */
+				regs->ecx = 0;          /* Features 1 */
+				regs->edx = 0;          /* Features 2 */
+				break;
+			}
+		}
+		return (1);
+	} else {
+		/*
+		 * For now this is all other "enlightened guests"
+		 */
+		if ((input >= 0x40000000) && (input <= 0x40000002)) {
+			/*
+			 * These leaves have already been correctly
+			 * processed; just return.
+			 */
+			return (1);
+		} 
+		return (0);
+	}
+}
+
+/*
+ * static int 
+ * nsDoCpuId(unsigned int input, struct cpu_user_regs *regs)
+ * NS intercept for cpuid instruction
+ *
+ * Calling/Exit State:
+ *	None.
+ */
+
+static int 
+nsDoCpuId(unsigned int input, struct cpu_user_regs *regs)
+{
+	uint32_t idx;
+
+	/*
+	 * hvmloader uses cpuid to set up a hypercall page; we don't want to
+	 * intercept calls coming from the bootstrap (bios) code in the HVM 
+	 * guest; we discriminate based on the instruction pointer.
+	 */
+	if (nsCallFromBios(regs)) {
+		/*
+		 * We don't intercept this.
+		 */
+		return (0);
+	}
+		
+	if (input == 0x00000001) { 
+		regs->ecx = (regs->ecx | 0x80000000);
+		return (1);
+	} 
+
+	if (nsPreProcessCpuIdLeaves(input, regs)) {
+		return (0);
+	}
+	idx = (input - 0x40000000);
+
+	switch (idx) {
+	case 0:
+		/*
+		 * 0x40000000: Hypervisor identification. 
+		 */
+		regs->eax = 0x40000005; /* For now clamp this */
+		regs->ebx = 0x65766f4e; /* "Nove" */ 
+		regs->ecx = 0x68536c6c; /* "llSh" */
+		regs->edx = 0x76486d69; /* "imHv" */ 
+		break;
+
+	case 1:
+		/*
+		 * 0x40000001: Hypervisor identification. 
+		 */
+		regs->eax = 0x31237648; /* "Hv#1*/
+		regs->ebx = 0; /* Reserved */ 
+		regs->ecx = 0; /* Reserved */
+		regs->edx = 0; /* Reserved */ 
+		break;
+	case 2:
+		/*
+		 * 0x40000002: Guest Info 
+		 */
+		if (nsOsRegistered()) {
+			regs->eax = nsGetGuestMajor();
+			regs->ebx = 
+			(nsGetGuestMajor() << 16) | nsGetGuestMinor();
+			regs->ecx = nsGetGuestServicePack();
+			regs->edx = 
+			(nsGetGuestServiceBranchInfo() << 24) |
+			nsGetGuestServiceNumber();
+		} else {
+			regs->eax = 0;
+			regs->ebx = 0;
+			regs->ecx = 0;
+			regs->edx = 0;
+		}
+		break;
+	case 3:
+		/*
+		 * 0x40000003: Feature identification.
+		 */
+		regs->eax = nsGetSupportedSyntheticMsrs();
+		/* We only support AcessSelfPartitionId bit 1 */
+		regs->ebx = 0x2; 
+		regs->ecx = 0; /* Reserved */
+		regs->edx = 0; /*No MWAIT (bit 0), No debugging (bit 1)*/
+		break;
+	case 4:
+		/*
+		 * 0x40000004: Imlementation recommendations.
+		 */
+		regs->eax = nsGetRecommendations();
+		regs->ebx = 0; /* Reserved */
+		regs->ecx = 0; /* Reserved */
+		regs->edx = 0; /* Reserved */
+		break;
+	case 5:
+		/*
+		 * 0x40000005: Implementation limits.
+		 * Currently we retrieve maximum number of vcpus and 
+		 * logical processors (hardware threads) supported.
+		 */
+		regs->eax = nsGetMaxVcpusSupported();
+		regs->ebx = nsGetMaxLcpusSupported();
+		regs->ecx = 0; /* Reserved */
+		regs->edx = 0; /* Reserved */
+		break;
+
+	default:
+		/*
+		 * We don't handle this leaf.
+		 */
+		return (0);
+
+	}
+	return (1);
+}
+
+/*
+ * static int
+ * nsDoRdMsr(uint32_t idx, struct cpu_user_regs *regs)
+ * NS intercept for reading MSRS.
+ *
+ * Calling/Exit State:
+ *	None.
+ */
+	
+static int
+nsDoRdMsr(uint32_t idx, struct cpu_user_regs *regs)
+{
+	nsPartition_t	*curp = nsGetCurrentPartition();
+	unsigned int	vcpuIndex = nsGetCurrentVcpuIndex();
+	u64 msrContent = 0;
+	nsVcpu_t	*curVcpu = &curp->nsVcpuState[vcpuIndex];
+	int		synInt, timer;
+	struct domain	*d = current->domain;
+	int	extid = d->arch.hvm_domain.params[HVM_PARAM_EXTEND_HYPERVISOR];
+	u64		timerCount;
+
+	/*
+	 * hvmloader uses rdmsr; we don't want to
+	 * intercept calls coming from the bootstrap (bios) code in the HVM 
+	 * guest; we descriminate based on the instruction pointer.
+	 */
+	if (nsCallFromBios(regs)) {
+		/*
+		 * We don't intercept this.
+		 */
+		return (0);
+	}
+	if (extid > 1) {
+		/*
+		 * For now this is all other "Enlightened" operating systems
+		 * other than Longhorn.
+		 */
+		if (idx == 0x40000000) {
+			/*
+			 * PV driver hypercall setup. Let xen handle this.
+			 */
+			return (0);
+		}
+		if (idx == 0x40001000) {
+			idx = 0x40000000;
+		}
+	}
+	switch (idx) {
+	case NS_MSR_GUEST_OS_ID: 
+		nsLockAcquire(curVcpu, &curp->nsLock);
+		regs->eax = (u32)(curp->nsGuestIdMsr & 0xFFFFFFFF);
+		regs->edx = (u32)(curp->nsGuestIdMsr >> 32);
+		nsLockRelease(curVcpu, &curp->nsLock);
+		break;
+	case NS_MSR_HYPERCALL:
+		nsLockAcquire(curVcpu, &curp->nsLock);
+		regs->eax = (u32)(curp->nsHypercallMsr & 0xFFFFFFFF);
+		regs->edx = (u32)(curp->nsHypercallMsr >> 32);
+		nsLockRelease(curVcpu, &curp->nsLock);
+		if ((((u32)curp->nsHypercallMsr) & (0x00000001)) != 0) {
+			curVcpu->nsVcpuFlags |= NS_VCPU_UP;
+		}
+		break;
+	case NS_MSR_VP_INDEX:
+		regs->eax = (u32)(vcpuIndex);
+		regs->edx = (u32)(0x0);
+		break;
+	case NS_MSR_ICR:
+		if (!nsPrivilegeCheck(curp, NS_ACCESS_APIC_MSRS)) {
+			goto msrReadError;
+		}
+		nsReadIcr(&msrContent);
+		NS_STATS_COLLECT(NS_ICR_READ, &curVcpu->nsVcpStats);
+		regs->eax = (u32)(msrContent & 0xFFFFFFFF);
+		regs->edx = (u32)(msrContent >> 32);
+		break;
+	case NS_MSR_TPR:
+		if (!nsPrivilegeCheck(curp, NS_ACCESS_APIC_MSRS)) {
+			goto msrReadError;
+		}
+		nsReadTpr(&msrContent);
+		NS_STATS_COLLECT(NS_TPR_READ, &curVcpu->nsVcpStats);
+		regs->eax = (u32)(msrContent & 0xFFFFFFFF);
+		regs->edx = (u32)(msrContent >> 32);
+		break;
+	/*
+	 * The following synthetic MSRs are implemented in the Novell Shim.
+	 */
+	case NS_MSR_SCONTROL:
+		if (!nsPrivilegeCheck(curp, NS_ACCESS_SYNC_MSRS)) {
+			goto msrReadError;
+		}
+		regs->eax = (u32)(curVcpu->nsVcpSControlMsr & 0xFFFFFFFF);
+		regs->edx = (u32)(curVcpu->nsVcpSControlMsr >> 32);
+		break;
+	case NS_MSR_SVERSION:
+		if (!nsPrivilegeCheck(curp, NS_ACCESS_SYNC_MSRS)) {
+			goto msrReadError;
+		}
+		regs->eax = (u32)(curVcpu->nsVcpSVersionMsr & 0xFFFFFFFF);
+		regs->edx = (u32)(curVcpu->nsVcpSVersionMsr >> 32);
+		break;
+	case NS_MSR_SIEFP:
+		if (!nsPrivilegeCheck(curp, NS_ACCESS_SYNC_MSRS)) {
+			goto msrReadError;
+		}
+		regs->eax = (u32)(curVcpu->nsVcpSIefpMsr & 0xFFFFFFFF);
+		regs->edx = (u32)(curVcpu->nsVcpSIefpMsr >> 32);
+		break;
+	case NS_MSR_SIMP:
+		if (!nsPrivilegeCheck(curp, NS_ACCESS_SYNC_MSRS)) {
+			goto msrReadError;
+		}
+		regs->eax = (u32)(curVcpu->nsVcpSimpMsr & 0xFFFFFFFF);
+		regs->edx = (u32)(curVcpu->nsVcpSimpMsr >> 32);
+		break;
+	case NS_MSR_SINT0:
+		synInt = 0;
+		goto	synIntReadProcess;
+	case NS_MSR_SINT1:
+		synInt = 1;
+		goto	synIntReadProcess;
+	case NS_MSR_SINT2:
+		synInt = 2;
+		goto	synIntReadProcess;
+	case NS_MSR_SINT3:
+		synInt = 3;
+		goto	synIntReadProcess;
+	case NS_MSR_SINT4:
+		synInt = 4;
+		goto	synIntReadProcess;
+	case NS_MSR_SINT5:
+		synInt = 5;
+		goto	synIntReadProcess;
+	case NS_MSR_SINT6:
+		synInt = 6;
+		goto	synIntReadProcess;
+	case NS_MSR_SINT7:
+		synInt = 7;
+		goto	synIntReadProcess;
+	case NS_MSR_SINT8:
+		synInt = 8;
+		goto	synIntReadProcess;
+	case NS_MSR_SINT9:
+		synInt = 9;
+		goto	synIntReadProcess;
+	case NS_MSR_SINT10:
+		synInt = 10;
+		goto	synIntReadProcess;
+	case NS_MSR_SINT11:
+		synInt = 11;
+		goto	synIntReadProcess;
+	case NS_MSR_SINT12:
+		synInt = 12;
+		goto	synIntReadProcess;
+	case NS_MSR_SINT13:
+		synInt = 13;
+		goto	synIntReadProcess;
+	case NS_MSR_SINT14:
+		synInt = 14;
+		goto	synIntReadProcess;
+	case NS_MSR_SINT15:
+		synInt = 15;
+synIntReadProcess:
+		if (!nsPrivilegeCheck(curp, NS_ACCESS_SYNC_MSRS)) {
+			goto msrReadError;
+		}
+		regs->eax = (u32)(curVcpu->nsVcpSIntMsr[synInt] & 0xFFFFFFFF);
+		regs->edx = (u32)(curVcpu->nsVcpSIntMsr[synInt] >> 32);
+		break;
+
+	case NS_MSR_SEOM:
+		/*
+		 * This is a write only register; reads return 0.
+		 */
+		regs->eax = 0;
+		regs->edx = 0;
+		break;
+	case NS_MSR_TIME_REF_COUNT:
+		if (!nsAccessTimeRefCnt(curp, &msrContent)) {
+			goto msrReadError;
+		}
+		regs->eax = (u32)(msrContent & 0xFFFFFFFF);
+		regs->edx = (u32)(msrContent >> 32);
+		break;
+	/*
+	 * Synthetic timer MSRs.
+	 */
+	case NS_MSR_TIMER0_CONFIG:
+		timer = 0;
+		goto	processTimerConfigRead;
+	case NS_MSR_TIMER1_CONFIG:
+		timer = 1;
+		goto	processTimerConfigRead;
+	case NS_MSR_TIMER2_CONFIG:
+		timer = 2;
+		goto	processTimerConfigRead;
+	case NS_MSR_TIMER3_CONFIG:
+		timer = 3;
+processTimerConfigRead:
+		if (!nsPrivilegeCheck(curp, NS_ACCESS_SYNC_TIMERS)) {
+			goto msrReadError;
+		}
+		regs->eax = 
+		(u32)(curVcpu->nsVcpTimers[timer].config & 0xFFFFFFFF);
+		regs->edx = 
+		(u32)(curVcpu->nsVcpTimers[timer].config >> 32);
+		break;
+	case NS_MSR_TIMER0_COUNT:
+		timer = 0;
+		goto processTimerCountRead;
+	case NS_MSR_TIMER1_COUNT:
+		timer = 1;
+		goto processTimerCountRead;
+	case NS_MSR_TIMER2_COUNT:
+		timer = 2;
+		goto processTimerCountRead;
+	case NS_MSR_TIMER3_COUNT:
+		timer = 3;
+processTimerCountRead:
+		if (!nsPrivilegeCheck(curp, NS_ACCESS_SYNC_TIMERS)) {
+			goto msrReadError;
+		}
+		timerCount = curVcpu->nsVcpTimers[timer].count;
+		if (timerCount > ((NOW())/100)) {
+			timerCount -= ((NOW())/100);
+		} else {
+			timerCount = 0;
+		}
+		regs->eax = 
+		(u32)(timerCount & 0xFFFFFFFF);
+		regs->edx = 
+		(u32)(timerCount >> 32);
+		break;
+	case NS_MSR_PVDRV_HCALL:
+		regs->eax = 0;
+		regs->edx = 0;
+		break; 
+	case NS_MSR_SYSTEM_RESET:
+		regs->eax = 0;
+		regs->edx = 0;
+		break; 
+	default:
+		/*
+		 * We did not handle the MSR address specified; 
+		 * let the caller figure out
+		 * What to do.
+		 */
+		return (0);
+	}
+	return (1);
+msrReadError:
+	/*
+	 * Have to inject #GP fault.
+	 */
+	nsInjectException(TRAP_gp_fault);
+	return (1);
+}
+
+/*
+ * static int
+ * nsDoWrMsr(uint32_t idx, struct cpu_user_regs *regs)
+ * NS intercept for writing MSRS.
+ *
+ * Calling/Exit State:
+ *	None.
+ */
+
+static int
+nsDoWrMsr(uint32_t idx, struct cpu_user_regs *regs)
+{
+	nsPartition_t	*curp = nsGetCurrentPartition();
+	unsigned int	vcpuIndex = nsGetCurrentVcpuIndex();
+	u64 msrContent = 0;
+	nsVcpu_t	*curVcpu = &curp->nsVcpuState[vcpuIndex];
+	int		synInt, timer;
+	struct domain	*d = current->domain;
+	int	extid = d->arch.hvm_domain.params[HVM_PARAM_EXTEND_HYPERVISOR];
+
+	/*
+	 * hvmloader uses wrmsr; we don't want to
+	 * intercept calls coming from the bootstrap (bios) code in the HVM 
+	 * guest; we descriminate based on the instruction pointer.
+	 */
+	if (nsCallFromBios(regs)) {
+		/*
+		 * We don't intercept this.
+		 */
+		return (0);
+	}
+	msrContent =
+	(u32)regs->eax | ((u64)regs->edx << 32);
+	if (extid > 1) {
+		/*
+		 * For now this is all other "Enlightened" operating systems
+		 * other than Longhorn.
+		 */
+		if (idx == 0x40000000) {
+			/*
+			 * PV driver hypercall setup. Let xen handle this.
+			 */
+			return (0);
+		}
+		if (idx == 0x40001000) {
+			idx = 0x40000000;
+		}
+	}
+	switch (idx) {
+	case NS_MSR_GUEST_OS_ID: 
+		nsWriteGuestIdMsr(curp, curVcpu,  msrContent);
+		break; 
+	case NS_MSR_HYPERCALL:
+		nsWriteHypercallMsr(curp, curVcpu, msrContent);
+		break;
+
+	case NS_MSR_VP_INDEX:
+		goto msrWriteError;
+		
+	case NS_MSR_EOI:
+		if (!nsPrivilegeCheck(curp, NS_ACCESS_APIC_MSRS)) {
+			goto msrWriteError;
+		}
+		nsWriteEoi(msrContent);
+		NS_STATS_COLLECT(NS_EOI_WRITE, &curVcpu->nsVcpStats);
+		break;
+	case NS_MSR_ICR:
+		if (!nsPrivilegeCheck(curp, NS_ACCESS_APIC_MSRS)) {
+			goto msrWriteError;
+		}
+		nsWriteIcr(msrContent);
+		NS_STATS_COLLECT(NS_ICR_WRITE, &curVcpu->nsVcpStats);
+		break;
+	case NS_MSR_TPR:
+		if (!nsPrivilegeCheck(curp, NS_ACCESS_APIC_MSRS)) {
+			goto msrWriteError;
+		}
+		nsWriteTpr(msrContent);
+		NS_STATS_COLLECT(NS_TPR_WRITE, &curVcpu->nsVcpStats);
+		break;
+
+	/*
+	 * The following MSRs are synthetic MSRs supported in the Novell Shim.
+	 */
+	case NS_MSR_SCONTROL:
+		if (!nsPrivilegeCheck(curp, NS_ACCESS_SYNC_MSRS)) {
+			goto msrWriteError;
+		}
+		curVcpu->nsVcpSControlMsr = msrContent; 
+		break;
+	case NS_MSR_SVERSION:
+		if (!nsPrivilegeCheck(curp, NS_ACCESS_SYNC_MSRS)) {
+			goto msrWriteError;
+		}
+		/*
+		 * This is a read-only MSR; generate #GP
+		 */
+		nsInjectException(TRAP_gp_fault);
+		break;
+	case NS_MSR_SIEFP:
+	case NS_MSR_SIMP:
+		if (!nsPrivilegeCheck(curp, NS_ACCESS_SYNC_MSRS)) {
+			goto msrWriteError;
+		} 
+		nsWriteSxMsr(idx, curp, curVcpu, msrContent);
+		break;
+	case NS_MSR_SINT0:
+		synInt = 0;
+		goto	synIntWrProcess;
+	case NS_MSR_SINT1:
+		synInt = 1;
+		goto	synIntWrProcess;
+	case NS_MSR_SINT2:
+		synInt = 2;
+		goto	synIntWrProcess;
+	case NS_MSR_SINT3:
+		synInt = 3;
+		goto	synIntWrProcess;
+	case NS_MSR_SINT4:
+		synInt = 4;
+		goto	synIntWrProcess;
+	case NS_MSR_SINT5:
+		synInt = 5;
+		goto	synIntWrProcess;
+	case NS_MSR_SINT6:
+		synInt = 6;
+		goto	synIntWrProcess;
+	case NS_MSR_SINT7:
+		synInt = 7;
+		goto	synIntWrProcess;
+	case NS_MSR_SINT8:
+		synInt = 8;
+		goto	synIntWrProcess;
+	case NS_MSR_SINT9:
+		synInt = 9;
+		goto	synIntWrProcess;
+	case NS_MSR_SINT10:
+		synInt = 10;
+		goto	synIntWrProcess;
+	case NS_MSR_SINT11:
+		synInt = 11;
+		goto	synIntWrProcess;
+	case NS_MSR_SINT12:
+		synInt = 12;
+		goto	synIntWrProcess;
+	case NS_MSR_SINT13:
+		synInt = 13;
+		goto	synIntWrProcess;
+	case NS_MSR_SINT14:
+		synInt = 14;
+		goto	synIntWrProcess;
+	case NS_MSR_SINT15:
+		synInt = 15;
+synIntWrProcess:
+		if (!nsPrivilegeCheck(curp, NS_ACCESS_SYNC_MSRS)) {
+			goto msrWriteError;
+		}
+		/*
+		 * XXXKYS: We assume that the synInt registers will be 
+		 * first written before the interrupt generation can occur.
+		 * Specifically if SINT is masked all interrupts that may have 
+		 * been generated will be lost. Also when SINT is disabled; 
+		 * its effects will be only felt for subsequent interrupts that 
+		 * may be posted. XXXKYS: CHECK
+		 */
+		curVcpu->nsVcpSIntMsr[synInt] = msrContent; 
+		break;
+
+	case NS_MSR_SEOM:
+		if (!nsPrivilegeCheck(curp, NS_ACCESS_SYNC_MSRS)) {
+			goto msrWriteError;
+		}
+		curVcpu->nsVcpEomMsr = msrContent; 
+		nsProcessMessageQ(curp, curVcpu);
+		break;
+	case NS_MSR_TIME_REF_COUNT: 
+		/*
+		 * This is a read-only msr.
+		 */
+		goto msrWriteError;
+	
+	/*
+	 * Synthetic timer MSRs.
+	 */
+	case NS_MSR_TIMER0_CONFIG:
+		timer = 0;
+		goto	processTimerConfig;
+	case NS_MSR_TIMER1_CONFIG:
+		timer = 1;
+		goto	processTimerConfig;
+	case NS_MSR_TIMER2_CONFIG:
+		timer = 2;
+		goto	processTimerConfig;
+	case NS_MSR_TIMER3_CONFIG:
+		timer = 3;
+processTimerConfig:
+		if (!nsPrivilegeCheck(curp, NS_ACCESS_SYNC_TIMERS)) {
+			goto msrWriteError;
+		}
+		/*
+		 * Assume that the client is going to write the whole msr. 
+		 */
+		if (!(msrContent & 0x9)) {
+			/*
+			 * We are neither setting Auto Enable or Enable; 
+			 * silently exit.
+			 * Should this be considered to turn off a 
+			 * timer that may be currently 
+			 * active; XXXKYS: Check. For now we are 
+			 * not doing anything here.
+			 */
+			break;
+		}
+		if (!(((u32)(msrContent >> 16)) & 0x0000000f)) {
+			/*
+			 * sintx is 0; clear the enable bit(s).
+			 */
+			msrContent &= ~(0x1);
+		}
+		curVcpu->nsVcpTimers[timer].config = msrContent;
+		/*
+		 * XXXKYS: Can any order be assumed here; 
+		 * should we just act on whatever is in the 
+		 * count register. For now act as if the count 
+		 * register is valid and act on it.
+		 */
+		if (msrContent & 0x1) {
+			nsScheduleTimeOut(&curVcpu->nsVcpTimers[timer]); 
+			NS_STATS_COLLECT(NS_TIMEOUTS, &curVcpu->nsVcpStats);
+		}
+		break;
+	case NS_MSR_TIMER0_COUNT:
+		timer = 0;
+		goto processTimerCount;
+	case NS_MSR_TIMER1_COUNT:
+		timer = 1;
+		goto processTimerCount;
+	case NS_MSR_TIMER2_COUNT:
+		timer = 2;
+		goto processTimerCount;
+	case NS_MSR_TIMER3_COUNT:
+		timer = 3;
+processTimerCount:
+		if (!nsPrivilegeCheck(curp, NS_ACCESS_SYNC_TIMERS)) {
+			goto msrWriteError;
+		}
+		curVcpu->nsVcpTimers[timer].count = 
+		(msrContent + ((NOW())/100));
+		if ((curVcpu->nsVcpTimers[timer].config | 0x9)) {
+			nsScheduleTimeOut(&curVcpu->nsVcpTimers[timer]); 
+			NS_STATS_COLLECT(NS_TIMEOUTS, &curVcpu->nsVcpStats);
+		}
+		
+		break;
+	case NS_MSR_PVDRV_HCALL:
+		/*
+		 * Establish the hypercall page for PV drivers.
+		 */
+		nsXenVector.extWrmsrHypervisorRegs(0x40000000, regs->eax,
+						   regs->edx);
+		break; 
+	case NS_MSR_SYSTEM_RESET:
+		/*
+		 * Shutdown the domain/partition.
+	 	 */
+		if (msrContent & 0x1) {
+			domain_shutdown(d, SHUTDOWN_reboot);
+		}
+		break; 
+		
+	default:
+		/*
+		 * We did not handle the MSR address; 
+		 * let the caller deal with this.
+		 */
+		return (0);
+	}
+	return (1);
+msrWriteError:
+	/*
+	 * Have to inject #GP fault.
+	 */
+	nsInjectException(TRAP_gp_fault);
+	return (1);
+}

[-- Attachment #5: Type: text/plain, Size: 138 bytes --]

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xensource.com
http://lists.xensource.com/xen-devel

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH][RFC] Supporting Enlightened Windows 2008 Server
  2008-02-19 22:11 [PATCH][RFC] Supporting Enlightened Windows 2008 Server Ky Srinivasan
@ 2008-02-20  9:44 ` Keir Fraser
  2008-02-20 16:14   ` Ky Srinivasan
  0 siblings, 1 reply; 15+ messages in thread
From: Keir Fraser @ 2008-02-20  9:44 UTC (permalink / raw)
  To: Ky Srinivasan, xen-devel

On 19/2/08 22:11, "Ky Srinivasan" <ksrinivasan@novell.com> wrote:

> B) Implement an adapter that implements the required Hyper-V functionality.
> 
> We  have implemented only a subset of Hyper-V functionality that is required
> for enlightened windows 2008 guest today. However, we have the framework in
> place to implement any additional functionality that the windows guests may
> leverage going forward. The framework is extensible and one can easily
> implement OS specific enlightenments.

The appearance is that you hook on every significant emulation point in the
HVM code and implement a shadow hypervisor. Is that an accurate description?
What performance wins do you get from Hyper-V emulation, ignoring the
obvious wins you will get from installing PV I/O drivers? I think this is a
particularly interesting question since it was our impression that the PV
memory-management interfaces, which you appear to jump through hoops to
support, were not of much benefit with Xen's more sophisticated pagetable
algorithm (or with Intel EPT or AMD NPT).

 -- Keir

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH][RFC] Supporting Enlightened Windows 2008 Server
  2008-02-20  9:44 ` Keir Fraser
@ 2008-02-20 16:14   ` Ky Srinivasan
  0 siblings, 0 replies; 15+ messages in thread
From: Ky Srinivasan @ 2008-02-20 16:14 UTC (permalink / raw)
  To: Keir Fraser, xen-devel

>>> On Wed, Feb 20, 2008 at  4:44 AM, in message
<C3E1A703.1CB24%Keir.Fraser@cl.cam.ac.uk>, Keir Fraser
<Keir.Fraser@cl.cam.ac.uk> wrote: 
> On 19/2/08 22:11, "Ky Srinivasan" <ksrinivasan@novell.com> wrote:
> 
>> B) Implement an adapter that implements the required Hyper- V functionality.
>> 
>> We  have implemented only a subset of Hyper- V functionality that is required
>> for enlightened windows 2008 guest today. However, we have the framework in
>> place to implement any additional functionality that the windows guests may
>> leverage going forward. The framework is extensible and one can easily
>> implement OS specific enlightenments.
> 
> The appearance is that you hook on every significant emulation point in the
> HVM code and implement a shadow hypervisor. Is that an accurate description?

My initial goal was to get a framework in place where one could implement any OS specific enhancements without cluttering up the rest of the Xen code. To that end, this code does hook into various emulation points of interest in the hvm code. A secondary goal was to minimize the changes to the base Xen code. Furthermore, I wanted to reuse as much of the base Xen code as possible. So, I would not characterize this as a shadow hypervisor, but rather as an adapter that bridges the semantic gap between the APIs of interest in Hyper-V and Xen. 

> What performance wins do you get from Hyper- V emulation, ignoring the
> obvious wins you will get from installing PV I/O drivers? I think this is a
> particularly interesting question since it was our impression that the PV
> memory- management interfaces, which you appear to jump through hoops to
> support, were not of much benefit with Xen's more sophisticated pagetable
> algorithm (or with Intel EPT or AMD NPT).

We have done very minimal performance evaluation to date. On NetBench I have seen a 10% improvement. You are right, TLB flush enlightenments did not make much of a difference. These results are on a uniprocessor configuration. Of the enlightenments, MSR based APIC access made the most difference.  Going forward, we will be doing a more comprehensive performance evaluation with the goal of having parity with the MS platform. 

Regards,

K. Y

> 
>  --  Keir
> 
> 
> 
> _______________________________________________
> Xen- devel mailing list
> Xen- devel@lists.xensource.com
> http://lists.xensource.com/xen- devel

^ permalink raw reply	[flat|nested] 15+ messages in thread

end of thread, other threads:[~2008-03-07 13:30 UTC | newest]

Thread overview: 15+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2008-03-05 22:15 [PATCH][RFC] Supporting Enlightened Windows 2008 Server Ky Srinivasan
2008-03-05 22:28 ` Daniel P. Berrange
2008-03-05 22:38   ` Daniel P. Berrange
2008-03-07  1:06     ` Ky Srinivasan
2008-03-07  1:05   ` Ky Srinivasan
2008-03-06  7:28 ` Keir Fraser
2008-03-06 10:15   ` Tim Deegan
2008-03-07  1:10     ` [PATCH][RFC] Supporting Enlightened Windows 2008Server Ky Srinivasan
2008-03-07 11:57       ` Tim Deegan
2008-03-07 13:19       ` Keir Fraser
2008-03-07 13:30       ` Keir Fraser
2008-03-07  1:08   ` Ky Srinivasan
  -- strict thread matches above, loose matches on Subject: below --
2008-02-19 22:11 [PATCH][RFC] Supporting Enlightened Windows 2008 Server Ky Srinivasan
2008-02-20  9:44 ` Keir Fraser
2008-02-20 16:14   ` Ky Srinivasan

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.