All of lore.kernel.org
 help / color / mirror / Atom feed
From: Zachary Amsden <zach@vmware.com>
To: Andi Kleen <ak@muc.de>, Linus Torvalds <torvalds@osdl.org>,
	Rusty Russell <rusty@rustcorp.com.au>,
	Jeremy Fitzhardinge <jeremy@goop.org>,
	Chris Wright <chrisw@sous-sol.org>, Dan Hecht <dhecht@vmware.com>,
	Dan Arai <arai@vmware.com>, Andrew Morton <akpm@osdl.org>,
	Virtualization Mailing List <virtualization@lists.osdl.org>,
	Linux Kernel Mailing List <linux-kernel@vger.kernel.org>,
	Zachary Amsden <zach@vmware.com>
Cc: Anthony Liguori <anthony@codemonkey.ws>
Subject: [PATCH 8/9] Vmi apic ops.diff
Date: Thu, 1 Mar 2007 18:55:47 -0800	[thread overview]
Message-ID: <200703020255.l222tltk009694@zach-dev.vmware.com> (raw)

Use para_fill instead of directly setting the APIC ops to the result of the
vmi_get_function call - this allows one to implement a VMI ROM without
implementing APIC functions, just using the native APIC functions.

While doing this, I realized that there is a lot more cleanup that should
have been done.  Basically, we should never assume that the ROM implements
a specific set of functions, and always allow fallback to the native
implementation.

This is critical for future compatibility.

Signed-off-by: Anthony Liguori <anthony@codemonkey.ws>
Signed-off-by: Zachary Amsden <zach@vmware.com>

diff -r 0ba8434a5c7e arch/i386/kernel/vmi.c
--- a/arch/i386/kernel/vmi.c	Thu Mar 01 16:49:27 2007 -0800
+++ b/arch/i386/kernel/vmi.c	Thu Mar 01 16:49:33 2007 -0800
@@ -54,6 +54,7 @@ static int disable_tsc;
 static int disable_tsc;
 static int disable_mtrr;
 static int disable_noidle;
+static int disable_vmi_timer;
 
 /* Cached VMI operations */
 struct {
@@ -662,12 +663,12 @@ void vmi_bringup(void)
 void vmi_bringup(void)
 {
  	/* We must establish the lowmem mapping for MMU ops to work */
-	if (vmi_rom)
+	if (vmi_ops.set_linear_mapping)
 		vmi_ops.set_linear_mapping(0, __PAGE_OFFSET, max_low_pfn, 0);
 }
 
 /*
- * Return a pointer to the VMI function or a NOP stub
+ * Return a pointer to a VMI function or NULL if unimplemented
  */
 static void *vmi_get_function(int vmicall)
 {
@@ -678,12 +679,13 @@ static void *vmi_get_function(int vmical
 	if (rel->type == VMI_RELOCATION_CALL_REL)
 		return (void *)rel->eip;
 	else
-		return (void *)vmi_nop;
+		return NULL;
 }
 
 /*
  * Helper macro for making the VMI paravirt-ops fill code readable.
- * For unimplemented operations, fall back to default.
+ * For unimplemented operations, fall back to default, unless nop
+ * is returned by the ROM.
  */
 #define para_fill(opname, vmicall)				\
 do {								\
@@ -692,8 +694,28 @@ do {								\
 	if (rel->type != VMI_RELOCATION_NONE) {			\
 		BUG_ON(rel->type != VMI_RELOCATION_CALL_REL);	\
 		paravirt_ops.opname = (void *)rel->eip;		\
+	} else if (rel->type == VMI_RELOCATION_NOP) 		\
+		paravirt_ops.opname = (void *)vmi_nop;		\
+} while (0)
+
+/*
+ * Helper macro for making the VMI paravirt-ops fill code readable.
+ * For cached operations which do not match the VMI ROM ABI and must
+ * go through a tranlation stub.  Ignore NOPs, since it is not clear
+ * a NOP * VMI function corresponds to a NOP paravirt-op when the
+ * functions are not in 1-1 correspondence.
+ */
+#define para_wrap(opname, wrapper, cache, vmicall)		\
+do {								\
+	reloc = call_vrom_long_func(vmi_rom, get_reloc,		\
+				    VMI_CALL_##vmicall);	\
+	BUG_ON(rel->type == VMI_RELOCATION_JUMP_REL);		\
+	if (rel->type == VMI_RELOCATION_CALL_REL) {		\
+		paravirt_ops.opname = wrapper;			\
+		vmi_ops.cache = (void *)rel->eip;		\
 	}							\
 } while (0)
+
 
 /*
  * Activate the VMI interface and switch into paravirtualized mode
@@ -731,13 +753,8 @@ static inline int __init activate_vmi(vo
 	 *  rdpmc is not yet used in Linux
 	 */
 
-	/* CPUID is special, so very special */
-	reloc = call_vrom_long_func(vmi_rom, get_reloc,	VMI_CALL_CPUID);
-	if (rel->type != VMI_RELOCATION_NONE) {
-		BUG_ON(rel->type != VMI_RELOCATION_CALL_REL);
-		vmi_ops.cpuid = (void *)rel->eip;
-		paravirt_ops.cpuid = vmi_cpuid;
-	}
+	/* CPUID is special, so very special it gets wrapped like a present */
+	para_wrap(cpuid, vmi_cpuid, cpuid, CPUID);
 
 	para_fill(clts, CLTS);
 	para_fill(get_debugreg, GetDR);
@@ -754,6 +771,7 @@ static inline int __init activate_vmi(vo
 	para_fill(restore_fl, SetInterruptMask);
 	para_fill(irq_disable, DisableInterrupts);
 	para_fill(irq_enable, EnableInterrupts);
+
 	/* irq_save_disable !!! sheer pain */
 	patch_offset(&irq_save_disable_callout[IRQ_PATCH_INT_MASK],
 		     (char *)paravirt_ops.save_fl);
@@ -761,26 +779,18 @@ static inline int __init activate_vmi(vo
 		     (char *)paravirt_ops.irq_disable);
 
 	para_fill(wbinvd, WBINVD);
+	para_fill(read_tsc, RDTSC);
+
+	/* The following we emulate with trap and emulate for now */
 	/* paravirt_ops.read_msr = vmi_rdmsr */
 	/* paravirt_ops.write_msr = vmi_wrmsr */
-	para_fill(read_tsc, RDTSC);
 	/* paravirt_ops.rdpmc = vmi_rdpmc */
 
-	/* TR interface doesn't pass TR value */
-	reloc = call_vrom_long_func(vmi_rom, get_reloc,	VMI_CALL_SetTR);
-	if (rel->type != VMI_RELOCATION_NONE) {
-		BUG_ON(rel->type != VMI_RELOCATION_CALL_REL);
-		vmi_ops.set_tr = (void *)rel->eip;
-		paravirt_ops.load_tr_desc = vmi_set_tr;
-	}
+	/* TR interface doesn't pass TR value, wrap */
+	para_wrap(load_tr_desc, vmi_set_tr, set_tr, SetTR);
 
 	/* LDT is special, too */
-	reloc = call_vrom_long_func(vmi_rom, get_reloc,	VMI_CALL_SetLDT);
-	if (rel->type != VMI_RELOCATION_NONE) {
-		BUG_ON(rel->type != VMI_RELOCATION_CALL_REL);
-		vmi_ops._set_ldt = (void *)rel->eip;
-		paravirt_ops.set_ldt = vmi_set_ldt;
-	}
+	para_wrap(set_ldt, vmi_set_ldt, _set_ldt, SetLDT);
 
 	para_fill(load_gdt, SetGDT);
 	para_fill(load_idt, SetIDT);
@@ -791,25 +801,14 @@ static inline int __init activate_vmi(vo
 	para_fill(write_ldt_entry, WriteLDTEntry);
 	para_fill(write_gdt_entry, WriteGDTEntry);
 	para_fill(write_idt_entry, WriteIDTEntry);
-	reloc = call_vrom_long_func(vmi_rom, get_reloc,
-				    VMI_CALL_UpdateKernelStack);
-	if (rel->type != VMI_RELOCATION_NONE) {
-		BUG_ON(rel->type != VMI_RELOCATION_CALL_REL);
-		vmi_ops.set_kernel_stack = (void *)rel->eip;
-		paravirt_ops.load_esp0 = vmi_load_esp0;
-	}
-
+	para_wrap(load_esp0, vmi_load_esp0, set_kernel_stack, UpdateKernelStack);
 	para_fill(set_iopl_mask, SetIOPLMask);
-	paravirt_ops.io_delay = (void *)vmi_nop;
-
+	para_fill(io_delay, IODelay);
 	para_fill(set_lazy_mode, SetLazyMode);
 
-	reloc = call_vrom_long_func(vmi_rom, get_reloc, VMI_CALL_FlushTLB);
-	if (rel->type != VMI_RELOCATION_NONE) {
-		vmi_ops.flush_tlb = (void *)rel->eip;
-		paravirt_ops.flush_tlb_user = vmi_flush_tlb_user;
-		paravirt_ops.flush_tlb_kernel = vmi_flush_tlb_kernel;
-	}
+	/* user and kernel flush are just handled with different flags to FlushTLB */
+	para_wrap(flush_tlb_user, vmi_flush_tlb_user, flush_tlb, FlushTLB);
+	para_wrap(flush_tlb_kernel, vmi_flush_tlb_kernel, flush_tlb, FlushTLB);
 	para_fill(flush_tlb_single, InvalPage);
 
 	/*
@@ -824,28 +823,40 @@ static inline int __init activate_vmi(vo
 	vmi_ops.set_pte = vmi_get_function(VMI_CALL_SetPxE);
 	vmi_ops.update_pte = vmi_get_function(VMI_CALL_UpdatePxE);
 #endif
-	vmi_ops.set_linear_mapping = vmi_get_function(VMI_CALL_SetLinearMapping);
+
+	if (vmi_ops.set_pte) {
+		paravirt_ops.set_pte = vmi_set_pte;
+		paravirt_ops.set_pte_at = vmi_set_pte_at;
+		paravirt_ops.set_pmd = vmi_set_pmd;
+#ifdef CONFIG_X86_PAE
+		paravirt_ops.set_pte_atomic = vmi_set_pte_atomic;
+		paravirt_ops.set_pte_present = vmi_set_pte_present;
+		paravirt_ops.set_pud = vmi_set_pud;
+		paravirt_ops.pte_clear = vmi_pte_clear;
+		paravirt_ops.pmd_clear = vmi_pmd_clear;
+#endif
+	}
+
+	if (vmi_ops.update_pte) {
+		paravirt_ops.pte_update = vmi_update_pte;
+		paravirt_ops.pte_update_defer = vmi_update_pte_defer;
+	}
+
 	vmi_ops.allocate_page = vmi_get_function(VMI_CALL_AllocatePage);
+	if (vmi_ops.allocate_page) {
+		paravirt_ops.alloc_pt = vmi_allocate_pt;
+		paravirt_ops.alloc_pd = vmi_allocate_pd;
+		paravirt_ops.alloc_pd_clone = vmi_allocate_pd_clone;
+	}
+
 	vmi_ops.release_page = vmi_get_function(VMI_CALL_ReleasePage);
-
-	paravirt_ops.map_pt_hook = vmi_map_pt_hook;
-	paravirt_ops.alloc_pt = vmi_allocate_pt;
-	paravirt_ops.alloc_pd = vmi_allocate_pd;
-	paravirt_ops.alloc_pd_clone = vmi_allocate_pd_clone;
-	paravirt_ops.release_pt = vmi_release_pt;
-	paravirt_ops.release_pd = vmi_release_pd;
-	paravirt_ops.set_pte = vmi_set_pte;
-	paravirt_ops.set_pte_at = vmi_set_pte_at;
-	paravirt_ops.set_pmd = vmi_set_pmd;
-	paravirt_ops.pte_update = vmi_update_pte;
-	paravirt_ops.pte_update_defer = vmi_update_pte_defer;
-#ifdef CONFIG_X86_PAE
-	paravirt_ops.set_pte_atomic = vmi_set_pte_atomic;
-	paravirt_ops.set_pte_present = vmi_set_pte_present;
-	paravirt_ops.set_pud = vmi_set_pud;
-	paravirt_ops.pte_clear = vmi_pte_clear;
-	paravirt_ops.pmd_clear = vmi_pmd_clear;
-#endif
+	if (vmi_ops.release_page) {
+		paravirt_ops.release_pt = vmi_release_pt;
+		paravirt_ops.release_pd = vmi_release_pd;
+	}
+	para_wrap(map_pt_hook, vmi_map_pt_hook, set_linear_mapping,
+		  SetLinearMapping);
+
 	/*
 	 * These MUST always be patched.  Don't support indirect jumps
 	 * through these operations, as the VMI interface may use either
@@ -857,21 +868,20 @@ static inline int __init activate_vmi(vo
 	paravirt_ops.iret = (void *)0xbadbab0;
 
 #ifdef CONFIG_SMP
-	paravirt_ops.startup_ipi_hook = vmi_startup_ipi_hook;
-	vmi_ops.set_initial_ap_state = vmi_get_function(VMI_CALL_SetInitialAPState);
+	para_wrap(startup_ipi_hook, vmi_startup_ipi_hook, set_initial_ap_state, SetInitialAPState);
 #endif
 
 #ifdef CONFIG_X86_LOCAL_APIC
-	paravirt_ops.apic_read = vmi_get_function(VMI_CALL_APICRead);
-	paravirt_ops.apic_write = vmi_get_function(VMI_CALL_APICWrite);
-	paravirt_ops.apic_write_atomic = vmi_get_function(VMI_CALL_APICWrite);
+	para_fill(apic_read, APICRead);
+	para_fill(apic_write, APICWrite);
+	para_fill(apic_write_atomic, APICWrite);
 #endif
 
 	/*
 	 * Check for VMI timer functionality by probing for a cycle frequency method
 	 */
 	reloc = call_vrom_long_func(vmi_rom, get_reloc, VMI_CALL_GetCycleFrequency);
-	if (rel->type != VMI_RELOCATION_NONE) {
+	if (!disable_vmi_timer && rel->type != VMI_RELOCATION_NONE) {
 		vmi_timer_ops.get_cycle_frequency = (void *)rel->eip;
 		vmi_timer_ops.get_cycle_counter =
 			vmi_get_function(VMI_CALL_GetCycleCounter);
@@ -891,13 +901,19 @@ static inline int __init activate_vmi(vo
 #endif
 		paravirt_ops.get_scheduled_cycles = vmi_get_sched_cycles;
  		paravirt_ops.get_cpu_khz = vmi_cpu_khz;
-	}
-	if (!disable_noidle)
+
+		/* We have true wallclock functions; disable CMOS clock sync */
+		no_sync_cmos_clock = 1;
+	} else {
+		disable_noidle = 1;
+		disable_vmi_timer = 1;
+	}
+
+	/* No idle HZ mode only works if VMI timer and no idle is enabled */
+	if (disable_noidle || disable_vmi_timer)
 		para_fill(safe_halt, Halt);
-	else {
-		vmi_ops.halt = vmi_get_function(VMI_CALL_Halt);
-		paravirt_ops.safe_halt = vmi_safe_halt;
-	}
+	else
+		para_wrap(safe_halt, vmi_safe_halt, halt, Halt);
 
 	/*
 	 * Alternative instruction rewriting doesn't happen soon enough
@@ -933,10 +949,9 @@ void __init vmi_init(void)
 	activate_vmi();
 
 #ifdef CONFIG_X86_IO_APIC
+	/* This is virtual hardware; timer routing is wired correctly */
 	no_timer_check = 1;
 #endif
-	no_sync_cmos_clock = 1;
-
 	local_irq_restore(flags & X86_EFLAGS_IF);
 }
 
@@ -960,6 +975,9 @@ static int __init parse_vmi(char *arg)
 	} else if (!strcmp(arg, "disable_mtrr")) {
 		clear_bit(X86_FEATURE_MTRR, boot_cpu_data.x86_capability);
 		disable_mtrr = 1;
+	} else if (!strcmp(arg, "disable_timer")) {
+		disable_vmi_timer = 1;
+		disable_noidle = 1;
 	} else if (!strcmp(arg, "disable_noidle"))
 		disable_noidle = 1;
 	return 0;
diff -r 0ba8434a5c7e include/asm-i386/vmi.h
--- a/include/asm-i386/vmi.h	Thu Mar 01 16:49:27 2007 -0800
+++ b/include/asm-i386/vmi.h	Thu Mar 01 16:49:33 2007 -0800
@@ -97,6 +97,7 @@
 #define VMI_CALL_SetInitialAPState	62
 #define VMI_CALL_APICWrite		63
 #define VMI_CALL_APICRead		64
+#define VMI_CALL_IODelay		65
 #define VMI_CALL_SetLazyMode		73
 
 /*

                 reply	other threads:[~2007-03-02  2:55 UTC|newest]

Thread overview: [no followups] expand[flat|nested]  mbox.gz  Atom feed

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=200703020255.l222tltk009694@zach-dev.vmware.com \
    --to=zach@vmware.com \
    --cc=ak@muc.de \
    --cc=akpm@osdl.org \
    --cc=anthony@codemonkey.ws \
    --cc=arai@vmware.com \
    --cc=chrisw@sous-sol.org \
    --cc=dhecht@vmware.com \
    --cc=jeremy@goop.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=rusty@rustcorp.com.au \
    --cc=torvalds@osdl.org \
    --cc=virtualization@lists.osdl.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.