linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [RFC, PATCH 17/24] i386 Vmi msr patch
@ 2006-03-13 18:12 Zachary Amsden
  2006-03-14 16:23 ` Andi Kleen
  2006-03-22 20:21 ` Andi Kleen
  0 siblings, 2 replies; 6+ messages in thread
From: Zachary Amsden @ 2006-03-13 18:12 UTC (permalink / raw)
  To: Linus Torvalds, Linux Kernel Mailing List,
	Virtualization Mailing List, Xen-devel, Andrew Morton,
	Zachary Amsden, Dan Hecht, Dan Arai, Anne Holler,
	Pratap Subrahmanyam, Christopher Li, Joshua LeVasseur,
	Chris Wright, Rik Van Riel, Jyothy Reddy, Jack Lo, Kip Macy,
	Jan Beulich, Ky Srinivasan, Wim Coekaerts, Leendert van Doorn,
	Zachary Amsden

Fairly straightforward code motion of MSR / TSC / PMC accessors
to the sub-arch level.  Note that rdmsr/wrmsr_safe functions are
not moved; Linux relies on the fault behavior here in the event
that certain MSRs are not supported on hardware, and combining
this with a VMI wrapper is overly complicated.  The instructions
are virtualizable with trap and emulate, not on critical code
paths, and only used as part of the MSR /proc device, which is
highly sketchy to use inside a virtual machine, but must be
allowed as part of the compile, since it is useful on native.

Signed-off-by: Zachary Amsden <zach@vmware.com>

Index: linux-2.6.16-rc5/include/asm-i386/msr.h
===================================================================
--- linux-2.6.16-rc5.orig/include/asm-i386/msr.h	2006-03-08 10:31:10.000000000 -0800
+++ linux-2.6.16-rc5/include/asm-i386/msr.h	2006-03-08 10:32:07.000000000 -0800
@@ -1,22 +1,14 @@
 #ifndef __ASM_MSR_H
 #define __ASM_MSR_H
 
+#include <mach_msr.h>
+
 /*
  * Access to machine-specific registers (available on 586 and better only)
  * Note: the rd* operations modify the parameters directly (without using
  * pointer indirection), this allows gcc to optimize better
  */
 
-#define rdmsr(msr,val1,val2) \
-	__asm__ __volatile__("rdmsr" \
-			  : "=a" (val1), "=d" (val2) \
-			  : "c" (msr))
-
-#define wrmsr(msr,val1,val2) \
-	__asm__ __volatile__("wrmsr" \
-			  : /* no outputs */ \
-			  : "c" (msr), "a" (val1), "d" (val2))
-
 #define rdmsrl(msr,val) do { \
 	unsigned long l__,h__; \
 	rdmsr (msr, l__, h__);  \
@@ -62,22 +54,6 @@ static inline void wrmsrl (unsigned long
 		     : "c" (msr), "i" (-EFAULT));\
 	ret__; })
 
-#define rdtsc(low,high) \
-     __asm__ __volatile__("rdtsc" : "=a" (low), "=d" (high))
-
-#define rdtscl(low) \
-     __asm__ __volatile__("rdtsc" : "=a" (low) : : "edx")
-
-#define rdtscll(val) \
-     __asm__ __volatile__("rdtsc" : "=A" (val))
-
-#define write_tsc(val1,val2) wrmsr(0x10, val1, val2)
-
-#define rdpmc(counter,low,high) \
-     __asm__ __volatile__("rdpmc" \
-			  : "=a" (low), "=d" (high) \
-			  : "c" (counter))
-
 /* symbolic names for some interesting MSRs */
 /* Intel defined MSRs. */
 #define MSR_IA32_P5_MC_ADDR		0
Index: linux-2.6.16-rc5/include/asm-i386/mach-vmi/mach_msr.h
===================================================================
--- linux-2.6.16-rc5.orig/include/asm-i386/mach-vmi/mach_msr.h	2006-03-08 10:32:07.000000000 -0800
+++ linux-2.6.16-rc5/include/asm-i386/mach-vmi/mach_msr.h	2006-03-08 10:32:30.000000000 -0800
@@ -0,0 +1,79 @@
+#ifndef MACH_MSR_H
+#define MACH_MSR_H
+
+#include <vmi.h>
+
+static inline u64 vmi_rdmsr(const u32 msr)
+{
+	u64 ret;
+	vmi_wrap_call(
+		RDMSR, "rdmsr",
+		VMI_OREG64 (ret),
+		1, "c" (msr),
+		VMI_CLOBBER(TWO_RETURNS));
+	return ret;
+}
+
+#define rdmsr(msr,val1,val2) \
+do { \
+	u64 _val = vmi_rdmsr(msr); \
+	val1 = (u32)_val; \
+	val2 = (u32)(_val >> 32); \
+} while (0)
+
+static inline void wrmsr(const u32 msr, const u32 valLo, const u32 valHi)
+{
+	vmi_wrap_call(
+		WRMSR, "wrmsr",
+		VMI_NO_OUTPUT,
+		3, XCONC("a"(valLo), "d"(valHi), "c"(msr)),
+		VMI_CLOBBER_EXTENDED(ZERO_RETURNS, "memory"));
+}
+
+static inline u64 vmi_rdtsc(void)
+{
+	u64 ret;
+	vmi_wrap_call(
+		RDTSC, "rdtsc",
+		VMI_OREG64 (ret),
+		0, VMI_NO_INPUT,
+		VMI_CLOBBER(TWO_RETURNS));
+	return ret;
+}
+
+#define rdtsc(low,high) \
+do { \
+	u64 _val = vmi_rdtsc(); \
+	low = (u32)_val; \
+	high = (u32)(_val >> 32); \
+} while (0)
+
+#define rdtscl(low) \
+do { \
+	u64 _val = vmi_rdtsc(); \
+	low = (u32)_val; \
+} while (0)
+
+#define rdtscll(val) do { val = vmi_rdtsc(); } while (0)
+
+#define write_tsc(val1,val2) wrmsr(0x10, val1, val2)
+
+static inline u64 vmi_rdpmc(const u32 counter)
+{
+	u64 ret;
+	vmi_wrap_call(
+		RDPMC, "rdpmc",
+		VMI_OREG64 (ret),
+		1, "c" (counter),
+		VMI_CLOBBER(TWO_RETURNS));
+	return ret;
+}
+
+#define rdpmc(counter,val1,val2) \
+do { \
+	u64 _val = vmi_rdpmc(counter); \
+	val1 = (u32)_val; \
+	val2 = (u32)(_val >> 32); \
+} while (0)
+
+#endif
Index: linux-2.6.16-rc5/include/asm-i386/mach-default/mach_msr.h
===================================================================
--- linux-2.6.16-rc5.orig/include/asm-i386/mach-default/mach_msr.h	2006-03-08 10:32:07.000000000 -0800
+++ linux-2.6.16-rc5/include/asm-i386/mach-default/mach_msr.h	2006-03-08 10:32:07.000000000 -0800
@@ -0,0 +1,30 @@
+#ifndef MACH_MSR_H
+#define MACH_MSR_H
+
+#define rdmsr(msr,val1,val2) \
+	__asm__ __volatile__("rdmsr" \
+			  : "=a" (val1), "=d" (val2) \
+			  : "c" (msr))
+
+#define wrmsr(msr,val1,val2) \
+	__asm__ __volatile__("wrmsr" \
+			  : /* no outputs */ \
+			  : "c" (msr), "a" (val1), "d" (val2))
+
+#define rdtsc(low,high) \
+     __asm__ __volatile__("rdtsc" : "=a" (low), "=d" (high))
+
+#define rdtscl(low) \
+     __asm__ __volatile__("rdtsc" : "=a" (low) : : "edx")
+
+#define rdtscll(val) \
+     __asm__ __volatile__("rdtsc" : "=A" (val))
+
+#define write_tsc(val1,val2) wrmsr(0x10, val1, val2)
+
+#define rdpmc(counter,low,high) \
+     __asm__ __volatile__("rdpmc" \
+			  : "=a" (low), "=d" (high) \
+			  : "c" (counter))
+
+#endif

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [RFC, PATCH 17/24] i386 Vmi msr patch
  2006-03-13 18:12 [RFC, PATCH 17/24] i386 Vmi msr patch Zachary Amsden
@ 2006-03-14 16:23 ` Andi Kleen
  2006-03-14 16:32   ` Zachary Amsden
  2006-03-22 20:21 ` Andi Kleen
  1 sibling, 1 reply; 6+ messages in thread
From: Andi Kleen @ 2006-03-14 16:23 UTC (permalink / raw)
  To: virtualization
  Cc: Zachary Amsden, Linus Torvalds, Linux Kernel Mailing List,
	Xen-devel, Andrew Morton, Dan Hecht, Dan Arai, Anne Holler,
	Pratap Subrahmanyam, Christopher Li, Joshua LeVasseur,
	Chris Wright, Rik Van Riel, Jyothy Reddy, Jack Lo, Kip Macy,
	Jan Beulich, Ky Srinivasan, Wim Coekaerts, Leendert van Doorn

On Monday 13 March 2006 19:12, Zachary Amsden wrote:
> Fairly straightforward code motion of MSR / TSC / PMC accessors
> to the sub-arch level.  Note that rdmsr/wrmsr_safe functions are
> not moved; Linux relies on the fault behavior here in the event
> that certain MSRs are not supported on hardware, and combining
> this with a VMI wrapper is overly complicated.  The instructions
> are virtualizable with trap and emulate, not on critical code
> paths, and only used as part of the MSR /proc device, which is
> highly sketchy to use inside a virtual machine, but must be
> allowed as part of the compile, since it is useful on native.

I'm not aware of any MSR access being on a critical code
path on a 32bit kernel. 

And I don't think it's a good idea to virtualize the TSC 
without CPU support.

Why would you want to do any of this?

-Andi

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [RFC, PATCH 17/24] i386 Vmi msr patch
  2006-03-14 16:23 ` Andi Kleen
@ 2006-03-14 16:32   ` Zachary Amsden
  2006-03-14 17:43     ` Andi Kleen
  0 siblings, 1 reply; 6+ messages in thread
From: Zachary Amsden @ 2006-03-14 16:32 UTC (permalink / raw)
  To: Andi Kleen
  Cc: virtualization, Linus Torvalds, Linux Kernel Mailing List,
	Xen-devel, Andrew Morton, Dan Hecht, Dan Arai, Anne Holler,
	Pratap Subrahmanyam, Christopher Li, Joshua LeVasseur,
	Chris Wright, Rik Van Riel, Jyothy Reddy, Jack Lo, Kip Macy,
	Jan Beulich, Ky Srinivasan, Wim Coekaerts, Leendert van Doorn

Andi Kleen wrote:
> On Monday 13 March 2006 19:12, Zachary Amsden wrote:
>   
>> Fairly straightforward code motion of MSR / TSC / PMC accessors
>> to the sub-arch level.  Note that rdmsr/wrmsr_safe functions are
>> not moved; Linux relies on the fault behavior here in the event
>> that certain MSRs are not supported on hardware, and combining
>> this with a VMI wrapper is overly complicated.  The instructions
>> are virtualizable with trap and emulate, not on critical code
>> paths, and only used as part of the MSR /proc device, which is
>> highly sketchy to use inside a virtual machine, but must be
>> allowed as part of the compile, since it is useful on native.
>>     
>
> I'm not aware of any MSR access being on a critical code
> path on a 32bit kernel. 
>   

There aren't really any.  There are some unexpected ones - such as 
setting SYSENTER_CS during a context switch, but only if leaving v8086 
mode, which isn't a common case.  But most importantly, the MSR 
functions were challenging to get correct, because they combine two 
novel elements - 64 bit values, as well as non-C calling conventions.  
They were actually some of the first functions I inlined, because I knew 
there would be problems, and the solutions would yield more powerful 
inlining macros.

> And I don't think it's a good idea to virtualize the TSC 
> without CPU support.
>   

We currently don't support configurations without a TSC.  But we're not 
trying to virtualize the TSC without CPU support.  It is possible.  But 
I have no idea _why_ you would want to do such a thing.

Zach

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [RFC, PATCH 17/24] i386 Vmi msr patch
  2006-03-14 16:32   ` Zachary Amsden
@ 2006-03-14 17:43     ` Andi Kleen
  2006-03-14 18:03       ` Zachary Amsden
  0 siblings, 1 reply; 6+ messages in thread
From: Andi Kleen @ 2006-03-14 17:43 UTC (permalink / raw)
  To: Zachary Amsden
  Cc: virtualization, Linus Torvalds, Linux Kernel Mailing List,
	Xen-devel, Andrew Morton, Dan Hecht, Dan Arai, Anne Holler,
	Pratap Subrahmanyam, Christopher Li, Joshua LeVasseur,
	Chris Wright, Rik Van Riel, Jyothy Reddy, Jack Lo, Kip Macy,
	Jan Beulich, Ky Srinivasan, Wim Coekaerts, Leendert van Doorn

On Tuesday 14 March 2006 17:32, Zachary Amsden wrote:

> There aren't really any.  There are some unexpected ones - such as
> setting SYSENTER_CS during a context switch, but only if leaving v8086
> mode, which isn't a common case.  But most importantly, the MSR
> functions were challenging to get correct, because they combine two
> novel elements - 64 bit values, as well as non-C calling conventions.
> They were actually some of the first functions I inlined, because I knew
> there would be problems, and the solutions would yield more powerful
> inlining macros.

That doesn't seem like a good rationale to add it to the kernel.

>
> > And I don't think it's a good idea to virtualize the TSC
> > without CPU support.
>
> We currently don't support configurations without a TSC.  But we're not
> trying to virtualize the TSC without CPU support.  It is possible.  But
> I have no idea _why_ you would want to do such a thing.

Don't change it then?

BTW I think it will be pretty tough to find enough competent reviewers
for your patchkit.

And is the spec still in flux or are you trying to implement an interface
for an specification that is already put into stone? 

-Andi

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [RFC, PATCH 17/24] i386 Vmi msr patch
  2006-03-14 17:43     ` Andi Kleen
@ 2006-03-14 18:03       ` Zachary Amsden
  0 siblings, 0 replies; 6+ messages in thread
From: Zachary Amsden @ 2006-03-14 18:03 UTC (permalink / raw)
  To: Andi Kleen
  Cc: virtualization, Linus Torvalds, Linux Kernel Mailing List,
	Xen-devel, Andrew Morton, Dan Hecht, Dan Arai, Anne Holler,
	Pratap Subrahmanyam, Christopher Li, Joshua LeVasseur,
	Chris Wright, Rik Van Riel, Jyothy Reddy, Jack Lo, Kip Macy,
	Jan Beulich, Ky Srinivasan, Wim Coekaerts, Leendert van Doorn

Andi Kleen wrote:
>>> And I don't think it's a good idea to virtualize the TSC
>>> without CPU support.
>>>       
>> We currently don't support configurations without a TSC.  But we're not
>> trying to virtualize the TSC without CPU support.  It is possible.  But
>> I have no idea _why_ you would want to do such a thing.
>>     
>
> Don't change it then?
>   

I misunderstood you.  I thought when you said, "I don't think it's a 
good idea virtualize the TSC without CPU support" that meant on CPUs 
without a hardware TSC.  If you really meant on CPUs without 
virtualization hardware, well, that is something we do, and it is not 
only possible, it is necessary for many non-paravirtualized operating 
systems.  As I mention in the interface documentation, TSC and the 
performance counters in general are very problematic in a virtual 
machine - they can be visible to userspace, and they are hard to keep 
accurate.  Long term, dropping kernel usage of the TSC is a good thing 
to do for VMs.

The primary motivation for the change here was to get rid of the call to 
the VMI ROM for TSC support.  I am in the process of removing the ROM 
call sites so that they can instead be patched into the kernel 
directly.  I am actually unsure if there are any uses of the TSC left on 
critical paths with the new VMI timer support, but I inlined the code 
here for consistency.

I agree that long term, it doesn't need to be done, and these 
instructions can all go back to trap and emulate.  Perhaps they are even 
worth dropping from the list of VMI calls, since they really are 
problematic and/or not useful in a virtual machine.  This again, is a 
good point for debate.  We're open to suggestions, but keep in mind that 
the fact that other operating systems and hypervisors might find 
something useful in virtualizing them.  Maybe not.

> BTW I think it will be pretty tough to find enough competent reviewers
> for your patchkit.
>
> And is the spec still in flux or are you trying to implement an interface
> for an specification that is already put into stone? 
>   

Everything is still very much open to change, and nothing is cast in 
stone - this is about finding the best interface for Linux, and it is 
clear that it needs some iteration before that is found.  Which is why 
we are looking for feedback, exactly like this.

Thanks,

Zach

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [RFC, PATCH 17/24] i386 Vmi msr patch
  2006-03-13 18:12 [RFC, PATCH 17/24] i386 Vmi msr patch Zachary Amsden
  2006-03-14 16:23 ` Andi Kleen
@ 2006-03-22 20:21 ` Andi Kleen
  1 sibling, 0 replies; 6+ messages in thread
From: Andi Kleen @ 2006-03-22 20:21 UTC (permalink / raw)
  To: virtualization
  Cc: Zachary Amsden, Linus Torvalds, Linux Kernel Mailing List,
	Xen-devel, Andrew Morton, Dan Hecht, Dan Arai, Anne Holler,
	Pratap Subrahmanyam, Christopher Li, Joshua LeVasseur,
	Chris Wright, Rik Van Riel, Jyothy Reddy, Jack Lo, Kip Macy,
	Jan Beulich, Ky Srinivasan, Wim Coekaerts, Leendert van Doorn


> -#define rdtsc(low,high) \
> -     __asm__ __volatile__("rdtsc" : "=a" (low), "=d" (high))
> -
> -#define rdtscl(low) \
> -     __asm__ __volatile__("rdtsc" : "=a" (low) : : "edx")
> -
> -#define rdtscll(val) \
> -     __asm__ __volatile__("rdtsc" : "=A" (val))
> -
> -#define write_tsc(val1,val2) wrmsr(0x10, val1, val2)
> -
> -#define rdpmc(counter,low,high) \
> -     __asm__ __volatile__("rdpmc" \
> -			  : "=a" (low), "=d" (high) \
> -			  : "c" (counter))

The kernel doesn't use rdpmc. And moving rdtsc is useless
as I wrote earlier.

But mostly it is used from user space and you will break everything there.

-Andi

^ permalink raw reply	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2006-03-22 20:59 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2006-03-13 18:12 [RFC, PATCH 17/24] i386 Vmi msr patch Zachary Amsden
2006-03-14 16:23 ` Andi Kleen
2006-03-14 16:32   ` Zachary Amsden
2006-03-14 17:43     ` Andi Kleen
2006-03-14 18:03       ` Zachary Amsden
2006-03-22 20:21 ` Andi Kleen

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).