* [PATCH 1/4] core, x86: Add user return notifiers
2009-09-16 12:45 [PATCH 0/4] User return notifiers / just-in-time MSR switching for KVM Avi Kivity
@ 2009-09-16 12:45 ` Avi Kivity
2009-09-18 18:14 ` H. Peter Anvin
2009-09-16 12:45 ` [PATCH 2/4] KVM: VMX: Move MSR_KERNEL_GS_BASE out of the vmx autoload msr area Avi Kivity
` (3 subsequent siblings)
4 siblings, 1 reply; 28+ messages in thread
From: Avi Kivity @ 2009-09-16 12:45 UTC (permalink / raw)
To: Ingo Molnar, Marcelo Tosatti; +Cc: kvm, linux-kernel
Add a general per-cpu notifier that is called whenever the kernel is
about to return to userspace. The notifier uses a thread_info flag
and existing checks, so there is no impact on user return or context
switch fast paths.
Signed-off-by: Avi Kivity <avi@redhat.com>
---
arch/Kconfig | 10 ++++++++++
arch/x86/Kconfig | 1 +
arch/x86/include/asm/thread_info.h | 7 +++++--
arch/x86/kernel/process.c | 2 ++
arch/x86/kernel/signal.c | 3 +++
kernel/Makefile | 1 +
6 files changed, 22 insertions(+), 2 deletions(-)
diff --git a/arch/Kconfig b/arch/Kconfig
index 99193b1..2275b46 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -70,6 +70,13 @@ config KRETPROBES
def_bool y
depends on KPROBES && HAVE_KRETPROBES
+config USER_RETURN_NOTIFIER
+ bool
+ depends on HAVE_USER_RETURN_NOTIFIER
+ help
+ Provide a kernel-internal notification when a cpu is about to
+ switch to user mode.
+
config HAVE_IOREMAP_PROT
bool
@@ -113,4 +120,7 @@ config HAVE_DMA_API_DEBUG
config HAVE_DEFAULT_NO_SPIN_MUTEXES
bool
+config HAVE_USER_RETURN_NOTIFIER
+ bool
+
source "kernel/gcov/Kconfig"
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 13ffa5d..5fc4fae 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -50,6 +50,7 @@ config X86
select HAVE_KERNEL_BZIP2
select HAVE_KERNEL_LZMA
select HAVE_ARCH_KMEMCHECK
+ select HAVE_USER_RETURN_NOTIFIER
config OUTPUT_FORMAT
string
diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h
index fad7d40..a8f46de 100644
--- a/arch/x86/include/asm/thread_info.h
+++ b/arch/x86/include/asm/thread_info.h
@@ -83,6 +83,7 @@ struct thread_info {
#define TIF_SYSCALL_AUDIT 7 /* syscall auditing active */
#define TIF_SECCOMP 8 /* secure computing */
#define TIF_MCE_NOTIFY 10 /* notify userspace of an MCE */
+#define TIF_USER_RETURN_NOTIFY 11 /* notify kernel of userspace return */
#define TIF_NOTSC 16 /* TSC is not accessible in userland */
#define TIF_IA32 17 /* 32bit process */
#define TIF_FORK 18 /* ret_from_fork */
@@ -107,6 +108,7 @@ struct thread_info {
#define _TIF_SYSCALL_AUDIT (1 << TIF_SYSCALL_AUDIT)
#define _TIF_SECCOMP (1 << TIF_SECCOMP)
#define _TIF_MCE_NOTIFY (1 << TIF_MCE_NOTIFY)
+#define _TIF_USER_RETURN_NOTIFY (1 << TIF_USER_RETURN_NOTIFY)
#define _TIF_NOTSC (1 << TIF_NOTSC)
#define _TIF_IA32 (1 << TIF_IA32)
#define _TIF_FORK (1 << TIF_FORK)
@@ -141,13 +143,14 @@ struct thread_info {
/* Only used for 64 bit */
#define _TIF_DO_NOTIFY_MASK \
- (_TIF_SIGPENDING|_TIF_MCE_NOTIFY|_TIF_NOTIFY_RESUME)
+ (_TIF_SIGPENDING | _TIF_MCE_NOTIFY | _TIF_NOTIFY_RESUME | \
+ _TIF_USER_RETURN_NOTIFY)
/* flags to check in __switch_to() */
#define _TIF_WORK_CTXSW \
(_TIF_IO_BITMAP|_TIF_DEBUGCTLMSR|_TIF_DS_AREA_MSR|_TIF_NOTSC)
-#define _TIF_WORK_CTXSW_PREV _TIF_WORK_CTXSW
+#define _TIF_WORK_CTXSW_PREV (_TIF_WORK_CTXSW|_TIF_USER_RETURN_NOTIFY)
#define _TIF_WORK_CTXSW_NEXT (_TIF_WORK_CTXSW|_TIF_DEBUG)
#define PREEMPT_ACTIVE 0x10000000
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 071166a..7ea6972 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -9,6 +9,7 @@
#include <linux/pm.h>
#include <linux/clockchips.h>
#include <linux/random.h>
+#include <linux/user-return-notifier.h>
#include <trace/power.h>
#include <asm/system.h>
#include <asm/apic.h>
@@ -227,6 +228,7 @@ void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p,
*/
memset(tss->io_bitmap, 0xff, prev->io_bitmap_max);
}
+ propagate_user_return_notify(prev_p, next_p);
}
int sys_fork(struct pt_regs *regs)
diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c
index 4c57875..bc0db10 100644
--- a/arch/x86/kernel/signal.c
+++ b/arch/x86/kernel/signal.c
@@ -19,6 +19,7 @@
#include <linux/stddef.h>
#include <linux/personality.h>
#include <linux/uaccess.h>
+#include <linux/user-return-notifier.h>
#include <asm/processor.h>
#include <asm/ucontext.h>
@@ -870,6 +871,8 @@ do_notify_resume(struct pt_regs *regs, void *unused, __u32 thread_info_flags)
clear_thread_flag(TIF_NOTIFY_RESUME);
tracehook_notify_resume(regs);
}
+ if (thread_info_flags & _TIF_USER_RETURN_NOTIFY)
+ fire_user_return_notifiers();
#ifdef CONFIG_X86_32
clear_thread_flag(TIF_IRET);
diff --git a/kernel/Makefile b/kernel/Makefile
index 2093a69..11c5de5 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -100,6 +100,7 @@ obj-$(CONFIG_RING_BUFFER) += trace/
obj-$(CONFIG_SMP) += sched_cpupri.o
obj-$(CONFIG_SLOW_WORK) += slow-work.o
obj-$(CONFIG_PERF_COUNTERS) += perf_counter.o
+obj-$(CONFIG_USER_RETURN_NOTIFIER) += user-return-notifier.o
ifneq ($(CONFIG_SCHED_OMIT_FRAME_POINTER),y)
# According to Alan Modra <alan@linuxcare.com.au>, the -fno-omit-frame-pointer is
--
1.6.4.1
* Re: [PATCH 1/4] core, x86: Add user return notifiers
2009-09-16 12:45 ` [PATCH 1/4] core, x86: Add user return notifiers Avi Kivity
@ 2009-09-18 18:14 ` H. Peter Anvin
2009-09-19 6:40 ` [PATCH v2] " Avi Kivity
2009-09-19 6:40 ` [PATCH 1/4] " Avi Kivity
0 siblings, 2 replies; 28+ messages in thread
From: H. Peter Anvin @ 2009-09-18 18:14 UTC (permalink / raw)
To: Avi Kivity; +Cc: Ingo Molnar, Marcelo Tosatti, kvm, linux-kernel
On 09/16/2009 05:45 AM, Avi Kivity wrote:
> Add a general per-cpu notifier that is called whenever the kernel is
> about to return to userspace. The notifier uses a thread_info flag
> and existing checks, so there is no impact on user return or context
> switch fast paths.
>
> Signed-off-by: Avi Kivity <avi@redhat.com>
The concept of this patch looks entirely sane, as does the code,
*except* for:
+#include <linux/user-return-notifier.h>
... missing header file, and ...
+obj-$(CONFIG_USER_RETURN_NOTIFIER) += user-return-notifier.o
... missing source file.
Could you fix the patch, please?
-hpa
--
H. Peter Anvin, Intel Open Source Technology Center
I work for Intel. I don't speak on their behalf.
* [PATCH v2] core, x86: Add user return notifiers
2009-09-18 18:14 ` H. Peter Anvin
@ 2009-09-19 6:40 ` Avi Kivity
2009-09-22 9:25 ` Avi Kivity
2009-09-19 6:40 ` [PATCH 1/4] " Avi Kivity
1 sibling, 1 reply; 28+ messages in thread
From: Avi Kivity @ 2009-09-19 6:40 UTC (permalink / raw)
To: Ingo Molnar, H. Peter Anvin; +Cc: linux-kernel, kvm, Marcelo Tosatti
Add a general per-cpu notifier that is called whenever the kernel is
about to return to userspace. The notifier uses a thread_info flag
and existing checks, so there is no impact on user return or context
switch fast paths.
Signed-off-by: Avi Kivity <avi@redhat.com>
---
v2: include new files in patch
arch/Kconfig | 10 +++++++
arch/x86/Kconfig | 1 +
arch/x86/include/asm/thread_info.h | 7 +++-
arch/x86/kernel/process.c | 2 +
arch/x86/kernel/signal.c | 3 ++
include/linux/user-return-notifier.h | 42 +++++++++++++++++++++++++++++++
kernel/Makefile | 1 +
kernel/user-return-notifier.c | 46 ++++++++++++++++++++++++++++++++++
8 files changed, 110 insertions(+), 2 deletions(-)
create mode 100644 include/linux/user-return-notifier.h
create mode 100644 kernel/user-return-notifier.c
diff --git a/arch/Kconfig b/arch/Kconfig
index beea3cc..b1d0757 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -82,6 +82,13 @@ config KRETPROBES
def_bool y
depends on KPROBES && HAVE_KRETPROBES
+config USER_RETURN_NOTIFIER
+ bool
+ depends on HAVE_USER_RETURN_NOTIFIER
+ help
+ Provide a kernel-internal notification when a cpu is about to
+ switch to user mode.
+
config HAVE_IOREMAP_PROT
bool
@@ -125,4 +132,7 @@ config HAVE_DMA_API_DEBUG
config HAVE_DEFAULT_NO_SPIN_MUTEXES
bool
+config HAVE_USER_RETURN_NOTIFIER
+ bool
+
source "kernel/gcov/Kconfig"
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index fc20fdc..ed21d6a 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -50,6 +50,7 @@ config X86
select HAVE_KERNEL_BZIP2
select HAVE_KERNEL_LZMA
select HAVE_ARCH_KMEMCHECK
+ select HAVE_USER_RETURN_NOTIFIER
config OUTPUT_FORMAT
string
diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h
index d27d0a2..375c917 100644
--- a/arch/x86/include/asm/thread_info.h
+++ b/arch/x86/include/asm/thread_info.h
@@ -83,6 +83,7 @@ struct thread_info {
#define TIF_SYSCALL_AUDIT 7 /* syscall auditing active */
#define TIF_SECCOMP 8 /* secure computing */
#define TIF_MCE_NOTIFY 10 /* notify userspace of an MCE */
+#define TIF_USER_RETURN_NOTIFY 11 /* notify kernel of userspace return */
#define TIF_NOTSC 16 /* TSC is not accessible in userland */
#define TIF_IA32 17 /* 32bit process */
#define TIF_FORK 18 /* ret_from_fork */
@@ -107,6 +108,7 @@ struct thread_info {
#define _TIF_SYSCALL_AUDIT (1 << TIF_SYSCALL_AUDIT)
#define _TIF_SECCOMP (1 << TIF_SECCOMP)
#define _TIF_MCE_NOTIFY (1 << TIF_MCE_NOTIFY)
+#define _TIF_USER_RETURN_NOTIFY (1 << TIF_USER_RETURN_NOTIFY)
#define _TIF_NOTSC (1 << TIF_NOTSC)
#define _TIF_IA32 (1 << TIF_IA32)
#define _TIF_FORK (1 << TIF_FORK)
@@ -142,13 +144,14 @@ struct thread_info {
/* Only used for 64 bit */
#define _TIF_DO_NOTIFY_MASK \
- (_TIF_SIGPENDING|_TIF_MCE_NOTIFY|_TIF_NOTIFY_RESUME)
+ (_TIF_SIGPENDING | _TIF_MCE_NOTIFY | _TIF_NOTIFY_RESUME | \
+ _TIF_USER_RETURN_NOTIFY)
/* flags to check in __switch_to() */
#define _TIF_WORK_CTXSW \
(_TIF_IO_BITMAP|_TIF_DEBUGCTLMSR|_TIF_DS_AREA_MSR|_TIF_NOTSC)
-#define _TIF_WORK_CTXSW_PREV _TIF_WORK_CTXSW
+#define _TIF_WORK_CTXSW_PREV (_TIF_WORK_CTXSW|_TIF_USER_RETURN_NOTIFY)
#define _TIF_WORK_CTXSW_NEXT (_TIF_WORK_CTXSW|_TIF_DEBUG)
#define PREEMPT_ACTIVE 0x10000000
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 071166a..7ea6972 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -9,6 +9,7 @@
#include <linux/pm.h>
#include <linux/clockchips.h>
#include <linux/random.h>
+#include <linux/user-return-notifier.h>
#include <trace/power.h>
#include <asm/system.h>
#include <asm/apic.h>
@@ -227,6 +228,7 @@ void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p,
*/
memset(tss->io_bitmap, 0xff, prev->io_bitmap_max);
}
+ propagate_user_return_notify(prev_p, next_p);
}
int sys_fork(struct pt_regs *regs)
diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c
index 81e5823..13aa99c 100644
--- a/arch/x86/kernel/signal.c
+++ b/arch/x86/kernel/signal.c
@@ -19,6 +19,7 @@
#include <linux/stddef.h>
#include <linux/personality.h>
#include <linux/uaccess.h>
+#include <linux/user-return-notifier.h>
#include <asm/processor.h>
#include <asm/ucontext.h>
@@ -872,6 +873,8 @@ do_notify_resume(struct pt_regs *regs, void *unused, __u32 thread_info_flags)
if (current->replacement_session_keyring)
key_replace_session_keyring();
}
+ if (thread_info_flags & _TIF_USER_RETURN_NOTIFY)
+ fire_user_return_notifiers();
#ifdef CONFIG_X86_32
clear_thread_flag(TIF_IRET);
diff --git a/include/linux/user-return-notifier.h b/include/linux/user-return-notifier.h
new file mode 100644
index 0000000..ef04e2e
--- /dev/null
+++ b/include/linux/user-return-notifier.h
@@ -0,0 +1,42 @@
+#ifndef _LINUX_USER_RETURN_NOTIFIER_H
+#define _LINUX_USER_RETURN_NOTIFIER_H
+
+#ifdef CONFIG_USER_RETURN_NOTIFIER
+
+#include <linux/list.h>
+#include <linux/sched.h>
+
+struct user_return_notifier {
+ void (*on_user_return)(struct user_return_notifier *urn);
+ struct hlist_node link;
+};
+
+
+void user_return_notifier_register(struct user_return_notifier *urn);
+void user_return_notifier_unregister(struct user_return_notifier *urn);
+
+static inline void propagate_user_return_notify(struct task_struct *prev,
+ struct task_struct *next)
+{
+ if (test_tsk_thread_flag(prev, TIF_USER_RETURN_NOTIFY)) {
+ clear_tsk_thread_flag(prev, TIF_USER_RETURN_NOTIFY);
+ set_tsk_thread_flag(next, TIF_USER_RETURN_NOTIFY);
+ }
+}
+
+void fire_user_return_notifiers(void);
+
+#else
+
+struct user_return_notifier {};
+
+static inline void propagate_user_return_notify(struct task_struct *prev,
+ struct task_struct *next)
+{
+}
+
+static inline void fire_user_return_notifiers(void) {}
+
+#endif
+
+#endif
diff --git a/kernel/Makefile b/kernel/Makefile
index 961379c..f6abe84 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -98,6 +98,7 @@ obj-$(CONFIG_RING_BUFFER) += trace/
obj-$(CONFIG_SMP) += sched_cpupri.o
obj-$(CONFIG_SLOW_WORK) += slow-work.o
obj-$(CONFIG_PERF_COUNTERS) += perf_counter.o
+obj-$(CONFIG_USER_RETURN_NOTIFIER) += user-return-notifier.o
ifneq ($(CONFIG_SCHED_OMIT_FRAME_POINTER),y)
# According to Alan Modra <alan@linuxcare.com.au>, the -fno-omit-frame-pointer is
diff --git a/kernel/user-return-notifier.c b/kernel/user-return-notifier.c
new file mode 100644
index 0000000..530ccb8
--- /dev/null
+++ b/kernel/user-return-notifier.c
@@ -0,0 +1,46 @@
+
+#include <linux/user-return-notifier.h>
+#include <linux/percpu.h>
+#include <linux/sched.h>
+#include <linux/module.h>
+
+static DEFINE_PER_CPU(struct hlist_head, return_notifier_list);
+
+#define URN_LIST_HEAD per_cpu(return_notifier_list, raw_smp_processor_id())
+
+/*
+ * Request a notification when the current cpu returns to userspace. Must be
+ * called in atomic context. The notifier will also be called in atomic
+ * context.
+ */
+void user_return_notifier_register(struct user_return_notifier *urn)
+{
+ set_tsk_thread_flag(current, TIF_USER_RETURN_NOTIFY);
+ hlist_add_head(&urn->link, &URN_LIST_HEAD);
+}
+EXPORT_SYMBOL_GPL(user_return_notifier_register);
+
+/*
+ * Removes a registered user return notifier. Must be called from atomic
+ * context, and on the same cpu on which registration occurred.
+ */
+void user_return_notifier_unregister(struct user_return_notifier *urn)
+{
+ hlist_del(&urn->link);
+ if (hlist_empty(&URN_LIST_HEAD))
+ clear_tsk_thread_flag(current, TIF_USER_RETURN_NOTIFY);
+}
+EXPORT_SYMBOL_GPL(user_return_notifier_unregister);
+
+/* Calls registered user return notifiers */
+void fire_user_return_notifiers(void)
+{
+ struct user_return_notifier *urn;
+ struct hlist_node *tmp1, *tmp2;
+ struct hlist_head *head;
+
+ head = &get_cpu_var(return_notifier_list);
+ hlist_for_each_entry_safe(urn, tmp1, tmp2, head, link)
+ urn->on_user_return(urn);
+ put_cpu_var(return_notifier_list);
+}
--
1.6.4.1
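For readers new to the interface: a minimal usage sketch against the API
added above. It is illustrative only (example_urn, example_on_user_return
and example_arm are invented names, not part of the patch); the constraints
noted in the comments come from the kernel/user-return-notifier.c comments
above.

#include <linux/percpu.h>
#include <linux/user-return-notifier.h>

static DEFINE_PER_CPU(struct user_return_notifier, example_urn);

static void example_on_user_return(struct user_return_notifier *urn)
{
        /* Runs on the registering cpu, in atomic context, just before
         * the kernel returns to userspace. Unregistering here makes
         * the notifier one-shot; otherwise it keeps firing on every
         * return until explicitly unregistered. */
        user_return_notifier_unregister(urn);
}

static void example_arm(void)
{
        struct user_return_notifier *urn = &__get_cpu_var(example_urn);

        /* Must run in atomic context, and an already-registered
         * notifier must not be registered again (patch 3/4 below
         * tracks that with a 'registered' flag). */
        urn->on_user_return = example_on_user_return;
        user_return_notifier_register(urn);
}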
* Re: [PATCH v2] core, x86: Add user return notifiers
2009-09-19 6:40 ` [PATCH v2] " Avi Kivity
@ 2009-09-22 9:25 ` Avi Kivity
2009-09-22 9:37 ` Arjan van de Ven
2009-09-22 14:32 ` Ingo Molnar
0 siblings, 2 replies; 28+ messages in thread
From: Avi Kivity @ 2009-09-22 9:25 UTC (permalink / raw)
To: Ingo Molnar, H. Peter Anvin; +Cc: linux-kernel, kvm, Marcelo Tosatti
On 09/19/2009 09:40 AM, Avi Kivity wrote:
> Add a general per-cpu notifier that is called whenever the kernel is
> about to return to userspace. The notifier uses a thread_info flag
> and existing checks, so there is no impact on user return or context
> switch fast paths.
>
Ingo/Peter?
--
error compiling committee.c: too many arguments to function
* Re: [PATCH v2] core, x86: Add user return notifiers
2009-09-22 9:25 ` Avi Kivity
@ 2009-09-22 9:37 ` Arjan van de Ven
2009-09-22 9:48 ` Avi Kivity
2009-09-22 14:32 ` Ingo Molnar
1 sibling, 1 reply; 28+ messages in thread
From: Arjan van de Ven @ 2009-09-22 9:37 UTC (permalink / raw)
To: Avi Kivity
Cc: Ingo Molnar, H. Peter Anvin, linux-kernel, kvm, Marcelo Tosatti
On Tue, 22 Sep 2009 12:25:33 +0300
Avi Kivity <avi@redhat.com> wrote:
> On 09/19/2009 09:40 AM, Avi Kivity wrote:
> > Add a general per-cpu notifier that is called whenever the kernel is
> > about to return to userspace. The notifier uses a thread_info flag
> > and existing checks, so there is no impact on user return or context
> > switch fast paths.
> >
>
> Ingo/Peter?
isn't this like really expensive when used?
--
Arjan van de Ven Intel Open Source Technology Centre
For development, discussion and tips for power savings,
visit http://www.lesswatts.org
* Re: [PATCH v2] core, x86: Add user return notifiers
2009-09-22 9:37 ` Arjan van de Ven
@ 2009-09-22 9:48 ` Avi Kivity
0 siblings, 0 replies; 28+ messages in thread
From: Avi Kivity @ 2009-09-22 9:48 UTC (permalink / raw)
To: Arjan van de Ven
Cc: Ingo Molnar, H. Peter Anvin, linux-kernel, kvm, Marcelo Tosatti
On 09/22/2009 12:37 PM, Arjan van de Ven wrote:
> On Tue, 22 Sep 2009 12:25:33 +0300
> Avi Kivity<avi@redhat.com> wrote:
>
>
>> On 09/19/2009 09:40 AM, Avi Kivity wrote:
>>
>>> Add a general per-cpu notifier that is called whenever the kernel is
>>> about to return to userspace. The notifier uses a thread_info flag
>>> and existing checks, so there is no impact on user return or context
>>> switch fast paths.
>>>
>>>
>> Ingo/Peter?
>>
> isn't this like really expensive when used?
>
No, why? It triggers a call to do_notify_resume() and walks a list of
length 1.
--
error compiling committee.c: too many arguments to function
* Re: [PATCH v2] core, x86: Add user return notifiers
2009-09-22 9:25 ` Avi Kivity
2009-09-22 9:37 ` Arjan van de Ven
@ 2009-09-22 14:32 ` Ingo Molnar
2009-09-22 14:45 ` Avi Kivity
` (3 more replies)
1 sibling, 4 replies; 28+ messages in thread
From: Ingo Molnar @ 2009-09-22 14:32 UTC (permalink / raw)
To: Avi Kivity, Peter Zijlstra
Cc: H. Peter Anvin, linux-kernel, kvm, Marcelo Tosatti
* Avi Kivity <avi@redhat.com> wrote:
> On 09/19/2009 09:40 AM, Avi Kivity wrote:
>> Add a general per-cpu notifier that is called whenever the kernel is
>> about to return to userspace. The notifier uses a thread_info flag
>> and existing checks, so there is no impact on user return or context
>> switch fast paths.
>>
>
> Ingo/Peter?
Would be nice to convert some existing open-coded return-to-user-space
logic to this facility. One such candidate would be lockdep_sys_exit?
Ingo
* Re: [PATCH v2] core, x86: Add user return notifiers
2009-09-22 14:32 ` Ingo Molnar
@ 2009-09-22 14:45 ` Avi Kivity
2009-09-22 15:50 ` Avi Kivity
2009-09-22 15:19 ` H. Peter Anvin
` (2 subsequent siblings)
3 siblings, 1 reply; 28+ messages in thread
From: Avi Kivity @ 2009-09-22 14:45 UTC (permalink / raw)
To: Ingo Molnar
Cc: Peter Zijlstra, H. Peter Anvin, linux-kernel, kvm,
Marcelo Tosatti
On 09/22/2009 05:32 PM, Ingo Molnar wrote:
> * Avi Kivity<avi@redhat.com> wrote:
>
>
>> On 09/19/2009 09:40 AM, Avi Kivity wrote:
>>
>>> Add a general per-cpu notifier that is called whenever the kernel is
>>> about to return to userspace. The notifier uses a thread_info flag
>>> and existing checks, so there is no impact on user return or context
>>> switch fast paths.
>>>
>>>
>> Ingo/Peter?
>>
> Would be nice to convert some existing open-coded return-to-user-space
> logic to this facility. One such candidate would be lockdep_sys_exit?
>
I only implemented this for x86, while lockdep is arch independent. If
arch support is added, it should be trivial.
I think perf counters could use preempt notifiers, though; those are
arch independent.
--
error compiling committee.c: too many arguments to function
* Re: [PATCH v2] core, x86: Add user return notifiers
2009-09-22 14:45 ` Avi Kivity
@ 2009-09-22 15:50 ` Avi Kivity
2009-09-22 17:08 ` Avi Kivity
0 siblings, 1 reply; 28+ messages in thread
From: Avi Kivity @ 2009-09-22 15:50 UTC (permalink / raw)
To: Ingo Molnar
Cc: Peter Zijlstra, H. Peter Anvin, linux-kernel, kvm,
Marcelo Tosatti
On 09/22/2009 05:45 PM, Avi Kivity wrote:
>> Would be nice to convert some existing open-coded return-to-user-space
>> logic to this facility. One such candidate would be lockdep_sys_exit?
>
> I only implemented this for x86, while lockdep is arch independent.
> If arch support is added, it should be trivial.
>
The lockdep_sys_exit bit is actually x86/s390 only, and can easily be
adapted to use the new functionality on x86 only. I'll try it out.
--
error compiling committee.c: too many arguments to function
* Re: [PATCH v2] core, x86: Add user return notifiers
2009-09-22 15:50 ` Avi Kivity
@ 2009-09-22 17:08 ` Avi Kivity
0 siblings, 0 replies; 28+ messages in thread
From: Avi Kivity @ 2009-09-22 17:08 UTC (permalink / raw)
To: Ingo Molnar
Cc: Peter Zijlstra, H. Peter Anvin, linux-kernel, kvm,
Marcelo Tosatti
On 09/22/2009 06:50 PM, Avi Kivity wrote:
> On 09/22/2009 05:45 PM, Avi Kivity wrote:
>>> Would be nice to convert some existing open-coded return-to-user-space
>>> logic to this facility. One such candidate would be lockdep_sys_exit?
>>
>> I only implemented this for x86, while lockdep is arch independent.
>> If arch support is added, it should be trivial.
>>
>
> The lockdep_sys_exit bit is actually x86/s390 only, and can easily be
> adapted to use the new functionality on x86 only. I'll try it out.
Unfortunately it doesn't work out well. The notifier is called until
explicitly unregistered (since it relies on a bit in _TIF_DO_NOTIFY_MASK),
so we have to disarm it on the first return to userspace or it spins
forever. We could re-arm it on the next kernel entry, but we don't have
a kernel entry notifier so we'll just be moving hooks from one point to
another.
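Concretely, the conversion would need a self-disarming notifier along
these lines (a hypothetical sketch, not a posted patch):

static void lockdep_exit_urn(struct user_return_notifier *urn)
{
        lockdep_sys_exit();
        /* Disarm, or the exit work loop never clears the flag. */
        user_return_notifier_unregister(urn);
}

and every kernel entry would then have to re-register it, which is
exactly the hook we don't have.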
--
error compiling committee.c: too many arguments to function
* Re: [PATCH v2] core, x86: Add user return notifiers
2009-09-22 14:32 ` Ingo Molnar
2009-09-22 14:45 ` Avi Kivity
@ 2009-09-22 15:19 ` H. Peter Anvin
2009-10-01 15:21 ` Avi Kivity
2009-09-22 16:50 ` Peter Zijlstra
2009-09-22 18:06 ` Peter Zijlstra
3 siblings, 1 reply; 28+ messages in thread
From: H. Peter Anvin @ 2009-09-22 15:19 UTC (permalink / raw)
To: Ingo Molnar
Cc: Avi Kivity, Peter Zijlstra, linux-kernel, kvm, Marcelo Tosatti
Ingo Molnar wrote:
> * Avi Kivity <avi@redhat.com> wrote:
>
>> On 09/19/2009 09:40 AM, Avi Kivity wrote:
>>> Add a general per-cpu notifier that is called whenever the kernel is
>>> about to return to userspace. The notifier uses a thread_info flag
>>> and existing checks, so there is no impact on user return or context
>>> switch fast paths.
>>>
>> Ingo/Peter?
>
> Would be nice to convert some existing open-coded return-to-user-space
> logic to this facility. One such candidate would be lockdep_sys_exit?
>
> Ingo
Sorry, limited bandwidth due to LinuxCon, but I like the concept, and
the previous (partial) patch was really clean. I agree with Ingo that
arch support so we can use this as a general facility would be nice, but
I don't consider that as a prerequisite for merging.
-hpa
* Re: [PATCH v2] core, x86: Add user return notifiers
2009-09-22 15:19 ` H. Peter Anvin
@ 2009-10-01 15:21 ` Avi Kivity
2009-10-01 15:25 ` H. Peter Anvin
0 siblings, 1 reply; 28+ messages in thread
From: Avi Kivity @ 2009-10-01 15:21 UTC (permalink / raw)
To: H. Peter Anvin
Cc: Ingo Molnar, Peter Zijlstra, linux-kernel, kvm, Marcelo Tosatti
On 09/22/2009 06:19 PM, H. Peter Anvin wrote:
> Ingo Molnar wrote:
>> * Avi Kivity <avi@redhat.com> wrote:
>>
>>> On 09/19/2009 09:40 AM, Avi Kivity wrote:
>>>> Add a general per-cpu notifier that is called whenever the kernel is
>>>> about to return to userspace. The notifier uses a thread_info flag
>>>> and existing checks, so there is no impact on user return or context
>>>> switch fast paths.
>>> Ingo/Peter?
>>
>> Would be nice to convert some existing open-coded
>> return-to-user-space logic to this facility. One such candidate would
>> be lockdep_sys_exit?
>>
>> Ingo
>
> Sorry, limited bandwidth due to LinuxCon, but I like the concept, and
> the previous (partial) patch was really clean. I agree with Ingo that
> arch support so we can use this as a general facility would be nice,
> but I don't consider that as a prerequisite for merging.
Re-ping?
If accepted, please merge just the core patch and I will carry all of
them in parallel. Once tip is merged I'll drop my copy of the first patch.
--
Do not meddle in the internals of kernels, for they are subtle and quick to panic.
* Re: [PATCH v2] core, x86: Add user return notifiers
2009-10-01 15:21 ` Avi Kivity
@ 2009-10-01 15:25 ` H. Peter Anvin
2009-10-01 15:30 ` Avi Kivity
0 siblings, 1 reply; 28+ messages in thread
From: H. Peter Anvin @ 2009-10-01 15:25 UTC (permalink / raw)
To: Avi Kivity
Cc: Ingo Molnar, Peter Zijlstra, linux-kernel, kvm, Marcelo Tosatti
On 10/01/2009 08:21 AM, Avi Kivity wrote:
>
> Re-ping?
>
> If accepted, please merge just the core patch and I will carry all of
> them in parallel. Once tip is merged I'll drop my copy of the first patch.
>
I was just talking to Ingo about this... would you consider this .32 or
.33 material at this time? It came in awfully late for .32...
-hpa
--
H. Peter Anvin, Intel Open Source Technology Center
I work for Intel. I don't speak on their behalf.
* Re: [PATCH v2] core, x86: Add user return notifiers
2009-10-01 15:25 ` H. Peter Anvin
@ 2009-10-01 15:30 ` Avi Kivity
2009-10-01 18:33 ` H. Peter Anvin
0 siblings, 1 reply; 28+ messages in thread
From: Avi Kivity @ 2009-10-01 15:30 UTC (permalink / raw)
To: H. Peter Anvin
Cc: Ingo Molnar, Peter Zijlstra, linux-kernel, kvm, Marcelo Tosatti
On 10/01/2009 05:25 PM, H. Peter Anvin wrote:
> On 10/01/2009 08:21 AM, Avi Kivity wrote:
>
>> Re-ping?
>>
>> If accepted, please merge just the core patch and I will carry all of
>> them in parallel. Once tip is merged I'll drop my copy of the first patch.
>>
>>
> I was just talking to Ingo about this... would you consider this .32 or
> .33 material at this time? It came in awfully late for .32...
>
While I'd prefer it to be in .32, as there's much churn in both trees, I
guess we should follow the process and schedule it for .33.
--
Do not meddle in the internals of kernels, for they are subtle and quick to panic.
* Re: [PATCH v2] core, x86: Add user return notifiers
2009-10-01 15:30 ` Avi Kivity
@ 2009-10-01 18:33 ` H. Peter Anvin
0 siblings, 0 replies; 28+ messages in thread
From: H. Peter Anvin @ 2009-10-01 18:33 UTC (permalink / raw)
To: Avi Kivity
Cc: Ingo Molnar, Peter Zijlstra, linux-kernel, kvm, Marcelo Tosatti
On 10/01/2009 08:30 AM, Avi Kivity wrote:
>
> While I'd prefer it to be in .32, as there's much churn in both trees, I
> guess we should follow the process and schedule it for .33.
>
OK, sounds like a plan. I'll pull it into -tip.
-hpa
* Re: [PATCH v2] core, x86: Add user return notifiers
2009-09-22 14:32 ` Ingo Molnar
2009-09-22 14:45 ` Avi Kivity
2009-09-22 15:19 ` H. Peter Anvin
@ 2009-09-22 16:50 ` Peter Zijlstra
2009-09-22 16:52 ` Avi Kivity
2009-09-22 18:06 ` Peter Zijlstra
3 siblings, 1 reply; 28+ messages in thread
From: Peter Zijlstra @ 2009-09-22 16:50 UTC (permalink / raw)
To: Ingo Molnar
Cc: Avi Kivity, H. Peter Anvin, linux-kernel, kvm, Marcelo Tosatti
On Tue, 2009-09-22 at 16:32 +0200, Ingo Molnar wrote:
> * Avi Kivity <avi@redhat.com> wrote:
>
> > On 09/19/2009 09:40 AM, Avi Kivity wrote:
> >> Add a general per-cpu notifier that is called whenever the kernel is
> >> about to return to userspace. The notifier uses a thread_info flag
> >> and existing checks, so there is no impact on user return or context
> >> switch fast paths.
> >>
> >
> > Ingo/Peter?
>
> Would be nice to convert some existing open-coded return-to-user-space
> logic to this facility. One such candidate would be lockdep_sys_exit?
And here I was thinking this was one of the hottest code paths in the
whole kernel...
* Re: [PATCH v2] core, x86: Add user return notifiers
2009-09-22 16:50 ` Peter Zijlstra
@ 2009-09-22 16:52 ` Avi Kivity
2009-09-22 16:55 ` Peter Zijlstra
0 siblings, 1 reply; 28+ messages in thread
From: Avi Kivity @ 2009-09-22 16:52 UTC (permalink / raw)
To: Peter Zijlstra
Cc: Ingo Molnar, H. Peter Anvin, linux-kernel, kvm, Marcelo Tosatti
On 09/22/2009 07:50 PM, Peter Zijlstra wrote:
>> Would be nice to convert some existing open-coded return-to-user-space
>> logic to this facility. One such candidate would be lockdep_sys_exit?
>>
> And here I was thinking this was one of the hottest code paths in the
> whole kernel...
>
If you're using lockdep, surely that's not your biggest worry?
--
error compiling committee.c: too many arguments to function
* Re: [PATCH v2] core, x86: Add user return notifiers
2009-09-22 16:52 ` Avi Kivity
@ 2009-09-22 16:55 ` Peter Zijlstra
2009-09-22 17:05 ` Avi Kivity
0 siblings, 1 reply; 28+ messages in thread
From: Peter Zijlstra @ 2009-09-22 16:55 UTC (permalink / raw)
To: Avi Kivity
Cc: Ingo Molnar, H. Peter Anvin, linux-kernel, kvm, Marcelo Tosatti
On Tue, 2009-09-22 at 19:52 +0300, Avi Kivity wrote:
> On 09/22/2009 07:50 PM, Peter Zijlstra wrote:
> >> Would be nice to convert some existing open-coded return-to-user-space
> >> logic to this facility. One such candidate would be lockdep_sys_exit?
> >>
> > And here I was thinking this was one of the hottest code paths in the
> > whole kernel...
> >
>
> If you're using lockdep, surely that's not your biggest worry?
No, but that's all under #ifdef and fully disappears when not enabled.
Generic return-to-user notifiers don't sound like they will, though.
* Re: [PATCH v2] core, x86: Add user return notifiers
2009-09-22 16:55 ` Peter Zijlstra
@ 2009-09-22 17:05 ` Avi Kivity
0 siblings, 0 replies; 28+ messages in thread
From: Avi Kivity @ 2009-09-22 17:05 UTC (permalink / raw)
To: Peter Zijlstra
Cc: Ingo Molnar, H. Peter Anvin, linux-kernel, kvm, Marcelo Tosatti
On 09/22/2009 07:55 PM, Peter Zijlstra wrote:
>> If you're using lockdep, surely that's not your biggest worry?
>>
> No, but that's all under #ifdef and fully disappears when not enabled.
> Generic return-to-user notifiers don't sound like they will, though.
>
They will if not selected. If selected and not armed, they will have
zero runtime impact since they piggyback on existing branches
(_TIF_DO_NOTIFY_MASK and near relatives). If selected and armed they'll
cause __switch_to_xtra() on every context switch and do_notify_resume()
on syscall exit until disarmed, but then you've asked for it.
--
error compiling committee.c: too many arguments to function
* Re: [PATCH v2] core, x86: Add user return notifiers
2009-09-22 14:32 ` Ingo Molnar
` (2 preceding siblings ...)
2009-09-22 16:50 ` Peter Zijlstra
@ 2009-09-22 18:06 ` Peter Zijlstra
3 siblings, 0 replies; 28+ messages in thread
From: Peter Zijlstra @ 2009-09-22 18:06 UTC (permalink / raw)
To: Ingo Molnar
Cc: Avi Kivity, H. Peter Anvin, linux-kernel, kvm, Marcelo Tosatti
On Tue, 2009-09-22 at 16:32 +0200, Ingo Molnar wrote:
> Would be nice to convert some existing open-coded return-to-user-space
> logic to this facility. One such candidate would be lockdep_sys_exit?
I don't really like lockdep_sys_exit() in such a call;
lockdep_sys_exit() is currently placed such that it will execute as the
last C code before returning to user-space.
Put it any earlier and you've got a window where people can leak a lock
to userspace undetected.
* Re: [PATCH 1/4] core, x86: Add user return notifiers
2009-09-18 18:14 ` H. Peter Anvin
2009-09-19 6:40 ` [PATCH v2] " Avi Kivity
@ 2009-09-19 6:40 ` Avi Kivity
1 sibling, 0 replies; 28+ messages in thread
From: Avi Kivity @ 2009-09-19 6:40 UTC (permalink / raw)
To: H. Peter Anvin; +Cc: Ingo Molnar, Marcelo Tosatti, kvm, linux-kernel
On 09/18/2009 09:14 PM, H. Peter Anvin wrote:
> On 09/16/2009 05:45 AM, Avi Kivity wrote:
>
>> Add a general per-cpu notifier that is called whenever the kernel is
>> about to return to userspace. The notifier uses a thread_info flag
>> and existing checks, so there is no impact on user return or context
>> switch fast paths.
>>
>> Signed-off-by: Avi Kivity<avi@redhat.com>
>>
> The concept of this patch looks entirely sane, as does the code,
> *except* for:
>
> +#include<linux/user-return-notifier.h>
>
> ... missing header file, and ...
>
> +obj-$(CONFIG_USER_RETURN_NOTIFIER) += user-return-notifier.o
>
> ... missing source file.
>
> Could you fix the patch, please?
>
>
Doh. Sent out a fixed patch.
--
Do not meddle in the internals of kernels, for they are subtle and quick to panic.
* [PATCH 2/4] KVM: VMX: Move MSR_KERNEL_GS_BASE out of the vmx autoload msr area
2009-09-16 12:45 [PATCH 0/4] User return notifiers / just-in-time MSR switching for KVM Avi Kivity
2009-09-16 12:45 ` [PATCH 1/4] core, x86: Add user return notifiers Avi Kivity
@ 2009-09-16 12:45 ` Avi Kivity
2009-09-16 12:45 ` [PATCH 3/4] KVM: x86 shared msr infrastructure Avi Kivity
` (2 subsequent siblings)
4 siblings, 0 replies; 28+ messages in thread
From: Avi Kivity @ 2009-09-16 12:45 UTC (permalink / raw)
To: Ingo Molnar, Marcelo Tosatti; +Cc: kvm, linux-kernel
Currently MSR_KERNEL_GS_BASE is saved and restored as part of the
guest/host MSR reloading. Since we wish to lazy-restore all the other
MSRs, save and reload MSR_KERNEL_GS_BASE explicitly instead of using
the common code.
Signed-off-by: Avi Kivity <avi@redhat.com>
---
arch/x86/kvm/vmx.c | 34 +++++++++++++++++++++-------------
1 files changed, 21 insertions(+), 13 deletions(-)
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index d3213ac..547881a 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -80,7 +80,8 @@ struct vcpu_vmx {
int save_nmsrs;
int msr_offset_efer;
#ifdef CONFIG_X86_64
- int msr_offset_kernel_gs_base;
+ u64 msr_host_kernel_gs_base;
+ u64 msr_guest_kernel_gs_base;
#endif
struct vmcs *vmcs;
struct {
@@ -183,7 +184,7 @@ static void ept_save_pdptrs(struct kvm_vcpu *vcpu);
*/
static const u32 vmx_msr_index[] = {
#ifdef CONFIG_X86_64
- MSR_SYSCALL_MASK, MSR_LSTAR, MSR_CSTAR, MSR_KERNEL_GS_BASE,
+ MSR_SYSCALL_MASK, MSR_LSTAR, MSR_CSTAR,
#endif
MSR_EFER, MSR_K6_STAR,
};
@@ -649,10 +650,10 @@ static void vmx_save_host_state(struct kvm_vcpu *vcpu)
#endif
#ifdef CONFIG_X86_64
- if (is_long_mode(&vmx->vcpu))
- save_msrs(vmx->host_msrs +
- vmx->msr_offset_kernel_gs_base, 1);
-
+ if (is_long_mode(&vmx->vcpu)) {
+ rdmsrl(MSR_KERNEL_GS_BASE, vmx->msr_host_kernel_gs_base);
+ wrmsrl(MSR_KERNEL_GS_BASE, vmx->msr_guest_kernel_gs_base);
+ }
#endif
load_msrs(vmx->guest_msrs, vmx->save_nmsrs);
load_transition_efer(vmx);
@@ -686,6 +687,12 @@ static void __vmx_load_host_state(struct vcpu_vmx *vmx)
save_msrs(vmx->guest_msrs, vmx->save_nmsrs);
load_msrs(vmx->host_msrs, vmx->save_nmsrs);
reload_host_efer(vmx);
+#ifdef CONFIG_X86_64
+ if (is_long_mode(&vmx->vcpu)) {
+ rdmsrl(MSR_KERNEL_GS_BASE, vmx->msr_guest_kernel_gs_base);
+ wrmsrl(MSR_KERNEL_GS_BASE, vmx->msr_host_kernel_gs_base);
+ }
+#endif
}
static void vmx_load_host_state(struct vcpu_vmx *vmx)
@@ -915,9 +922,6 @@ static void setup_msrs(struct vcpu_vmx *vmx)
index = __find_msr_index(vmx, MSR_CSTAR);
if (index >= 0)
move_msr_up(vmx, index, save_nmsrs++);
- index = __find_msr_index(vmx, MSR_KERNEL_GS_BASE);
- if (index >= 0)
- move_msr_up(vmx, index, save_nmsrs++);
/*
* MSR_K6_STAR is only needed on long mode guests, and only
* if efer.sce is enabled.
@@ -929,10 +933,6 @@ static void setup_msrs(struct vcpu_vmx *vmx)
#endif
vmx->save_nmsrs = save_nmsrs;
-#ifdef CONFIG_X86_64
- vmx->msr_offset_kernel_gs_base =
- __find_msr_index(vmx, MSR_KERNEL_GS_BASE);
-#endif
vmx->msr_offset_efer = __find_msr_index(vmx, MSR_EFER);
if (cpu_has_vmx_msr_bitmap()) {
@@ -990,6 +990,10 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata)
case MSR_GS_BASE:
data = vmcs_readl(GUEST_GS_BASE);
break;
+ case MSR_KERNEL_GS_BASE:
+ vmx_load_host_state(to_vmx(vcpu));
+ data = to_vmx(vcpu)->msr_guest_kernel_gs_base;
+ break;
case MSR_EFER:
return kvm_get_msr_common(vcpu, msr_index, pdata);
#endif
@@ -1043,6 +1047,10 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data)
case MSR_GS_BASE:
vmcs_writel(GUEST_GS_BASE, data);
break;
+ case MSR_KERNEL_GS_BASE:
+ vmx_load_host_state(vmx);
+ vmx->msr_guest_kernel_gs_base = data;
+ break;
#endif
case MSR_IA32_SYSENTER_CS:
vmcs_write32(GUEST_SYSENTER_CS, data);
--
1.6.4.1
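A note on the vmx_get_msr()/vmx_set_msr() hunks above: while guest state
is loaded, the live MSR holds the guest's value, so the cached
vmx->msr_guest_kernel_gs_base may be stale. Calling vmx_load_host_state()
first syncs the live value back into the cache (and restores the host
value), after which the cache is safe to read or overwrite. A condensed
restatement of that ordering (a sketch; the function name is invented):

static u64 example_read_guest_kernel_gs_base(struct vcpu_vmx *vmx)
{
        /* For long mode guests, __vmx_load_host_state() does:
         *   rdmsrl(MSR_KERNEL_GS_BASE, vmx->msr_guest_kernel_gs_base);
         *   wrmsrl(MSR_KERNEL_GS_BASE, vmx->msr_host_kernel_gs_base);
         * so after this call the cached guest value is current. */
        vmx_load_host_state(vmx);
        return vmx->msr_guest_kernel_gs_base;
}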
* [PATCH 3/4] KVM: x86 shared msr infrastructure
2009-09-16 12:45 [PATCH 0/4] User return notifiers / just-in-time MSR switching for KVM Avi Kivity
2009-09-16 12:45 ` [PATCH 1/4] core, x86: Add user return notifiers Avi Kivity
2009-09-16 12:45 ` [PATCH 2/4] KVM: VMX: Move MSR_KERNEL_GS_BASE out of the vmx autoload msr area Avi Kivity
@ 2009-09-16 12:45 ` Avi Kivity
2009-09-16 21:21 ` Marcelo Tosatti
2009-09-16 12:45 ` [PATCH 4/4] KVM: VMX: Use " Avi Kivity
2009-10-25 12:23 ` [PATCH 0/4] User return notifiers / just-in-time MSR switching for KVM Avi Kivity
4 siblings, 1 reply; 28+ messages in thread
From: Avi Kivity @ 2009-09-16 12:45 UTC (permalink / raw)
To: Ingo Molnar, Marcelo Tosatti; +Cc: kvm, linux-kernel
The various syscall-related MSRs are fairly expensive to switch. Currently
we switch them on every vcpu preemption, which is far too often:
- if we're switching to a kernel thread (idle task, threaded interrupt,
kernel-mode virtio server (vhost-net), for example) and back, then
there's no need to switch those MSRs since kernel threads won't
be exiting to userspace.
- if we're switching to another guest running an identical OS, most likely
those MSRs will have the same value, so there's little point in reloading
them.
- if we're running the same OS on the guest and host, the MSRs will have
identical values and reloading is unnecessary.
This patch uses the new user return notifiers to implement last-minute
switching, and checks the msr values to avoid unnecessary reloading.
Signed-off-by: Avi Kivity <avi@redhat.com>
---
arch/x86/include/asm/kvm_host.h | 3 +
arch/x86/kvm/Kconfig | 1 +
arch/x86/kvm/x86.c | 79 +++++++++++++++++++++++++++++++++++++++
3 files changed, 83 insertions(+), 0 deletions(-)
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 45226f0..863bde8 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -799,4 +799,7 @@ int kvm_cpu_has_interrupt(struct kvm_vcpu *vcpu);
int kvm_arch_interrupt_allowed(struct kvm_vcpu *vcpu);
int kvm_cpu_get_interrupt(struct kvm_vcpu *v);
+void kvm_define_shared_msr(unsigned index, u32 msr);
+void kvm_set_shared_msr(unsigned index, u64 val);
+
#endif /* _ASM_X86_KVM_HOST_H */
diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig
index b84e571..4cd4983 100644
--- a/arch/x86/kvm/Kconfig
+++ b/arch/x86/kvm/Kconfig
@@ -28,6 +28,7 @@ config KVM
select HAVE_KVM_IRQCHIP
select HAVE_KVM_EVENTFD
select KVM_APIC_ARCHITECTURE
+ select USER_RETURN_NOTIFIER
---help---
Support hosting fully virtualized guest machines using hardware
virtualization extensions. You will need a fairly recent
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 1d454d9..0e7c40c 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -37,6 +37,7 @@
#include <linux/iommu.h>
#include <linux/intel-iommu.h>
#include <linux/cpufreq.h>
+#include <linux/user-return-notifier.h>
#include <trace/events/kvm.h>
#undef TRACE_INCLUDE_FILE
#define CREATE_TRACE_POINTS
@@ -87,6 +88,25 @@ EXPORT_SYMBOL_GPL(kvm_x86_ops);
int ignore_msrs = 0;
module_param_named(ignore_msrs, ignore_msrs, bool, S_IRUGO | S_IWUSR);
+#define KVM_NR_SHARED_MSRS 16
+
+struct kvm_shared_msrs_global {
+ int nr;
+ struct kvm_shared_msr {
+ u32 msr;
+ u64 value;
+ } msrs[KVM_NR_SHARED_MSRS];
+};
+
+struct kvm_shared_msrs {
+ struct user_return_notifier urn;
+ bool registered;
+ u64 current_value[KVM_NR_SHARED_MSRS];
+};
+
+static struct kvm_shared_msrs_global __read_mostly shared_msrs_global;
+static DEFINE_PER_CPU(struct kvm_shared_msrs, shared_msrs);
+
struct kvm_stats_debugfs_item debugfs_entries[] = {
{ "pf_fixed", VCPU_STAT(pf_fixed) },
{ "pf_guest", VCPU_STAT(pf_guest) },
@@ -123,6 +143,64 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
{ NULL }
};
+static void kvm_on_user_return(struct user_return_notifier *urn)
+{
+ unsigned slot;
+ struct kvm_shared_msr *global;
+ struct kvm_shared_msrs *locals
+ = container_of(urn, struct kvm_shared_msrs, urn);
+
+ for (slot = 0; slot < shared_msrs_global.nr; ++slot) {
+ global = &shared_msrs_global.msrs[slot];
+ if (global->value != locals->current_value[slot]) {
+ wrmsrl(global->msr, global->value);
+ locals->current_value[slot] = global->value;
+ }
+ }
+ locals->registered = false;
+ user_return_notifier_unregister(urn);
+}
+
+void kvm_define_shared_msr(unsigned slot, u32 msr)
+{
+ int cpu;
+ u64 value;
+
+ if (slot >= shared_msrs_global.nr)
+ shared_msrs_global.nr = slot + 1;
+ shared_msrs_global.msrs[slot].msr = msr;
+ rdmsrl_safe(msr, &value);
+ shared_msrs_global.msrs[slot].value = value;
+ for_each_online_cpu(cpu)
+ per_cpu(shared_msrs, cpu).current_value[slot] = value;
+}
+EXPORT_SYMBOL_GPL(kvm_define_shared_msr);
+
+static void kvm_shared_msr_cpu_online(void)
+{
+ unsigned i;
+ struct kvm_shared_msrs *locals = &__get_cpu_var(shared_msrs);
+
+ for (i = 0; i < shared_msrs_global.nr; ++i)
+ locals->current_value[i] = shared_msrs_global.msrs[i].value;
+}
+
+void kvm_set_shared_msr(unsigned slot, u64 value)
+{
+ struct kvm_shared_msrs *smsr = &__get_cpu_var(shared_msrs);
+
+ if (value == smsr->current_value[slot])
+ return;
+ smsr->current_value[slot] = value;
+ wrmsrl(shared_msrs_global.msrs[slot].msr, value);
+ if (!smsr->registered) {
+ smsr->urn.on_user_return = kvm_on_user_return;
+ user_return_notifier_register(&smsr->urn);
+ smsr->registered = true;
+ }
+}
+EXPORT_SYMBOL_GPL(kvm_set_shared_msr);
+
unsigned long segment_base(u16 selector)
{
struct descriptor_table gdt;
@@ -4696,6 +4774,7 @@ int kvm_arch_vcpu_reset(struct kvm_vcpu *vcpu)
void kvm_arch_hardware_enable(void *garbage)
{
+ kvm_shared_msr_cpu_online();
kvm_x86_ops->hardware_enable(garbage);
}
--
1.6.4.1
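In outline, a consumer drives this interface as follows (a condensed,
hypothetical sketch; example_init and example_vcpu_load are invented
names, and patch 4/4 below is the real VMX version):

static int __init example_init(void)
{
        /* Claim slot 0 for MSR_LSTAR; kvm_define_shared_msr() records
         * the current host value as the baseline for every cpu. */
        kvm_define_shared_msr(0, MSR_LSTAR);
        return 0;
}

static void example_vcpu_load(u64 guest_lstar)
{
        /* Write the guest value only if it differs from what this cpu
         * currently holds; this also arms the user return notifier. */
        kvm_set_shared_msr(0, guest_lstar);

        /* No explicit restore path: kvm_on_user_return() writes the
         * host value back, skipping unchanged MSRs, just before this
         * cpu next returns to userspace. */
}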
* Re: [PATCH 3/4] KVM: x86 shared msr infrastructure
2009-09-16 12:45 ` [PATCH 3/4] KVM: x86 shared msr infrastructure Avi Kivity
@ 2009-09-16 21:21 ` Marcelo Tosatti
2009-09-16 21:24 ` Avi Kivity
0 siblings, 1 reply; 28+ messages in thread
From: Marcelo Tosatti @ 2009-09-16 21:21 UTC (permalink / raw)
To: Avi Kivity; +Cc: Ingo Molnar, kvm, linux-kernel
On Wed, Sep 16, 2009 at 03:45:33PM +0300, Avi Kivity wrote:
> The various syscall-related MSRs are fairly expensive to switch. Currently
> we switch them on every vcpu preemption, which is far too often:
>
> - if we're switching to a kernel thread (idle task, threaded interrupt,
> kernel-mode virtio server (vhost-net), for example) and back, then
> there's no need to switch those MSRs since kernel threasd won't
> be exiting to userspace.
>
> - if we're switching to another guest running an identical OS, most likely
> those MSRs will have the same value, so there's little point in reloading
> them.
>
> - if we're running the same OS on the guest and host, the MSRs will have
> identical values and reloading is unnecessary.
>
> This patch uses the new user return notifiers to implement last-minute
> switching, and checks the msr values to avoid unnecessary reloading.
>
> Signed-off-by: Avi Kivity <avi@redhat.com>
> ---
> arch/x86/include/asm/kvm_host.h | 3 +
> arch/x86/kvm/Kconfig | 1 +
> arch/x86/kvm/x86.c | 79 +++++++++++++++++++++++++++++++++++++++
> 3 files changed, 83 insertions(+), 0 deletions(-)
>
> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> index 45226f0..863bde8 100644
> --- a/arch/x86/include/asm/kvm_host.h
> +++ b/arch/x86/include/asm/kvm_host.h
> @@ -799,4 +799,7 @@ int kvm_cpu_has_interrupt(struct kvm_vcpu *vcpu);
> int kvm_arch_interrupt_allowed(struct kvm_vcpu *vcpu);
> int kvm_cpu_get_interrupt(struct kvm_vcpu *v);
>
> +void kvm_define_shared_msr(unsigned index, u32 msr);
> +void kvm_set_shared_msr(unsigned index, u64 val);
> +
> #endif /* _ASM_X86_KVM_HOST_H */
> diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig
> index b84e571..4cd4983 100644
> --- a/arch/x86/kvm/Kconfig
> +++ b/arch/x86/kvm/Kconfig
> @@ -28,6 +28,7 @@ config KVM
> select HAVE_KVM_IRQCHIP
> select HAVE_KVM_EVENTFD
> select KVM_APIC_ARCHITECTURE
> + select USER_RETURN_NOTIFIER
> ---help---
> Support hosting fully virtualized guest machines using hardware
> virtualization extensions. You will need a fairly recent
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index 1d454d9..0e7c40c 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -37,6 +37,7 @@
> #include <linux/iommu.h>
> #include <linux/intel-iommu.h>
> #include <linux/cpufreq.h>
> +#include <linux/user-return-notifier.h>
> #include <trace/events/kvm.h>
> #undef TRACE_INCLUDE_FILE
> #define CREATE_TRACE_POINTS
> @@ -87,6 +88,25 @@ EXPORT_SYMBOL_GPL(kvm_x86_ops);
> int ignore_msrs = 0;
> module_param_named(ignore_msrs, ignore_msrs, bool, S_IRUGO | S_IWUSR);
>
> +#define KVM_NR_SHARED_MSRS 16
> +
> +struct kvm_shared_msrs_global {
> + int nr;
> + struct kvm_shared_msr {
> + u32 msr;
> + u64 value;
> + } msrs[KVM_NR_SHARED_MSRS];
> +};
> +
> +struct kvm_shared_msrs {
> + struct user_return_notifier urn;
> + bool registered;
> + u64 current_value[KVM_NR_SHARED_MSRS];
> +};
> +
> +static struct kvm_shared_msrs_global __read_mostly shared_msrs_global;
Does this assume the MSRs in question are consistent across CPUs?
I guess that is not true with arch_prctl(ARCH_SET_GS/ARCH_GET_GS)?
* Re: [PATCH 3/4] KVM: x86 shared msr infrastructure
2009-09-16 21:21 ` Marcelo Tosatti
@ 2009-09-16 21:24 ` Avi Kivity
0 siblings, 0 replies; 28+ messages in thread
From: Avi Kivity @ 2009-09-16 21:24 UTC (permalink / raw)
To: Marcelo Tosatti; +Cc: Ingo Molnar, kvm, linux-kernel
On 09/17/2009 12:21 AM, Marcelo Tosatti wrote:
>> +static struct kvm_shared_msrs_global __read_mostly shared_msrs_global;
>>
> Does this assume the MSRs in question are consistent across CPUs?
>
Yes. And they are.
> I guess that is not true with arch_prctl(ARCH_SET_GS/ARCH_GET_GS) ?
>
These aren't shared MSRs and are saved and restored independently.
--
I have a truly marvellous patch that fixes the bug which this
signature is too narrow to contain.
* [PATCH 4/4] KVM: VMX: Use shared msr infrastructure
2009-09-16 12:45 [PATCH 0/4] User return notifiers / just-in-time MSR switching for KVM Avi Kivity
` (2 preceding siblings ...)
2009-09-16 12:45 ` [PATCH 3/4] KVM: x86 shared msr infrastructure Avi Kivity
@ 2009-09-16 12:45 ` Avi Kivity
2009-10-25 12:23 ` [PATCH 0/4] User return notifiers / just-in-time MSR switching for KVM Avi Kivity
4 siblings, 0 replies; 28+ messages in thread
From: Avi Kivity @ 2009-09-16 12:45 UTC (permalink / raw)
To: Ingo Molnar, Marcelo Tosatti; +Cc: kvm, linux-kernel
Instead of reloading syscall MSRs on every preemption, use the new shared
msr infrastructure to reload them at the last possible minute (just before
exit to userspace).
Improves vcpu/idle/vcpu switches by about 2000 cycles (when EFER needs to be
reloaded as well).
Signed-off-by: Avi Kivity <avi@redhat.com>
---
arch/x86/kvm/vmx.c | 108 +++++++++++++++++++--------------------------------
1 files changed, 40 insertions(+), 68 deletions(-)
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 547881a..8dae1dc 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -67,6 +67,11 @@ struct vmcs {
char data[0];
};
+struct shared_msr_entry {
+ unsigned index;
+ u64 data;
+};
+
struct vcpu_vmx {
struct kvm_vcpu vcpu;
struct list_head local_vcpus_link;
@@ -74,8 +79,7 @@ struct vcpu_vmx {
int launched;
u8 fail;
u32 idt_vectoring_info;
- struct kvm_msr_entry *guest_msrs;
- struct kvm_msr_entry *host_msrs;
+ struct shared_msr_entry *guest_msrs;
int nmsrs;
int save_nmsrs;
int msr_offset_efer;
@@ -89,7 +93,6 @@ struct vcpu_vmx {
u16 fs_sel, gs_sel, ldt_sel;
int gs_ldt_reload_needed;
int fs_reload_needed;
- int guest_efer_loaded;
} host_state;
struct {
int vm86_active;
@@ -176,6 +179,8 @@ static struct kvm_vmx_segment_field {
VMX_SEGMENT_FIELD(LDTR),
};
+static u64 host_efer;
+
static void ept_save_pdptrs(struct kvm_vcpu *vcpu);
/*
@@ -190,22 +195,6 @@ static const u32 vmx_msr_index[] = {
};
#define NR_VMX_MSR ARRAY_SIZE(vmx_msr_index)
-static void load_msrs(struct kvm_msr_entry *e, int n)
-{
- int i;
-
- for (i = 0; i < n; ++i)
- wrmsrl(e[i].index, e[i].data);
-}
-
-static void save_msrs(struct kvm_msr_entry *e, int n)
-{
- int i;
-
- for (i = 0; i < n; ++i)
- rdmsrl(e[i].index, e[i].data);
-}
-
static inline int is_page_fault(u32 intr_info)
{
return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VECTOR_MASK |
@@ -348,7 +337,7 @@ static int __find_msr_index(struct vcpu_vmx *vmx, u32 msr)
int i;
for (i = 0; i < vmx->nmsrs; ++i)
- if (vmx->guest_msrs[i].index == msr)
+ if (vmx_msr_index[vmx->guest_msrs[i].index] == msr)
return i;
return -1;
}
@@ -379,7 +368,7 @@ static inline void __invept(int ext, u64 eptp, gpa_t gpa)
: : "a" (&operand), "c" (ext) : "cc", "memory");
}
-static struct kvm_msr_entry *find_msr_entry(struct vcpu_vmx *vmx, u32 msr)
+static struct shared_msr_entry *find_msr_entry(struct vcpu_vmx *vmx, u32 msr)
{
int i;
@@ -570,17 +559,15 @@ static void reload_tss(void)
load_TR_desc();
}
-static void load_transition_efer(struct vcpu_vmx *vmx)
+static bool update_transition_efer(struct vcpu_vmx *vmx)
{
int efer_offset = vmx->msr_offset_efer;
- u64 host_efer;
u64 guest_efer;
u64 ignore_bits;
if (efer_offset < 0)
- return;
- host_efer = vmx->host_msrs[efer_offset].data;
- guest_efer = vmx->guest_msrs[efer_offset].data;
+ return false;
+ guest_efer = vmx->vcpu.arch.shadow_efer;
/*
* NX is emulated; LMA and LME handled by hardware; SCE meaninless
@@ -594,26 +581,18 @@ static void load_transition_efer(struct vcpu_vmx *vmx)
ignore_bits &= ~(u64)EFER_SCE;
#endif
if ((guest_efer & ~ignore_bits) == (host_efer & ~ignore_bits))
- return;
+ return false;
- vmx->host_state.guest_efer_loaded = 1;
guest_efer &= ~ignore_bits;
guest_efer |= host_efer & ignore_bits;
- wrmsrl(MSR_EFER, guest_efer);
- vmx->vcpu.stat.efer_reload++;
-}
-
-static void reload_host_efer(struct vcpu_vmx *vmx)
-{
- if (vmx->host_state.guest_efer_loaded) {
- vmx->host_state.guest_efer_loaded = 0;
- load_msrs(vmx->host_msrs + vmx->msr_offset_efer, 1);
- }
+ vmx->guest_msrs[efer_offset].data = guest_efer;
+ return true;
}
static void vmx_save_host_state(struct kvm_vcpu *vcpu)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
+ int i;
if (vmx->host_state.loaded)
return;
@@ -655,8 +634,8 @@ static void vmx_save_host_state(struct kvm_vcpu *vcpu)
wrmsrl(MSR_KERNEL_GS_BASE, vmx->msr_guest_kernel_gs_base);
}
#endif
- load_msrs(vmx->guest_msrs, vmx->save_nmsrs);
- load_transition_efer(vmx);
+ for (i = 0; i < vmx->save_nmsrs; ++i)
+ kvm_set_shared_msr(i, vmx->guest_msrs[i].data);
}
static void __vmx_load_host_state(struct vcpu_vmx *vmx)
@@ -684,9 +663,6 @@ static void __vmx_load_host_state(struct vcpu_vmx *vmx)
local_irq_restore(flags);
}
reload_tss();
- save_msrs(vmx->guest_msrs, vmx->save_nmsrs);
- load_msrs(vmx->host_msrs, vmx->save_nmsrs);
- reload_host_efer(vmx);
#ifdef CONFIG_X86_64
if (is_long_mode(&vmx->vcpu)) {
rdmsrl(MSR_KERNEL_GS_BASE, vmx->msr_guest_kernel_gs_base);
@@ -886,14 +862,11 @@ static void vmx_queue_exception(struct kvm_vcpu *vcpu, unsigned nr,
#ifdef CONFIG_X86_64
static void move_msr_up(struct vcpu_vmx *vmx, int from, int to)
{
- struct kvm_msr_entry tmp;
+ struct shared_msr_entry tmp;
tmp = vmx->guest_msrs[to];
vmx->guest_msrs[to] = vmx->guest_msrs[from];
vmx->guest_msrs[from] = tmp;
- tmp = vmx->host_msrs[to];
- vmx->host_msrs[to] = vmx->host_msrs[from];
- vmx->host_msrs[from] = tmp;
}
#endif
@@ -904,15 +877,13 @@ static void move_msr_up(struct vcpu_vmx *vmx, int from, int to)
*/
static void setup_msrs(struct vcpu_vmx *vmx)
{
- int save_nmsrs;
+ int save_nmsrs, index;
unsigned long *msr_bitmap;
vmx_load_host_state(vmx);
save_nmsrs = 0;
#ifdef CONFIG_X86_64
if (is_long_mode(&vmx->vcpu)) {
- int index;
-
index = __find_msr_index(vmx, MSR_SYSCALL_MASK);
if (index >= 0)
move_msr_up(vmx, index, save_nmsrs++);
@@ -931,9 +902,11 @@ static void setup_msrs(struct vcpu_vmx *vmx)
move_msr_up(vmx, index, save_nmsrs++);
}
#endif
- vmx->save_nmsrs = save_nmsrs;
+ vmx->msr_offset_efer = index = __find_msr_index(vmx, MSR_EFER);
+ if (index >= 0 && update_transition_efer(vmx))
+ move_msr_up(vmx, index, save_nmsrs++);
- vmx->msr_offset_efer = __find_msr_index(vmx, MSR_EFER);
+ vmx->save_nmsrs = save_nmsrs;
if (cpu_has_vmx_msr_bitmap()) {
if (is_long_mode(&vmx->vcpu))
@@ -975,7 +948,7 @@ static void guest_write_tsc(u64 guest_tsc, u64 host_tsc)
static int vmx_get_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata)
{
u64 data;
- struct kvm_msr_entry *msr;
+ struct shared_msr_entry *msr;
if (!pdata) {
printk(KERN_ERR "BUG: get_msr called with NULL pdata\n");
@@ -994,9 +967,9 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata)
vmx_load_host_state(to_vmx(vcpu));
data = to_vmx(vcpu)->msr_guest_kernel_gs_base;
break;
+#endif
case MSR_EFER:
return kvm_get_msr_common(vcpu, msr_index, pdata);
-#endif
case MSR_IA32_TSC:
data = guest_read_tsc();
break;
@@ -1010,6 +983,7 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata)
data = vmcs_readl(GUEST_SYSENTER_ESP);
break;
default:
+ vmx_load_host_state(to_vmx(vcpu));
msr = find_msr_entry(to_vmx(vcpu), msr_index);
if (msr) {
vmx_load_host_state(to_vmx(vcpu));
@@ -1031,7 +1005,7 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata)
static int vmx_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
- struct kvm_msr_entry *msr;
+ struct shared_msr_entry *msr;
u64 host_tsc;
int ret = 0;
@@ -1543,8 +1517,10 @@ continue_rmode:
static void vmx_set_efer(struct kvm_vcpu *vcpu, u64 efer)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
- struct kvm_msr_entry *msr = find_msr_entry(vmx, MSR_EFER);
+ struct shared_msr_entry *msr = find_msr_entry(vmx, MSR_EFER);
+ if (!msr)
+ return;
vcpu->arch.shadow_efer = efer;
if (!msr)
return;
@@ -2383,10 +2359,8 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx)
if (wrmsr_safe(index, data_low, data_high) < 0)
continue;
data = data_low | ((u64)data_high << 32);
- vmx->host_msrs[j].index = index;
- vmx->host_msrs[j].reserved = 0;
- vmx->host_msrs[j].data = data;
- vmx->guest_msrs[j] = vmx->host_msrs[j];
+ vmx->guest_msrs[j].index = i;
+ vmx->guest_msrs[j].data = 0;
++vmx->nmsrs;
}
@@ -3774,7 +3748,6 @@ static void vmx_free_vcpu(struct kvm_vcpu *vcpu)
__clear_bit(vmx->vpid, vmx_vpid_bitmap);
spin_unlock(&vmx_vpid_lock);
vmx_free_vmcs(vcpu);
- kfree(vmx->host_msrs);
kfree(vmx->guest_msrs);
kvm_vcpu_uninit(vcpu);
kmem_cache_free(kvm_vcpu_cache, vmx);
@@ -3801,10 +3774,6 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id)
goto uninit_vcpu;
}
- vmx->host_msrs = kmalloc(PAGE_SIZE, GFP_KERNEL);
- if (!vmx->host_msrs)
- goto free_guest_msrs;
-
vmx->vmcs = alloc_vmcs();
if (!vmx->vmcs)
goto free_msrs;
@@ -3835,8 +3804,6 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id)
free_vmcs:
free_vmcs(vmx->vmcs);
free_msrs:
- kfree(vmx->host_msrs);
-free_guest_msrs:
kfree(vmx->guest_msrs);
uninit_vcpu:
kvm_vcpu_uninit(&vmx->vcpu);
@@ -3986,7 +3953,12 @@ static struct kvm_x86_ops vmx_x86_ops = {
static int __init vmx_init(void)
{
- int r;
+ int r, i;
+
+ rdmsrl_safe(MSR_EFER, &host_efer);
+
+ for (i = 0; i < NR_VMX_MSR; ++i)
+ kvm_define_shared_msr(i, vmx_msr_index[i]);
vmx_io_bitmap_a = (unsigned long *)__get_free_page(GFP_KERNEL);
if (!vmx_io_bitmap_a)
--
1.6.4.1
* Re: [PATCH 0/4] User return notifiers / just-in-time MSR switching for KVM
2009-09-16 12:45 [PATCH 0/4] User return notifiers / just-in-time MSR switching for KVM Avi Kivity
` (3 preceding siblings ...)
2009-09-16 12:45 ` [PATCH 4/4] KVM: VMX: Use " Avi Kivity
@ 2009-10-25 12:23 ` Avi Kivity
4 siblings, 0 replies; 28+ messages in thread
From: Avi Kivity @ 2009-10-25 12:23 UTC (permalink / raw)
To: Ingo Molnar, Marcelo Tosatti; +Cc: kvm, linux-kernel
On 09/16/2009 03:45 PM, Avi Kivity wrote:
> The various x86 syscall related MSRs (MSR_LSTAR and friends, EFER when SCE
> needs to be updated) are fairly expensive to read or write. Since different
> operating systems can set different values for these MSRs, KVM needs to reload
> them when switching to a different guest or to the host.
>
> Switching on every guest entry/exit is too expensive, so KVM reloads on
> guest preemption, which is a lot rarer. Even so, preemption based reload
> is suboptimal:
>
> - if we're switching to a kernel thread and back, there's no need to reload
> the MSRs. Examples of kernel threads we're likely to switch to are:
>
> - the idle task
> - a threaded interrupt handler
> - a kernel-mode virtio server (vhost-net)
>
> - if we're switching to a guest running the same OS, the MSRs will have the
> same values and there's no need to reload them
>
> - if the guest and host run the same OS, again the MSRs need not be reloaded.
>
> This patchset implements just-in-time reloads to defer them to the last
> possible instant. When we do reload, we check whether the values have in
> fact changed and reload conditionally.
>
> For the just-in-time reloads the first patch implements "user return
> notifiers", a callback invoked just before return to userspace. This has
> been written so that there is no code impact if KVM is not configured, and
> no runtime impact if KVM is not running.
>
> The patchset improves guest/idle/guest switches by about 2000 cycles.
>
>
I've applied this to kvm.git master.
--
error compiling committee.c: too many arguments to function