[PATCH 0/3] Scheduler preemption hooks, #2

public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed

* [PATCH 0/3] Scheduler preemption hooks, #2
@ 2007-07-25 12:43 Avi Kivity
  2007-07-25 12:43 ` [PATCH 1/3] SCHED: Generic hooks for trapping task preemption Avi Kivity
                   ` (2 more replies)
  0 siblings, 3 replies; 10+ messages in thread
From: Avi Kivity @ 2007-07-25 12:43 UTC (permalink / raw)
  To: kvm-devel; +Cc: linux-kernel, Ingo Molnar, shaohua.li, rusty, Andi Kleen

Changes since #2:
- better Kconfig
- documentation
- spinlock->mutex conversion from Shaohua

This patchset adds a general sceduler mechanism that allows tasks to
be notified when they are preempted and scheduled back in, and converts
kvm to use it.  The main benefits are that the kvm mmu can now perform
allocations, enabling features such as guest paging, and reducing preemption
latency to a bounded value for realtime workloads.

^ permalink raw reply	[flat|nested] 10+ messages in thread

* [PATCH 1/3] SCHED: Generic hooks for trapping task preemption
  2007-07-25 12:43 [PATCH 0/3] Scheduler preemption hooks, #2 Avi Kivity
@ 2007-07-25 12:43 ` Avi Kivity
  2007-07-25 12:49   ` Avi Kivity
  2007-07-25 12:49   ` Ingo Molnar
  2007-07-25 12:43 ` [PATCH 2/3] KVM: Use the scheduler preemption hooks to make kvm preemptible Avi Kivity
  2007-07-25 12:43 ` [PATCH 3/3] KVM: Convert vm lock to a mutex Avi Kivity
  2 siblings, 2 replies; 10+ messages in thread
From: Avi Kivity @ 2007-07-25 12:43 UTC (permalink / raw)
  To: kvm-devel
  Cc: linux-kernel, Ingo Molnar, shaohua.li, rusty, Andi Kleen,
	Avi Kivity

This adds a general mechanism whereby a task can request the scheduler to
notify it whenever it is preempted or scheduled back in.  This allows the
task to swap any special-purpose registers like the fpu or Intel's VT
registers.

Signed-off-by: Avi Kivity <avi@qumranet.com>
---
 include/linux/preempt.h |   43 +++++++++++++++++++++++++++++++
 include/linux/sched.h   |    4 +++
 kernel/Kconfig.preempt  |    2 +
 kernel/sched.c          |   65 +++++++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 114 insertions(+), 0 deletions(-)

diff --git a/include/linux/preempt.h b/include/linux/preempt.h
index d0926d6..788d2e5 100644
--- a/include/linux/preempt.h
+++ b/include/linux/preempt.h
@@ -8,6 +8,7 @@
 
 #include <linux/thread_info.h>
 #include <linux/linkage.h>
+#include <linux/list.h>
 
 #ifdef CONFIG_DEBUG_PREEMPT
   extern void fastcall add_preempt_count(int val);
@@ -60,4 +61,46 @@ do { \
 
 #endif
 
+#ifdef CONFIG_PREEMPT_HOOKS
+
+struct preempt_hook;
+
+/**
+ * preempt_ops - hooks called when a task is preempted and rescheduled
+ * @sched_in: we're about to be rescheduled:
+ *    hook: struct preempt_hook for the task being scheduled
+ *    cpu:  cpu we're scheduled on
+ * @sched_out: we've just been preempted
+ *    hook: struct preempt_hook for the task being preempted
+ *    next: the task that's kicking us out
+ */
+struct preempt_ops {
+	void (*sched_in)(struct preempt_hook *hook, int cpu);
+	void (*sched_out)(struct preempt_hook *hook, struct task_struct *next);
+};
+
+/**
+ * preempt_hook - key for installing preemption hooks
+ * @link: internal use
+ * @ops: defines the hook functions to be called
+ *
+ * Usually used in conjunction with container_of().
+ */
+struct preempt_hook {
+	struct hlist_node link;
+	struct preempt_ops *ops;
+};
+
+void preempt_hook_register(struct preempt_hook *hook);
+void preempt_hook_unregister(struct preempt_hook *hook);
+
+static inline void preempt_hook_init(struct preempt_hook *hook,
+				     struct preempt_ops *ops)
+{
+	INIT_HLIST_NODE(&hook->link);
+	hook->ops = ops;
+}
+
+#endif
+
 #endif /* __LINUX_PREEMPT_H */
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 33b9b48..c5e3f19 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -935,6 +935,10 @@ struct task_struct {
 	struct sched_class *sched_class;
 	struct sched_entity se;
 
+#ifdef CONFIG_PREEMPT_HOOKS
+	struct hlist_head preempt_hooks;  /* list of struct preempt_hook */
+#endif
+
 	unsigned short ioprio;
 #ifdef CONFIG_BLK_DEV_IO_TRACE
 	unsigned int btrace_seq;
diff --git a/kernel/Kconfig.preempt b/kernel/Kconfig.preempt
index c64ce9c..6af9c72 100644
--- a/kernel/Kconfig.preempt
+++ b/kernel/Kconfig.preempt
@@ -63,3 +63,5 @@ config PREEMPT_BKL
 	  Say Y here if you are building a kernel for a desktop system.
 	  Say N if you are unsure.
 
+config PREEMPT_HOOKS
+	bool
diff --git a/kernel/sched.c b/kernel/sched.c
index 93cf241..100fe93 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -1592,6 +1592,10 @@ static void __sched_fork(struct task_struct *p)
 	INIT_LIST_HEAD(&p->run_list);
 	p->se.on_rq = 0;
 
+#ifdef CONFIG_PREEMPT_HOOKS
+	INIT_HLIST_HEAD(&p->preempt_hooks);
+#endif
+
 	/*
 	 * We mark the process as running here, but have not actually
 	 * inserted it onto the runqueue yet. This guarantees that
@@ -1673,6 +1677,61 @@ void fastcall wake_up_new_task(struct task_struct *p, unsigned long clone_flags)
 	task_rq_unlock(rq, &flags);
 }
 
+#ifdef CONFIG_PREEMPT_HOOKS
+
+/**
+ * preempt_hook_register - tell me when current is being being preempted
+ *                         and rescheduled
+ */
+void preempt_hook_register(struct preempt_hook *hook)
+{
+	hlist_add_head(&hook->link, &current->preempt_hooks);
+}
+EXPORT_SYMBOL_GPL(preempt_hook_register);
+
+/**
+ * preempt_hook_unregister - no longer interested in preemption notifications
+ *
+ * This is safe to call from within a preemption hook.
+ */
+void preempt_hook_unregister(struct preempt_hook *hook)
+{
+	hlist_del(&hook->link);
+}
+EXPORT_SYMBOL_GPL(preempt_hook_unregister);
+
+static void fire_sched_in_preempt_hooks(struct task_struct *tsk)
+{
+	struct preempt_hook *hook;
+	struct hlist_node *node;
+
+	hlist_for_each_entry(hook, node, &tsk->preempt_hooks, link)
+		hook->ops->sched_in(hook, raw_smp_processor_id());
+}
+
+static void fire_sched_out_preempt_hooks(struct task_struct *tsk,
+					 struct task_struct *next)
+{
+	struct preempt_hook *hook;
+	struct hlist_node *node;
+
+	hlist_for_each_entry(hook, node, &tsk->preempt_hooks, link)
+		hook->ops->sched_out(hook, next);
+}
+
+#else
+
+static void fire_sched_in_preempt_hooks(struct task_struct *tsk)
+{
+}
+
+static void fire_sched_out_preempt_hooks(struct task_struct *tsk,
+					 struct task_struct *next)
+{
+}
+
+#endif
+
 /**
  * prepare_task_switch - prepare to switch tasks
  * @rq: the runqueue preparing to switch
@@ -1687,6 +1746,7 @@ void fastcall wake_up_new_task(struct task_struct *p, unsigned long clone_flags)
  */
 static inline void prepare_task_switch(struct rq *rq, struct task_struct *next)
 {
+	fire_sched_out_preempt_hooks(current);
 	prepare_lock_switch(rq, next);
 	prepare_arch_switch(next);
 }
@@ -1728,6 +1788,7 @@ static inline void finish_task_switch(struct rq *rq, struct task_struct *prev)
 	prev_state = prev->state;
 	finish_arch_switch(prev);
 	finish_lock_switch(rq, prev);
+	fire_sched_in_preempt_hooks(current);
 	if (mm)
 		mmdrop(mm);
 	if (unlikely(prev_state == TASK_DEAD)) {
@@ -6335,6 +6396,10 @@ void __init sched_init(void)
 
 	set_load_weight(&init_task);
 
+#ifdef CONFIG_PREEMPT_HOOKS
+	INIT_HLIST_HEAD(&init_task.preempt_hooks);
+#endif
+
 #ifdef CONFIG_SMP
 	nr_cpu_ids = highest_cpu + 1;
 	open_softirq(SCHED_SOFTIRQ, run_rebalance_domains, NULL);
-- 
1.5.2.4


^ permalink raw reply related	[flat|nested] 10+ messages in thread

* Re: [PATCH 1/3] SCHED: Generic hooks for trapping task preemption
  2007-07-25 12:43 ` [PATCH 1/3] SCHED: Generic hooks for trapping task preemption Avi Kivity
@ 2007-07-25 12:49   ` Avi Kivity
  2007-07-25 12:49   ` Ingo Molnar
  1 sibling, 0 replies; 10+ messages in thread
From: Avi Kivity @ 2007-07-25 12:49 UTC (permalink / raw)
  To: kvm-devel; +Cc: linux-kernel, Ingo Molnar, shaohua.li, rusty, Andi Kleen

Avi Kivity wrote:
>  /**
>   * prepare_task_switch - prepare to switch tasks
>   * @rq: the runqueue preparing to switch
> @@ -1687,6 +1746,7 @@ void fastcall wake_up_new_task(struct task_struct *p, unsigned long clone_flags)
>   */
>  static inline void prepare_task_switch(struct rq *rq, struct task_struct *next)
>  {
> +	fire_sched_out_preempt_hooks(current);
>   

This wants an extra arg, 'next', of course.

-- 
Do not meddle in the internals of kernels, for they are subtle and quick to panic.


^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH 1/3] SCHED: Generic hooks for trapping task preemption
  2007-07-25 12:43 ` [PATCH 1/3] SCHED: Generic hooks for trapping task preemption Avi Kivity
  2007-07-25 12:49   ` Avi Kivity
@ 2007-07-25 12:49   ` Ingo Molnar
  2007-07-25 12:51     ` Avi Kivity
                       ` (2 more replies)
  1 sibling, 3 replies; 10+ messages in thread
From: Ingo Molnar @ 2007-07-25 12:49 UTC (permalink / raw)
  To: Avi Kivity; +Cc: kvm-devel, linux-kernel, shaohua.li, rusty, Andi Kleen


* Avi Kivity <avi@qumranet.com> wrote:

> This adds a general mechanism whereby a task can request the scheduler 
> to notify it whenever it is preempted or scheduled back in.  This 
> allows the task to swap any special-purpose registers like the fpu or 
> Intel's VT registers.

ok, this looks very clean, i've added it to my scheduler tree. You've 
tested this with KVM, on both 32-bit and 64-bit, correct?

	Ingo

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH 1/3] SCHED: Generic hooks for trapping task preemption
  2007-07-25 12:49   ` Ingo Molnar
@ 2007-07-25 12:51     ` Avi Kivity
  2007-07-25 12:52     ` Ingo Molnar
  2007-07-25 13:03     ` Ingo Molnar
  2 siblings, 0 replies; 10+ messages in thread
From: Avi Kivity @ 2007-07-25 12:51 UTC (permalink / raw)
  To: Ingo Molnar; +Cc: kvm-devel, linux-kernel, shaohua.li, rusty, Andi Kleen

Ingo Molnar wrote:
> * Avi Kivity <avi@qumranet.com> wrote:
>
>   
>> This adds a general mechanism whereby a task can request the scheduler 
>> to notify it whenever it is preempted or scheduled back in.  This 
>> allows the task to swap any special-purpose registers like the fpu or 
>> Intel's VT registers.
>>     
>
> ok, this looks very clean, i've added it to my scheduler tree. You've 
> tested this with KVM, on both 32-bit and 64-bit, correct?
>
>   

The previous iteration.  I'll do a run now to make sure it hasn't regressed.

Note that only the first patch will apply to -linus.  The others depend
on a small change that isn't there.


-- 
Do not meddle in the internals of kernels, for they are subtle and quick to panic.


^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH 1/3] SCHED: Generic hooks for trapping task preemption
  2007-07-25 12:49   ` Ingo Molnar
  2007-07-25 12:51     ` Avi Kivity
@ 2007-07-25 12:52     ` Ingo Molnar
  2007-07-25 13:13       ` Avi Kivity
  2007-07-25 13:03     ` Ingo Molnar
  2 siblings, 1 reply; 10+ messages in thread
From: Ingo Molnar @ 2007-07-25 12:52 UTC (permalink / raw)
  To: Avi Kivity; +Cc: kvm-devel, linux-kernel, shaohua.li, rusty, Andi Kleen


* Ingo Molnar <mingo@elte.hu> wrote:

> > This adds a general mechanism whereby a task can request the 
> > scheduler to notify it whenever it is preempted or scheduled back 
> > in.  This allows the task to swap any special-purpose registers like 
> > the fpu or Intel's VT registers.
> 
> ok, this looks very clean, i've added it to my scheduler tree. You've 
> tested this with KVM, on both 32-bit and 64-bit, correct?

note: i've done a hook -> notifier rename - 'hook' is way too generic 
sounding and also slightly loaded. (updated patch below)

	Ingo

------------------->
From: Avi Kivity <avi@qumranet.com>
Subject: sched: arch preempt notifier mechanism

This adds a general mechanism whereby a task can request the scheduler to
notify it whenever it is preempted or scheduled back in.  This allows the
task to swap any special-purpose registers like the fpu or Intel's VT
registers.

Signed-off-by: Avi Kivity <avi@qumranet.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/preempt.h |   43 +++++++++++++++++++++++++++++++
 include/linux/sched.h   |    4 ++
 kernel/Kconfig.preempt  |    2 +
 kernel/sched.c          |   65 ++++++++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 114 insertions(+)

Index: linux/include/linux/preempt.h
===================================================================
--- linux.orig/include/linux/preempt.h
+++ linux/include/linux/preempt.h
@@ -8,6 +8,7 @@
 
 #include <linux/thread_info.h>
 #include <linux/linkage.h>
+#include <linux/list.h>
 
 #ifdef CONFIG_DEBUG_PREEMPT
   extern void fastcall add_preempt_count(int val);
@@ -60,4 +61,46 @@ do { \
 
 #endif
 
+#ifdef CONFIG_PREEMPT_NOTIFIERS
+
+struct preempt_notifier;
+
+/**
+ * preempt_ops - notifiers called when a task is preempted and rescheduled
+ * @sched_in: we're about to be rescheduled:
+ *    notifier: struct preempt_notifier for the task being scheduled
+ *    cpu:  cpu we're scheduled on
+ * @sched_out: we've just been preempted
+ *    notifier: struct preempt_notifier for the task being preempted
+ *    next: the task that's kicking us out
+ */
+struct preempt_ops {
+	void (*sched_in)(struct preempt_notifier *notifier, int cpu);
+	void (*sched_out)(struct preempt_notifier *notifier, struct task_struct *next);
+};
+
+/**
+ * preempt_notifier - key for installing preemption notifiers
+ * @link: internal use
+ * @ops: defines the notifier functions to be called
+ *
+ * Usually used in conjunction with container_of().
+ */
+struct preempt_notifier {
+	struct hlist_node link;
+	struct preempt_ops *ops;
+};
+
+void preempt_notifier_register(struct preempt_notifier *notifier);
+void preempt_notifier_unregister(struct preempt_notifier *notifier);
+
+static inline void preempt_notifier_init(struct preempt_notifier *notifier,
+				     struct preempt_ops *ops)
+{
+	INIT_HLIST_NODE(&notifier->link);
+	notifier->ops = ops;
+}
+
+#endif
+
 #endif /* __LINUX_PREEMPT_H */
Index: linux/include/linux/sched.h
===================================================================
--- linux.orig/include/linux/sched.h
+++ linux/include/linux/sched.h
@@ -935,6 +935,10 @@ struct task_struct {
 	struct sched_class *sched_class;
 	struct sched_entity se;
 
+#ifdef CONFIG_PREEMPT_NOTIFIERS
+	struct hlist_head preempt_notifiers;  /* list of struct preempt_notifier */
+#endif
+
 	unsigned short ioprio;
 #ifdef CONFIG_BLK_DEV_IO_TRACE
 	unsigned int btrace_seq;
Index: linux/kernel/Kconfig.preempt
===================================================================
--- linux.orig/kernel/Kconfig.preempt
+++ linux/kernel/Kconfig.preempt
@@ -63,3 +63,5 @@ config PREEMPT_BKL
 	  Say Y here if you are building a kernel for a desktop system.
 	  Say N if you are unsure.
 
+config PREEMPT_NOTIFIERS
+	bool
Index: linux/kernel/sched.c
===================================================================
--- linux.orig/kernel/sched.c
+++ linux/kernel/sched.c
@@ -1592,6 +1592,10 @@ static void __sched_fork(struct task_str
 	INIT_LIST_HEAD(&p->run_list);
 	p->se.on_rq = 0;
 
+#ifdef CONFIG_PREEMPT_NOTIFIERS
+	INIT_HLIST_HEAD(&p->preempt_notifiers);
+#endif
+
 	/*
 	 * We mark the process as running here, but have not actually
 	 * inserted it onto the runqueue yet. This guarantees that
@@ -1673,6 +1677,61 @@ void fastcall wake_up_new_task(struct ta
 	task_rq_unlock(rq, &flags);
 }
 
+#ifdef CONFIG_PREEMPT_NOTIFIERS
+
+/**
+ * preempt_notifier_register - tell me when current is being being preempted
+ *                         and rescheduled
+ */
+void preempt_notifier_register(struct preempt_notifier *notifier)
+{
+	hlist_add_head(&notifier->link, &current->preempt_notifiers);
+}
+EXPORT_SYMBOL_GPL(preempt_notifier_register);
+
+/**
+ * preempt_notifier_unregister - no longer interested in preemption notifications
+ *
+ * This is safe to call from within a preemption notifier.
+ */
+void preempt_notifier_unregister(struct preempt_notifier *notifier)
+{
+	hlist_del(&notifier->link);
+}
+EXPORT_SYMBOL_GPL(preempt_notifier_unregister);
+
+static void fire_sched_in_preempt_notifiers(struct task_struct *tsk)
+{
+	struct preempt_notifier *notifier;
+	struct hlist_node *node;
+
+	hlist_for_each_entry(notifier, node, &tsk->preempt_notifiers, link)
+		notifier->ops->sched_in(notifier, raw_smp_processor_id());
+}
+
+static void fire_sched_out_preempt_notifiers(struct task_struct *tsk,
+					 struct task_struct *next)
+{
+	struct preempt_notifier *notifier;
+	struct hlist_node *node;
+
+	hlist_for_each_entry(notifier, node, &tsk->preempt_notifiers, link)
+		notifier->ops->sched_out(notifier, next);
+}
+
+#else
+
+static void fire_sched_in_preempt_notifiers(struct task_struct *tsk)
+{
+}
+
+static void fire_sched_out_preempt_notifiers(struct task_struct *tsk,
+					 struct task_struct *next)
+{
+}
+
+#endif
+
 /**
  * prepare_task_switch - prepare to switch tasks
  * @rq: the runqueue preparing to switch
@@ -1687,6 +1746,7 @@ void fastcall wake_up_new_task(struct ta
  */
 static inline void prepare_task_switch(struct rq *rq, struct task_struct *next)
 {
+	fire_sched_out_preempt_notifiers(current);
 	prepare_lock_switch(rq, next);
 	prepare_arch_switch(next);
 }
@@ -1728,6 +1788,7 @@ static inline void finish_task_switch(st
 	prev_state = prev->state;
 	finish_arch_switch(prev);
 	finish_lock_switch(rq, prev);
+	fire_sched_in_preempt_notifiers(current);
 	if (mm)
 		mmdrop(mm);
 	if (unlikely(prev_state == TASK_DEAD)) {
@@ -6335,6 +6396,10 @@ void __init sched_init(void)
 
 	set_load_weight(&init_task);
 
+#ifdef CONFIG_PREEMPT_NOTIFIERS
+	INIT_HLIST_HEAD(&init_task.preempt_notifiers);
+#endif
+
 #ifdef CONFIG_SMP
 	nr_cpu_ids = highest_cpu + 1;
 	open_softirq(SCHED_SOFTIRQ, run_rebalance_domains, NULL);

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH 1/3] SCHED: Generic hooks for trapping task preemption
  2007-07-25 12:52     ` Ingo Molnar
@ 2007-07-25 13:13       ` Avi Kivity
  0 siblings, 0 replies; 10+ messages in thread
From: Avi Kivity @ 2007-07-25 13:13 UTC (permalink / raw)
  To: Ingo Molnar; +Cc: kvm-devel, linux-kernel, shaohua.li, rusty, Andi Kleen

Ingo Molnar wrote:
> * Ingo Molnar <mingo@elte.hu> wrote:
>
>   
>>> This adds a general mechanism whereby a task can request the 
>>> scheduler to notify it whenever it is preempted or scheduled back 
>>> in.  This allows the task to swap any special-purpose registers like 
>>> the fpu or Intel's VT registers.
>>>       
>> ok, this looks very clean, i've added it to my scheduler tree. You've 
>> tested this with KVM, on both 32-bit and 64-bit, correct?
>>     
>
> note: i've done a hook -> notifier rename - 'hook' is way too generic 
> sounding and also slightly loaded. (updated patch below)
>
>   

Okay, both i386 and x86_64 are happy (with the missing 'next' argument
added, but with the old hook name).

-- 
Do not meddle in the internals of kernels, for they are subtle and quick to panic.


^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH 1/3] SCHED: Generic hooks for trapping task preemption
  2007-07-25 12:49   ` Ingo Molnar
  2007-07-25 12:51     ` Avi Kivity
  2007-07-25 12:52     ` Ingo Molnar
@ 2007-07-25 13:03     ` Ingo Molnar
  2 siblings, 0 replies; 10+ messages in thread
From: Ingo Molnar @ 2007-07-25 13:03 UTC (permalink / raw)
  To: Avi Kivity; +Cc: kvm-devel, linux-kernel, shaohua.li, rusty, Andi Kleen


* Ingo Molnar <mingo@elte.hu> wrote:

> ok, this looks very clean, i've added it to my scheduler tree. You've 
> tested this with KVM, on both 32-bit and 64-bit, correct?

alas, it didnt build ;-) Tidied and fixed up patch below.

	Ingo

---------------->
Subject: sched: arch preempt notifier mechanism

This adds a general mechanism whereby a task can request the scheduler to
notify it whenever it is preempted or scheduled back in.  This allows the
task to swap any special-purpose registers like the fpu or Intel's VT
registers.

Signed-off-by: Avi Kivity <avi@qumranet.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/preempt.h |   44 ++++++++++++++++++++++++++++
 include/linux/sched.h   |    5 +++
 kernel/Kconfig.preempt  |    2 +
 kernel/sched.c          |   73 ++++++++++++++++++++++++++++++++++++++++++++++--
 4 files changed, 122 insertions(+), 2 deletions(-)

Index: linux/include/linux/preempt.h
===================================================================
--- linux.orig/include/linux/preempt.h
+++ linux/include/linux/preempt.h
@@ -8,6 +8,7 @@
 
 #include <linux/thread_info.h>
 #include <linux/linkage.h>
+#include <linux/list.h>
 
 #ifdef CONFIG_DEBUG_PREEMPT
   extern void fastcall add_preempt_count(int val);
@@ -60,4 +61,47 @@ do { \
 
 #endif
 
+#ifdef CONFIG_PREEMPT_NOTIFIERS
+
+struct preempt_notifier;
+
+/**
+ * preempt_ops - notifiers called when a task is preempted and rescheduled
+ * @sched_in: we're about to be rescheduled:
+ *    notifier: struct preempt_notifier for the task being scheduled
+ *    cpu:  cpu we're scheduled on
+ * @sched_out: we've just been preempted
+ *    notifier: struct preempt_notifier for the task being preempted
+ *    next: the task that's kicking us out
+ */
+struct preempt_ops {
+	void (*sched_in)(struct preempt_notifier *notifier, int cpu);
+	void (*sched_out)(struct preempt_notifier *notifier,
+			  struct task_struct *next);
+};
+
+/**
+ * preempt_notifier - key for installing preemption notifiers
+ * @link: internal use
+ * @ops: defines the notifier functions to be called
+ *
+ * Usually used in conjunction with container_of().
+ */
+struct preempt_notifier {
+	struct hlist_node link;
+	struct preempt_ops *ops;
+};
+
+void preempt_notifier_register(struct preempt_notifier *notifier);
+void preempt_notifier_unregister(struct preempt_notifier *notifier);
+
+static inline void preempt_notifier_init(struct preempt_notifier *notifier,
+				     struct preempt_ops *ops)
+{
+	INIT_HLIST_NODE(&notifier->link);
+	notifier->ops = ops;
+}
+
+#endif
+
 #endif /* __LINUX_PREEMPT_H */
Index: linux/include/linux/sched.h
===================================================================
--- linux.orig/include/linux/sched.h
+++ linux/include/linux/sched.h
@@ -935,6 +935,11 @@ struct task_struct {
 	struct sched_class *sched_class;
 	struct sched_entity se;
 
+#ifdef CONFIG_PREEMPT_NOTIFIERS
+	/* list of struct preempt_notifier: */
+	struct hlist_head preempt_notifiers;
+#endif
+
 	unsigned short ioprio;
 #ifdef CONFIG_BLK_DEV_IO_TRACE
 	unsigned int btrace_seq;
Index: linux/kernel/Kconfig.preempt
===================================================================
--- linux.orig/kernel/Kconfig.preempt
+++ linux/kernel/Kconfig.preempt
@@ -63,3 +63,5 @@ config PREEMPT_BKL
 	  Say Y here if you are building a kernel for a desktop system.
 	  Say N if you are unsure.
 
+config PREEMPT_NOTIFIERS
+	bool
Index: linux/kernel/sched.c
===================================================================
--- linux.orig/kernel/sched.c
+++ linux/kernel/sched.c
@@ -1592,6 +1592,10 @@ static void __sched_fork(struct task_str
 	INIT_LIST_HEAD(&p->run_list);
 	p->se.on_rq = 0;
 
+#ifdef CONFIG_PREEMPT_NOTIFIERS
+	INIT_HLIST_HEAD(&p->preempt_notifiers);
+#endif
+
 	/*
 	 * We mark the process as running here, but have not actually
 	 * inserted it onto the runqueue yet. This guarantees that
@@ -1673,6 +1677,63 @@ void fastcall wake_up_new_task(struct ta
 	task_rq_unlock(rq, &flags);
 }
 
+#ifdef CONFIG_PREEMPT_NOTIFIERS
+
+/**
+ * preempt_notifier_register - tell me when current is being being preempted
+ *                         and rescheduled
+ */
+void preempt_notifier_register(struct preempt_notifier *notifier)
+{
+	hlist_add_head(&notifier->link, &current->preempt_notifiers);
+}
+EXPORT_SYMBOL_GPL(preempt_notifier_register);
+
+/**
+ * preempt_notifier_unregister - no longer interested in preemption notifications
+ *
+ * This is safe to call from within a preemption notifier.
+ */
+void preempt_notifier_unregister(struct preempt_notifier *notifier)
+{
+	hlist_del(&notifier->link);
+}
+EXPORT_SYMBOL_GPL(preempt_notifier_unregister);
+
+static void fire_sched_in_preempt_notifiers(struct task_struct *curr)
+{
+	struct preempt_notifier *notifier;
+	struct hlist_node *node;
+
+	hlist_for_each_entry(notifier, node, &curr->preempt_notifiers, link)
+		notifier->ops->sched_in(notifier, raw_smp_processor_id());
+}
+
+static void
+fire_sched_out_preempt_notifiers(struct task_struct *curr,
+				 struct task_struct *next)
+{
+	struct preempt_notifier *notifier;
+	struct hlist_node *node;
+
+	hlist_for_each_entry(notifier, node, &curr->preempt_notifiers, link)
+		notifier->ops->sched_out(notifier, next);
+}
+
+#else
+
+static void fire_sched_in_preempt_notifiers(struct task_struct *curr)
+{
+}
+
+static void
+fire_sched_out_preempt_notifiers(struct task_struct *curr,
+				 struct task_struct *next)
+{
+}
+
+#endif
+
 /**
  * prepare_task_switch - prepare to switch tasks
  * @rq: the runqueue preparing to switch
@@ -1685,8 +1746,11 @@ void fastcall wake_up_new_task(struct ta
  * prepare_task_switch sets up locking and calls architecture specific
  * hooks.
  */
-static inline void prepare_task_switch(struct rq *rq, struct task_struct *next)
+static inline void
+prepare_task_switch(struct rq *rq, struct task_struct *prev,
+		    struct task_struct *next)
 {
+	fire_sched_out_preempt_notifiers(prev, next);
 	prepare_lock_switch(rq, next);
 	prepare_arch_switch(next);
 }
@@ -1728,6 +1792,7 @@ static inline void finish_task_switch(st
 	prev_state = prev->state;
 	finish_arch_switch(prev);
 	finish_lock_switch(rq, prev);
+	fire_sched_in_preempt_notifiers(current);
 	if (mm)
 		mmdrop(mm);
 	if (unlikely(prev_state == TASK_DEAD)) {
@@ -1768,7 +1833,7 @@ context_switch(struct rq *rq, struct tas
 {
 	struct mm_struct *mm, *oldmm;
 
-	prepare_task_switch(rq, next);
+	prepare_task_switch(rq, prev, next);
 	mm = next->mm;
 	oldmm = prev->active_mm;
 	/*
@@ -6335,6 +6400,10 @@ void __init sched_init(void)
 
 	set_load_weight(&init_task);
 
+#ifdef CONFIG_PREEMPT_NOTIFIERS
+	INIT_HLIST_HEAD(&init_task.preempt_notifiers);
+#endif
+
 #ifdef CONFIG_SMP
 	nr_cpu_ids = highest_cpu + 1;
 	open_softirq(SCHED_SOFTIRQ, run_rebalance_domains, NULL);

^ permalink raw reply	[flat|nested] 10+ messages in thread

* [PATCH 2/3] KVM: Use the scheduler preemption hooks to make kvm preemptible
  2007-07-25 12:43 [PATCH 0/3] Scheduler preemption hooks, #2 Avi Kivity
  2007-07-25 12:43 ` [PATCH 1/3] SCHED: Generic hooks for trapping task preemption Avi Kivity
@ 2007-07-25 12:43 ` Avi Kivity
  2007-07-25 12:43 ` [PATCH 3/3] KVM: Convert vm lock to a mutex Avi Kivity
  2 siblings, 0 replies; 10+ messages in thread
From: Avi Kivity @ 2007-07-25 12:43 UTC (permalink / raw)
  To: kvm-devel
  Cc: linux-kernel, Ingo Molnar, shaohua.li, rusty, Andi Kleen,
	Avi Kivity

Current kvm disables preemption while the new virtualization registers are
in use.  This of course is not very good for latency sensitive workloads (one
use of virtualization is to offload user interface and other latency
insensitive stuff to a container, so that it is easier to analyze the
remaining workload).  This patch re-enables preemption for kvm; preemption
is now only disabled when switching the registers in and out, and during
the switch to guest mode and back.

Contains fixes from Shaohua Li <shaohua.li@intel.com>.

Signed-off-by: Avi Kivity <avi@qumranet.com>
---
 drivers/kvm/Kconfig    |    1 +
 drivers/kvm/kvm.h      |    4 +++-
 drivers/kvm/kvm_main.c |   46 +++++++++++++++++++++++++++++++++++++++-------
 drivers/kvm/mmu.c      |    2 --
 drivers/kvm/svm.c      |    6 ++----
 drivers/kvm/vmx.c      |   17 +++++++++--------
 6 files changed, 54 insertions(+), 22 deletions(-)

diff --git a/drivers/kvm/Kconfig b/drivers/kvm/Kconfig
index 6cecc39..65e009b 100644
--- a/drivers/kvm/Kconfig
+++ b/drivers/kvm/Kconfig
@@ -11,6 +11,7 @@ if VIRTUALIZATION
 config KVM
 	tristate "Kernel-based Virtual Machine (KVM) support"
 	depends on X86 && EXPERIMENTAL
+	select PREEMPT_HOOKS
 	select ANON_INODES
 	---help---
 	  Support hosting fully virtualized guest machines using hardware
diff --git a/drivers/kvm/kvm.h b/drivers/kvm/kvm.h
index fc27c2f..50ddd3c 100644
--- a/drivers/kvm/kvm.h
+++ b/drivers/kvm/kvm.h
@@ -13,6 +13,7 @@
 #include <linux/signal.h>
 #include <linux/sched.h>
 #include <linux/mm.h>
+#include <linux/preempt.h>
 #include <asm/signal.h>
 
 #include "vmx.h"
@@ -310,6 +311,7 @@ void kvm_io_bus_register_dev(struct kvm_io_bus *bus,
 
 struct kvm_vcpu {
 	struct kvm *kvm;
+	struct preempt_hook preempt_hook;
 	int vcpu_id;
 	union {
 		struct vmcs *vmcs;
@@ -456,7 +458,7 @@ struct kvm_arch_ops {
 	int (*vcpu_create)(struct kvm_vcpu *vcpu);
 	void (*vcpu_free)(struct kvm_vcpu *vcpu);
 
-	void (*vcpu_load)(struct kvm_vcpu *vcpu);
+	void (*vcpu_load)(struct kvm_vcpu *vcpu, int cpu);
 	void (*vcpu_put)(struct kvm_vcpu *vcpu);
 	void (*vcpu_decache)(struct kvm_vcpu *vcpu);
 
diff --git a/drivers/kvm/kvm_main.c b/drivers/kvm/kvm_main.c
index f6c6b0b..4f45247 100644
--- a/drivers/kvm/kvm_main.c
+++ b/drivers/kvm/kvm_main.c
@@ -54,6 +54,8 @@ static cpumask_t cpus_hardware_enabled;
 
 struct kvm_arch_ops *kvm_arch_ops;
 
+static __read_mostly struct preempt_ops kvm_preempt_ops;
+
 #define STAT_OFFSET(x) offsetof(struct kvm_vcpu, stat.x)
 
 static struct kvm_stats_debugfs_item {
@@ -239,13 +241,21 @@ EXPORT_SYMBOL_GPL(kvm_put_guest_fpu);
  */
 static void vcpu_load(struct kvm_vcpu *vcpu)
 {
+	int cpu;
+
 	mutex_lock(&vcpu->mutex);
-	kvm_arch_ops->vcpu_load(vcpu);
+	cpu = get_cpu();
+	preempt_hook_register(&vcpu->preempt_hook);
+	kvm_arch_ops->vcpu_load(vcpu, cpu);
+	put_cpu();
 }
 
 static void vcpu_put(struct kvm_vcpu *vcpu)
 {
+	preempt_disable();
 	kvm_arch_ops->vcpu_put(vcpu);
+	preempt_hook_unregister(&vcpu->preempt_hook);
+	preempt_enable();
 	mutex_unlock(&vcpu->mutex);
 }
 
@@ -1638,9 +1648,7 @@ void kvm_resched(struct kvm_vcpu *vcpu)
 {
 	if (!need_resched())
 		return;
-	vcpu_put(vcpu);
 	cond_resched();
-	vcpu_load(vcpu);
 }
 EXPORT_SYMBOL_GPL(kvm_resched);
 
@@ -1706,11 +1714,9 @@ static int pio_copy_data(struct kvm_vcpu *vcpu)
 	unsigned bytes;
 	int nr_pages = vcpu->pio.guest_pages[1] ? 2 : 1;
 
-	kvm_arch_ops->vcpu_put(vcpu);
 	q = vmap(vcpu->pio.guest_pages, nr_pages, VM_READ|VM_WRITE,
 		 PAGE_KERNEL);
 	if (!q) {
-		kvm_arch_ops->vcpu_load(vcpu);
 		free_pio_guest_pages(vcpu);
 		return -ENOMEM;
 	}
@@ -1722,7 +1728,6 @@ static int pio_copy_data(struct kvm_vcpu *vcpu)
 		memcpy(p, q, bytes);
 	q -= vcpu->pio.guest_page_offset;
 	vunmap(q);
-	kvm_arch_ops->vcpu_load(vcpu);
 	free_pio_guest_pages(vcpu);
 	return 0;
 }
@@ -2390,6 +2395,7 @@ static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, int n)
 	int r;
 	struct kvm_vcpu *vcpu;
 	struct page *page;
+	int cpu;
 
 	r = -EINVAL;
 	if (!valid_vcpu(n))
@@ -2430,7 +2436,11 @@ static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, int n)
 	if (r < 0)
 		goto out_free_vcpus;
 
-	kvm_arch_ops->vcpu_load(vcpu);
+	vcpu->preempt_hook.ops = &kvm_preempt_ops;
+	cpu = get_cpu();
+	preempt_hook_register(&vcpu->preempt_hook);
+	kvm_arch_ops->vcpu_load(vcpu, cpu);
+	put_cpu();
 	r = kvm_mmu_setup(vcpu);
 	if (r >= 0)
 		r = kvm_arch_ops->vcpu_setup(vcpu);
@@ -3147,6 +3157,25 @@ static struct sys_device kvm_sysdev = {
 
 hpa_t bad_page_address;
 
+static inline struct kvm_vcpu *preempt_hook_to_vcpu(struct preempt_hook *hook)
+{
+	return container_of(hook, struct kvm_vcpu, preempt_hook);
+}
+
+static void kvm_sched_in(struct preempt_hook *hook, int cpu)
+{
+	struct kvm_vcpu *vcpu = preempt_hook_to_vcpu(hook);
+
+	kvm_arch_ops->vcpu_load(vcpu, cpu);
+}
+
+static void kvm_sched_out(struct preempt_hook *hook)
+{
+	struct kvm_vcpu *vcpu = preempt_hook_to_vcpu(hook);
+
+	kvm_arch_ops->vcpu_put(vcpu);
+}
+
 int kvm_init_arch(struct kvm_arch_ops *ops, struct module *module)
 {
 	int r;
@@ -3193,6 +3222,9 @@ int kvm_init_arch(struct kvm_arch_ops *ops, struct module *module)
 		goto out_free;
 	}
 
+	kvm_preempt_ops.sched_in = kvm_sched_in;
+	kvm_preempt_ops.sched_out = kvm_sched_out;
+
 	return r;
 
 out_free:
diff --git a/drivers/kvm/mmu.c b/drivers/kvm/mmu.c
index 3c33957..fdb967a 100644
--- a/drivers/kvm/mmu.c
+++ b/drivers/kvm/mmu.c
@@ -275,9 +275,7 @@ static int mmu_topup_memory_caches(struct kvm_vcpu *vcpu)
 	r = __mmu_topup_memory_caches(vcpu, GFP_NOWAIT);
 	if (r < 0) {
 		spin_unlock(&vcpu->kvm->lock);
-		kvm_arch_ops->vcpu_put(vcpu);
 		r = __mmu_topup_memory_caches(vcpu, GFP_KERNEL);
-		kvm_arch_ops->vcpu_load(vcpu);
 		spin_lock(&vcpu->kvm->lock);
 	}
 	return r;
diff --git a/drivers/kvm/svm.c b/drivers/kvm/svm.c
index 850a1b1..e3a4722 100644
--- a/drivers/kvm/svm.c
+++ b/drivers/kvm/svm.c
@@ -610,11 +610,10 @@ static void svm_free_vcpu(struct kvm_vcpu *vcpu)
 	kfree(vcpu->svm);
 }
 
-static void svm_vcpu_load(struct kvm_vcpu *vcpu)
+static void svm_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 {
-	int cpu, i;
+	int i;
 
-	cpu = get_cpu();
 	if (unlikely(cpu != vcpu->cpu)) {
 		u64 tsc_this, delta;
 
@@ -640,7 +639,6 @@ static void svm_vcpu_put(struct kvm_vcpu *vcpu)
 		wrmsrl(host_save_user_msrs[i], vcpu->svm->host_user_msrs[i]);
 
 	rdtscll(vcpu->host_tsc);
-	put_cpu();
 }
 
 static void svm_vcpu_decache(struct kvm_vcpu *vcpu)
diff --git a/drivers/kvm/vmx.c b/drivers/kvm/vmx.c
index dac2f93..90f28f0 100644
--- a/drivers/kvm/vmx.c
+++ b/drivers/kvm/vmx.c
@@ -345,6 +345,7 @@ static void vmx_save_host_state(struct kvm_vcpu *vcpu)
 static void vmx_load_host_state(struct kvm_vcpu *vcpu)
 {
 	struct vmx_host_state *hs = &vcpu->vmx_host_state;
+	unsigned long flags;
 
 	if (!hs->loaded)
 		return;
@@ -357,12 +358,12 @@ static void vmx_load_host_state(struct kvm_vcpu *vcpu)
 		 * If we have to reload gs, we must take care to
 		 * preserve our gs base.
 		 */
-		local_irq_disable();
+		local_irq_save(flags);
 		load_gs(hs->gs_sel);
 #ifdef CONFIG_X86_64
 		wrmsrl(MSR_GS_BASE, vmcs_readl(HOST_GS_BASE));
 #endif
-		local_irq_enable();
+		local_irq_restore(flags);
 
 		reload_tss();
 	}
@@ -376,14 +377,11 @@ static void vmx_load_host_state(struct kvm_vcpu *vcpu)
  * Switches to specified vcpu, until a matching vcpu_put(), but assumes
  * vcpu mutex is already taken.
  */
-static void vmx_vcpu_load(struct kvm_vcpu *vcpu)
+static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 {
 	u64 phys_addr = __pa(vcpu->vmcs);
-	int cpu;
 	u64 tsc_this, delta;
 
-	cpu = get_cpu();
-
 	if (vcpu->cpu != cpu)
 		vcpu_clear(vcpu);
 
@@ -428,7 +426,6 @@ static void vmx_vcpu_put(struct kvm_vcpu *vcpu)
 {
 	vmx_load_host_state(vcpu);
 	kvm_put_guest_fpu(vcpu);
-	put_cpu();
 }
 
 static void vmx_fpu_activate(struct kvm_vcpu *vcpu)
@@ -2011,6 +2008,8 @@ again:
 	if (unlikely(r))
 		goto out;
 
+	preempt_disable();
+
 	if (!vcpu->mmio_read_completed)
 		do_interrupt_requests(vcpu, kvm_run);
 
@@ -2153,6 +2152,9 @@ again:
 	vcpu->interrupt_window_open = (vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & 3) == 0;
 
 	asm ("mov %0, %%ds; mov %0, %%es" : : "r"(__USER_DS));
+	vcpu->launched = 1;
+
+	preempt_enable();
 
 	if (unlikely(fail)) {
 		kvm_run->exit_reason = KVM_EXIT_FAIL_ENTRY;
@@ -2167,7 +2169,6 @@ again:
 	if (unlikely(prof_on == KVM_PROFILING))
 		profile_hit(KVM_PROFILING, (void *)vmcs_readl(GUEST_RIP));
 
-	vcpu->launched = 1;
 	r = kvm_handle_exit(kvm_run, vcpu);
 	if (r > 0) {
 		/* Give scheduler a change to reschedule. */
-- 
1.5.2.4


^ permalink raw reply related	[flat|nested] 10+ messages in thread

* [PATCH 3/3] KVM: Convert vm lock to a mutex
  2007-07-25 12:43 [PATCH 0/3] Scheduler preemption hooks, #2 Avi Kivity
  2007-07-25 12:43 ` [PATCH 1/3] SCHED: Generic hooks for trapping task preemption Avi Kivity
  2007-07-25 12:43 ` [PATCH 2/3] KVM: Use the scheduler preemption hooks to make kvm preemptible Avi Kivity
@ 2007-07-25 12:43 ` Avi Kivity
  2 siblings, 0 replies; 10+ messages in thread
From: Avi Kivity @ 2007-07-25 12:43 UTC (permalink / raw)
  To: kvm-devel
  Cc: linux-kernel, Ingo Molnar, shaohua.li, rusty, Andi Kleen,
	Avi Kivity

From: Shaohua Li <shaohua.li@intel.com>

This allows the kvm mmu to perform sleepy operations, such as memory
allocation.

Signed-off-by: Shaohua Li <shaohua.li@intel.com>
Signed-off-by: Avi Kivity <avi@qumranet.com>
---
 drivers/kvm/kvm.h      |    2 +-
 drivers/kvm/kvm_main.c |   54 ++++++++++++++++++++++++------------------------
 drivers/kvm/mmu.c      |    8 +++---
 drivers/kvm/svm.c      |    8 +++---
 drivers/kvm/vmx.c      |    8 +++---
 5 files changed, 40 insertions(+), 40 deletions(-)

diff --git a/drivers/kvm/kvm.h b/drivers/kvm/kvm.h
index 50ddd3c..ca167c4 100644
--- a/drivers/kvm/kvm.h
+++ b/drivers/kvm/kvm.h
@@ -420,7 +420,7 @@ struct kvm_memory_slot {
 };
 
 struct kvm {
-	spinlock_t lock; /* protects everything except vcpus */
+	struct mutex lock; /* protects everything except vcpus */
 	int naliases;
 	struct kvm_mem_alias aliases[KVM_ALIAS_SLOTS];
 	int nmemslots;
diff --git a/drivers/kvm/kvm_main.c b/drivers/kvm/kvm_main.c
index 4f45247..6e76345 100644
--- a/drivers/kvm/kvm_main.c
+++ b/drivers/kvm/kvm_main.c
@@ -310,7 +310,7 @@ static struct kvm *kvm_create_vm(void)
 		return ERR_PTR(-ENOMEM);
 
 	kvm_io_bus_init(&kvm->pio_bus);
-	spin_lock_init(&kvm->lock);
+	mutex_init(&kvm->lock);
 	INIT_LIST_HEAD(&kvm->active_mmu_pages);
 	kvm_io_bus_init(&kvm->mmio_bus);
 	for (i = 0; i < KVM_MAX_VCPUS; ++i) {
@@ -457,7 +457,7 @@ static int load_pdptrs(struct kvm_vcpu *vcpu, unsigned long cr3)
 	int ret;
 	struct page *page;
 
-	spin_lock(&vcpu->kvm->lock);
+	mutex_lock(&vcpu->kvm->lock);
 	page = gfn_to_page(vcpu->kvm, pdpt_gfn);
 	/* FIXME: !page - emulate? 0xff? */
 	pdpt = kmap_atomic(page, KM_USER0);
@@ -476,7 +476,7 @@ static int load_pdptrs(struct kvm_vcpu *vcpu, unsigned long cr3)
 
 out:
 	kunmap_atomic(pdpt, KM_USER0);
-	spin_unlock(&vcpu->kvm->lock);
+	mutex_unlock(&vcpu->kvm->lock);
 
 	return ret;
 }
@@ -536,9 +536,9 @@ void set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
 	kvm_arch_ops->set_cr0(vcpu, cr0);
 	vcpu->cr0 = cr0;
 
-	spin_lock(&vcpu->kvm->lock);
+	mutex_lock(&vcpu->kvm->lock);
 	kvm_mmu_reset_context(vcpu);
-	spin_unlock(&vcpu->kvm->lock);
+	mutex_unlock(&vcpu->kvm->lock);
 	return;
 }
 EXPORT_SYMBOL_GPL(set_cr0);
@@ -577,9 +577,9 @@ void set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
 		return;
 	}
 	kvm_arch_ops->set_cr4(vcpu, cr4);
-	spin_lock(&vcpu->kvm->lock);
+	mutex_lock(&vcpu->kvm->lock);
 	kvm_mmu_reset_context(vcpu);
-	spin_unlock(&vcpu->kvm->lock);
+	mutex_unlock(&vcpu->kvm->lock);
 }
 EXPORT_SYMBOL_GPL(set_cr4);
 
@@ -616,7 +616,7 @@ void set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
 	}
 
 	vcpu->cr3 = cr3;
-	spin_lock(&vcpu->kvm->lock);
+	mutex_lock(&vcpu->kvm->lock);
 	/*
 	 * Does the new cr3 value map to physical memory? (Note, we
 	 * catch an invalid cr3 even in real-mode, because it would
@@ -630,7 +630,7 @@ void set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
 		inject_gp(vcpu);
 	else
 		vcpu->mmu.new_cr3(vcpu);
-	spin_unlock(&vcpu->kvm->lock);
+	mutex_unlock(&vcpu->kvm->lock);
 }
 EXPORT_SYMBOL_GPL(set_cr3);
 
@@ -707,7 +707,7 @@ static int kvm_vm_ioctl_set_memory_region(struct kvm *kvm,
 		mem->flags &= ~KVM_MEM_LOG_DIRTY_PAGES;
 
 raced:
-	spin_lock(&kvm->lock);
+	mutex_lock(&kvm->lock);
 
 	memory_config_version = kvm->memory_config_version;
 	new = old = *memslot;
@@ -736,7 +736,7 @@ raced:
 	 * Do memory allocations outside lock.  memory_config_version will
 	 * detect any races.
 	 */
-	spin_unlock(&kvm->lock);
+	mutex_unlock(&kvm->lock);
 
 	/* Deallocate if slot is being removed */
 	if (!npages)
@@ -775,10 +775,10 @@ raced:
 		memset(new.dirty_bitmap, 0, dirty_bytes);
 	}
 
-	spin_lock(&kvm->lock);
+	mutex_lock(&kvm->lock);
 
 	if (memory_config_version != kvm->memory_config_version) {
-		spin_unlock(&kvm->lock);
+		mutex_unlock(&kvm->lock);
 		kvm_free_physmem_slot(&new, &old);
 		goto raced;
 	}
@@ -796,13 +796,13 @@ raced:
 	kvm_mmu_slot_remove_write_access(kvm, mem->slot);
 	kvm_flush_remote_tlbs(kvm);
 
-	spin_unlock(&kvm->lock);
+	mutex_unlock(&kvm->lock);
 
 	kvm_free_physmem_slot(&old, &new);
 	return 0;
 
 out_unlock:
-	spin_unlock(&kvm->lock);
+	mutex_unlock(&kvm->lock);
 out_free:
 	kvm_free_physmem_slot(&new, &old);
 out:
@@ -820,14 +820,14 @@ static int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
 	int n;
 	unsigned long any = 0;
 
-	spin_lock(&kvm->lock);
+	mutex_lock(&kvm->lock);
 
 	/*
 	 * Prevent changes to guest memory configuration even while the lock
 	 * is not taken.
 	 */
 	++kvm->busy;
-	spin_unlock(&kvm->lock);
+	mutex_unlock(&kvm->lock);
 	r = -EINVAL;
 	if (log->slot >= KVM_MEMORY_SLOTS)
 		goto out;
@@ -846,18 +846,18 @@ static int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
 	if (copy_to_user(log->dirty_bitmap, memslot->dirty_bitmap, n))
 		goto out;
 
-	spin_lock(&kvm->lock);
+	mutex_lock(&kvm->lock);
 	kvm_mmu_slot_remove_write_access(kvm, log->slot);
 	kvm_flush_remote_tlbs(kvm);
 	memset(memslot->dirty_bitmap, 0, n);
-	spin_unlock(&kvm->lock);
+	mutex_unlock(&kvm->lock);
 
 	r = 0;
 
 out:
-	spin_lock(&kvm->lock);
+	mutex_lock(&kvm->lock);
 	--kvm->busy;
-	spin_unlock(&kvm->lock);
+	mutex_unlock(&kvm->lock);
 	return r;
 }
 
@@ -887,7 +887,7 @@ static int kvm_vm_ioctl_set_memory_alias(struct kvm *kvm,
 	    < alias->target_phys_addr)
 		goto out;
 
-	spin_lock(&kvm->lock);
+	mutex_lock(&kvm->lock);
 
 	p = &kvm->aliases[alias->slot];
 	p->base_gfn = alias->guest_phys_addr >> PAGE_SHIFT;
@@ -901,7 +901,7 @@ static int kvm_vm_ioctl_set_memory_alias(struct kvm *kvm,
 
 	kvm_mmu_zap_all(kvm);
 
-	spin_unlock(&kvm->lock);
+	mutex_unlock(&kvm->lock);
 
 	return 0;
 
@@ -1884,12 +1884,12 @@ int kvm_setup_pio(struct kvm_vcpu *vcpu, struct kvm_run *run, int in,
 	vcpu->pio.cur_count = now;
 
 	for (i = 0; i < nr_pages; ++i) {
-		spin_lock(&vcpu->kvm->lock);
+		mutex_lock(&vcpu->kvm->lock);
 		page = gva_to_page(vcpu, address + i * PAGE_SIZE);
 		if (page)
 			get_page(page);
 		vcpu->pio.guest_pages[i] = page;
-		spin_unlock(&vcpu->kvm->lock);
+		mutex_unlock(&vcpu->kvm->lock);
 		if (!page) {
 			inject_gp(vcpu);
 			free_pio_guest_pages(vcpu);
@@ -2282,13 +2282,13 @@ static int kvm_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
 	gpa_t gpa;
 
 	vcpu_load(vcpu);
-	spin_lock(&vcpu->kvm->lock);
+	mutex_lock(&vcpu->kvm->lock);
 	gpa = vcpu->mmu.gva_to_gpa(vcpu, vaddr);
 	tr->physical_address = gpa;
 	tr->valid = gpa != UNMAPPED_GVA;
 	tr->writeable = 1;
 	tr->usermode = 0;
-	spin_unlock(&vcpu->kvm->lock);
+	mutex_unlock(&vcpu->kvm->lock);
 	vcpu_put(vcpu);
 
 	return 0;
diff --git a/drivers/kvm/mmu.c b/drivers/kvm/mmu.c
index fdb967a..bfe16d5 100644
--- a/drivers/kvm/mmu.c
+++ b/drivers/kvm/mmu.c
@@ -274,9 +274,9 @@ static int mmu_topup_memory_caches(struct kvm_vcpu *vcpu)
 
 	r = __mmu_topup_memory_caches(vcpu, GFP_NOWAIT);
 	if (r < 0) {
-		spin_unlock(&vcpu->kvm->lock);
+		mutex_unlock(&vcpu->kvm->lock);
 		r = __mmu_topup_memory_caches(vcpu, GFP_KERNEL);
-		spin_lock(&vcpu->kvm->lock);
+		mutex_lock(&vcpu->kvm->lock);
 	}
 	return r;
 }
@@ -1067,7 +1067,7 @@ int kvm_mmu_load(struct kvm_vcpu *vcpu)
 {
 	int r;
 
-	spin_lock(&vcpu->kvm->lock);
+	mutex_lock(&vcpu->kvm->lock);
 	r = mmu_topup_memory_caches(vcpu);
 	if (r)
 		goto out;
@@ -1075,7 +1075,7 @@ int kvm_mmu_load(struct kvm_vcpu *vcpu)
 	kvm_arch_ops->set_cr3(vcpu, vcpu->mmu.root_hpa);
 	kvm_mmu_flush_tlb(vcpu);
 out:
-	spin_unlock(&vcpu->kvm->lock);
+	mutex_unlock(&vcpu->kvm->lock);
 	return r;
 }
 EXPORT_SYMBOL_GPL(kvm_mmu_load);
diff --git a/drivers/kvm/svm.c b/drivers/kvm/svm.c
index e3a4722..ac7d8b0 100644
--- a/drivers/kvm/svm.c
+++ b/drivers/kvm/svm.c
@@ -905,21 +905,21 @@ static int pf_interception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 	if (is_external_interrupt(exit_int_info))
 		push_irq(vcpu, exit_int_info & SVM_EVTINJ_VEC_MASK);
 
-	spin_lock(&vcpu->kvm->lock);
+	mutex_lock(&vcpu->kvm->lock);
 
 	fault_address  = vcpu->svm->vmcb->control.exit_info_2;
 	error_code = vcpu->svm->vmcb->control.exit_info_1;
 	r = kvm_mmu_page_fault(vcpu, fault_address, error_code);
 	if (r < 0) {
-		spin_unlock(&vcpu->kvm->lock);
+		mutex_unlock(&vcpu->kvm->lock);
 		return r;
 	}
 	if (!r) {
-		spin_unlock(&vcpu->kvm->lock);
+		mutex_unlock(&vcpu->kvm->lock);
 		return 1;
 	}
 	er = emulate_instruction(vcpu, kvm_run, fault_address, error_code);
-	spin_unlock(&vcpu->kvm->lock);
+	mutex_unlock(&vcpu->kvm->lock);
 
 	switch (er) {
 	case EMULATE_DONE:
diff --git a/drivers/kvm/vmx.c b/drivers/kvm/vmx.c
index 90f28f0..ece992f 100644
--- a/drivers/kvm/vmx.c
+++ b/drivers/kvm/vmx.c
@@ -1596,19 +1596,19 @@ static int handle_exception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 	if (is_page_fault(intr_info)) {
 		cr2 = vmcs_readl(EXIT_QUALIFICATION);
 
-		spin_lock(&vcpu->kvm->lock);
+		mutex_lock(&vcpu->kvm->lock);
 		r = kvm_mmu_page_fault(vcpu, cr2, error_code);
 		if (r < 0) {
-			spin_unlock(&vcpu->kvm->lock);
+			mutex_unlock(&vcpu->kvm->lock);
 			return r;
 		}
 		if (!r) {
-			spin_unlock(&vcpu->kvm->lock);
+			mutex_unlock(&vcpu->kvm->lock);
 			return 1;
 		}
 
 		er = emulate_instruction(vcpu, kvm_run, cr2, error_code);
-		spin_unlock(&vcpu->kvm->lock);
+		mutex_unlock(&vcpu->kvm->lock);
 
 		switch (er) {
 		case EMULATE_DONE:
-- 
1.5.2.4


^ permalink raw reply related	[flat|nested] 10+ messages in thread

end of thread, other threads:[~2007-07-25 13:14 UTC | newest]

Thread overview: 10+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2007-07-25 12:43 [PATCH 0/3] Scheduler preemption hooks, #2 Avi Kivity
2007-07-25 12:43 ` [PATCH 1/3] SCHED: Generic hooks for trapping task preemption Avi Kivity
2007-07-25 12:49   ` Avi Kivity
2007-07-25 12:49   ` Ingo Molnar
2007-07-25 12:51     ` Avi Kivity
2007-07-25 12:52     ` Ingo Molnar
2007-07-25 13:13       ` Avi Kivity
2007-07-25 13:03     ` Ingo Molnar
2007-07-25 12:43 ` [PATCH 2/3] KVM: Use the scheduler preemption hooks to make kvm preemptible Avi Kivity
2007-07-25 12:43 ` [PATCH 3/3] KVM: Convert vm lock to a mutex Avi Kivity

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox