From: Andrea Arcangeli <andrea-l3A5Bk7waGM@public.gmane.org>
To: Avi Kivity <avi-atKUWr5tajBWk0Htik3J/w@public.gmane.org>
Cc: kvm-devel-5NWGOfrQmneRv+LV9MX5uipxlwaOVQ5f@public.gmane.org
Subject: Re: external module sched_in event
Date: Sun, 23 Dec 2007 17:49:33 +0100 [thread overview]
Message-ID: <20071223164932.GA8483@v2.random> (raw)
In-Reply-To: <476D61E8.5000102-atKUWr5tajBWk0Htik3J/w@public.gmane.org>
On Sat, Dec 22, 2007 at 09:13:44PM +0200, Avi Kivity wrote:
> Unfortunately, this fails badly on Intel i386:
Hmm, ok, there's a definite bug: I forgot that an int1 kernel->kernel
switch on x86 has no special debug stack like on x86-64. This will
have a better chance to work; I hope I got all the offsets right from
memory.... At least the offset "32" in the leal and eax + fastcall
should all be right, or I doubt it could survive the double
dereferencing. Likely the one-more-dereference didn't oops there because
you likely have >=1g of ram, and there was a 25% chance of crashing due
to the lack of sched-in and a 75% chance of crashing in the
one-more-dereference in a more meaningful way.
Signed-off-by: Andrea Arcangeli <andrea-l3A5Bk7waGM@public.gmane.org>
diff --git a/kernel/hack-module.awk b/kernel/hack-module.awk
index 7993aa2..5187c96 100644
--- a/kernel/hack-module.awk
+++ b/kernel/hack-module.awk
@@ -24,32 +24,6 @@
printf("MODULE_INFO(version, \"%s\");\n", version)
}
-/^static unsigned long vmcs_readl/ {
- in_vmcs_read = 1
-}
-
-/ASM_VMX_VMREAD_RDX_RAX/ && in_vmcs_read {
- printf("\tstart_special_insn();\n")
-}
-
-/return/ && in_vmcs_read {
- printf("\tend_special_insn();\n");
- in_vmcs_read = 0
-}
-
-/^static void vmcs_writel/ {
- in_vmcs_write = 1
-}
-
-/ASM_VMX_VMWRITE_RAX_RDX/ && in_vmcs_write {
- printf("\tstart_special_insn();\n")
-}
-
-/if/ && in_vmcs_write {
- printf("\tend_special_insn();\n");
- in_vmcs_write = 0
-}
-
/^static void vmx_load_host_state/ {
vmx_load_host_state = 1
}
@@ -74,15 +48,6 @@
print "\tspecial_reload_dr7();"
}
-/static void vcpu_put|static int __vcpu_run|static struct kvm_vcpu \*vmx_create_vcpu/ {
- in_tricky_func = 1
-}
-
-/preempt_disable|get_cpu/ && in_tricky_func {
- printf("\tin_special_section();\n");
- in_tricky_func = 0
-}
-
/unsigned long flags;/ && vmx_load_host_state {
print "\tunsigned long gsbase;"
}
@@ -90,4 +55,3 @@
/local_irq_save/ && vmx_load_host_state {
print "\t\tgsbase = vmcs_readl(HOST_GS_BASE);"
}
-
diff --git a/kernel/preempt.c b/kernel/preempt.c
index 8bb0405..fd6f8dc 100644
--- a/kernel/preempt.c
+++ b/kernel/preempt.c
@@ -6,8 +6,6 @@
static DEFINE_SPINLOCK(pn_lock);
static LIST_HEAD(pn_list);
-static DEFINE_PER_CPU(int, notifier_enabled);
-static DEFINE_PER_CPU(struct task_struct *, last_tsk);
#define dprintk(fmt) do { \
if (0) \
@@ -15,59 +13,95 @@ static DEFINE_PER_CPU(struct task_struct *, last_tsk);
current->pid, raw_smp_processor_id()); \
} while (0)
-static void preempt_enable_notifiers(void)
+static void preempt_enable_sched_out_notifiers(void)
{
- int cpu = raw_smp_processor_id();
-
- if (per_cpu(notifier_enabled, cpu))
- return;
-
- dprintk("\n");
- per_cpu(notifier_enabled, cpu) = 1;
asm volatile ("mov %0, %%db0" : : "r"(schedule));
- asm volatile ("mov %0, %%db7" : : "r"(0x702ul));
+ asm volatile ("mov %0, %%db7" : : "r"(0x701ul));
+#ifdef CONFIG_X86_64
+ current->thread.debugreg7 = 0ul;
+#else
+ current->thread.debugreg[7] = 0ul;
+#endif
+#ifdef TIF_DEBUG
+ clear_tsk_thread_flag(current, TIF_DEBUG);
+#endif
+}
+
+static void preempt_enable_sched_in_notifiers(void * addr)
+{
+ asm volatile ("mov %0, %%db0" : : "r"(addr));
+ asm volatile ("mov %0, %%db7" : : "r"(0x701ul));
+#ifdef CONFIG_X86_64
+ current->thread.debugreg0 = (unsigned long) addr;
+ current->thread.debugreg7 = 0x701ul;
+#else
+ current->thread.debugreg[0] = (unsigned long) addr;
+ current->thread.debugreg[7] = 0x701ul;
+#endif
+#ifdef TIF_DEBUG
+ set_tsk_thread_flag(current, TIF_DEBUG);
+#endif
}
void special_reload_dr7(void)
{
- asm volatile ("mov %0, %%db7" : : "r"(0x702ul));
+ asm volatile ("mov %0, %%db7" : : "r"(0x701ul));
}
EXPORT_SYMBOL_GPL(special_reload_dr7);
-static void preempt_disable_notifiers(void)
+static void __preempt_disable_notifiers(void)
{
- int cpu = raw_smp_processor_id();
-
- if (!per_cpu(notifier_enabled, cpu))
- return;
+ asm volatile ("mov %0, %%db7" : : "r"(0ul));
+}
- dprintk("\n");
- per_cpu(notifier_enabled, cpu) = 0;
- asm volatile ("mov %0, %%db7" : : "r"(0x400ul));
+static void preempt_disable_notifiers(void)
+{
+ __preempt_disable_notifiers();
+#ifdef CONFIG_X86_64
+ current->thread.debugreg7 = 0ul;
+#else
+ current->thread.debugreg[7] = 0ul;
+#endif
+#ifdef TIF_DEBUG
+ clear_tsk_thread_flag(current, TIF_DEBUG);
+#endif
}
-static void __attribute__((used)) preempt_notifier_trigger(void)
+static void fastcall __attribute__((used)) preempt_notifier_trigger(void *** ip)
{
struct preempt_notifier *pn;
int cpu = raw_smp_processor_id();
int found = 0;
- unsigned long flags;
dprintk(" - in\n");
//dump_stack();
- spin_lock_irqsave(&pn_lock, flags);
+ spin_lock(&pn_lock);
list_for_each_entry(pn, &pn_list, link)
if (pn->tsk == current) {
found = 1;
break;
}
- spin_unlock_irqrestore(&pn_lock, flags);
- preempt_disable_notifiers();
+ spin_unlock(&pn_lock);
+
if (found) {
- dprintk("sched_out\n");
- pn->ops->sched_out(pn, NULL);
- per_cpu(last_tsk, cpu) = NULL;
- }
+ if ((void *) *ip != schedule) {
+ dprintk("sched_in\n");
+ preempt_enable_sched_out_notifiers();
+ pn->ops->sched_in(pn, cpu);
+ } else {
+ void * sched_in_addr;
+ dprintk("sched_out\n");
+#ifdef CONFIG_X86_64
+ sched_in_addr = **(ip+3);
+#else
+ /* no special debug stack switch on x86 */
+ sched_in_addr = (void *) *(ip+3);
+#endif
+ preempt_enable_sched_in_notifiers(sched_in_addr);
+ pn->ops->sched_out(pn, NULL);
+ }
+ } else
+ __preempt_disable_notifiers();
dprintk(" - out\n");
}
@@ -104,6 +138,11 @@ asm ("pn_int1_handler: \n\t"
"pop " TMP " \n\t"
"jz .Lnotme \n\t"
SAVE_REGS "\n\t"
+#ifdef CONFIG_X86_64
+ "leaq 120(%rsp),%rdi\n\t"
+#else
+ "leal 32(%esp),%eax\n\t"
+#endif
"call preempt_notifier_trigger \n\t"
RESTORE_REGS "\n\t"
#ifdef CONFIG_X86_64
@@ -121,75 +160,28 @@ asm ("pn_int1_handler: \n\t"
#endif
);
-void in_special_section(void)
-{
- struct preempt_notifier *pn;
- int cpu = raw_smp_processor_id();
- int found = 0;
- unsigned long flags;
-
- if (per_cpu(last_tsk, cpu) == current)
- return;
-
- dprintk(" - in\n");
- spin_lock_irqsave(&pn_lock, flags);
- list_for_each_entry(pn, &pn_list, link)
- if (pn->tsk == current) {
- found = 1;
- break;
- }
- spin_unlock_irqrestore(&pn_lock, flags);
- if (found) {
- dprintk("\n");
- per_cpu(last_tsk, cpu) = current;
- pn->ops->sched_in(pn, cpu);
- preempt_enable_notifiers();
- }
- dprintk(" - out\n");
-}
-EXPORT_SYMBOL_GPL(in_special_section);
-
-void start_special_insn(void)
-{
- preempt_disable();
- in_special_section();
-}
-EXPORT_SYMBOL_GPL(start_special_insn);
-
-void end_special_insn(void)
-{
- preempt_enable();
-}
-EXPORT_SYMBOL_GPL(end_special_insn);
-
void preempt_notifier_register(struct preempt_notifier *notifier)
{
- int cpu = get_cpu();
unsigned long flags;
dprintk(" - in\n");
spin_lock_irqsave(&pn_lock, flags);
- preempt_enable_notifiers();
+ preempt_enable_sched_out_notifiers();
notifier->tsk = current;
list_add(¬ifier->link, &pn_list);
spin_unlock_irqrestore(&pn_lock, flags);
- per_cpu(last_tsk, cpu) = current;
- put_cpu();
dprintk(" - out\n");
}
void preempt_notifier_unregister(struct preempt_notifier *notifier)
{
- int cpu = get_cpu();
unsigned long flags;
dprintk(" - in\n");
spin_lock_irqsave(&pn_lock, flags);
list_del(¬ifier->link);
spin_unlock_irqrestore(&pn_lock, flags);
- per_cpu(last_tsk, cpu) = NULL;
preempt_disable_notifiers();
- put_cpu();
dprintk(" - out\n");
}
@@ -238,7 +230,16 @@ void preempt_notifier_sys_init(void)
static void do_disable(void *blah)
{
- preempt_disable_notifiers();
+#ifdef TIF_DEBUG
+ if (!test_tsk_thread_flag(current, TIF_DEBUG))
+#else
+#ifdef CONFIG_X86_64
+ if (!current->thread.debugreg7)
+#else
+ if (!current->thread.debugreg[7])
+#endif
+#endif
+ __preempt_disable_notifiers();
}
void preempt_notifier_sys_exit(void)
>
> > kvm: emulating preempt notifiers; do not benchmark on this machine
> > loaded kvm module (kvm-56-127-g433be51)
> > vmwrite error: reg c08 value d8 (err 3080)
> > [<f8baf9e2>] vmx_save_host_state+0x4f/0x162 [kvm_intel]
> > [<c0425803>] __cond_resched+0x25/0x3c
> > [<f91a22a4>] kvm_arch_vcpu_ioctl_run+0x16f/0x3a7 [kvm]
> > [<f919f244>] kvm_vcpu_ioctl+0xcb/0x28f [kvm]
> > [<c0421987>] enqueue_entity+0x2c0/0x2ea
> > [<c05a8340>] skb_dequeue+0x39/0x3f
> > [<c0604b6d>] unix_stream_recvmsg+0x3a2/0x4c3
> > [<c0425c82>] scheduler_tick+0x1a1/0x274
> > [<c0487329>] core_sys_select+0x21f/0x2fa
> > [<c043e9e6>] clockevents_program_event+0xb5/0xbc
> > [<c04c6853>] avc_has_perm+0x4e/0x58
> > [<c04c7174>] inode_has_perm+0x66/0x6e
> > [<c0430bed>] recalc_sigpending+0xb/0x1d
> > [<c043231d>] dequeue_signal+0xa9/0x12a
> > [<c043cb95>] getnstimeofday+0x30/0xbf
> > [<c04c7205>] file_has_perm+0x89/0x91
> > [<f919f179>] kvm_vcpu_ioctl+0x0/0x28f [kvm]
> > [<c04861b9>] do_ioctl+0x21/0xa0
> > [<c048646f>] vfs_ioctl+0x237/0x249
> > [<c04864cd>] sys_ioctl+0x4c/0x67
> > [<c0404f26>] sysenter_past_esp+0x5f/0x85
> > =======================
>
> vmwrite error means the vmcs pointer was not loaded, probably because
> the sched_in event did not fire after a vcpu migration.
>
> --
> Do not meddle in the internals of kernels, for they are subtle and quick to panic.
-------------------------------------------------------------------------
This SF.net email is sponsored by: Microsoft
Defy all challenges. Microsoft(R) Visual Studio 2005.
http://clk.atdmt.com/MRT/go/vse0120000070mrt/direct/01/
next prev parent reply other threads:[~2007-12-23 16:49 UTC|newest]
Thread overview: 10+ messages / expand[flat|nested] mbox.gz Atom feed top
2007-12-20 16:23 external module sched_in event Andrea Arcangeli
[not found] ` <20071220162353.GA3802-lysg2Xt5kKMAvxtiuMwx3w@public.gmane.org>
2007-12-22 19:13 ` Avi Kivity
[not found] ` <476D61E8.5000102-atKUWr5tajBWk0Htik3J/w@public.gmane.org>
2007-12-23 16:49 ` Andrea Arcangeli [this message]
[not found] ` <20071223164932.GA8483-lysg2Xt5kKMAvxtiuMwx3w@public.gmane.org>
2007-12-23 17:37 ` Avi Kivity
[not found] ` <476E9CE4.2060705-atKUWr5tajBWk0Htik3J/w@public.gmane.org>
2007-12-24 16:26 ` Andrea Arcangeli
[not found] ` <20071224162639.GH8483-lysg2Xt5kKMAvxtiuMwx3w@public.gmane.org>
2007-12-25 9:00 ` Avi Kivity
-- strict thread matches above, loose matches on Subject: below --
2007-12-21 17:40 Andrea Arcangeli
[not found] ` <20071221174048.GB1292-lysg2Xt5kKMAvxtiuMwx3w@public.gmane.org>
2007-12-21 17:52 ` Izik Eidus
[not found] ` <476BFD74.2040509-atKUWr5tajBWk0Htik3J/w@public.gmane.org>
2007-12-21 18:22 ` Andrea Arcangeli
2007-12-22 20:24 ` Avi Kivity
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20071223164932.GA8483@v2.random \
--to=andrea-l3a5bk7wagm@public.gmane.org \
--cc=avi-atKUWr5tajBWk0Htik3J/w@public.gmane.org \
--cc=kvm-devel-5NWGOfrQmneRv+LV9MX5uipxlwaOVQ5f@public.gmane.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox