From: Andrea Arcangeli <andrea-l3A5Bk7waGM@public.gmane.org>
To: Avi Kivity <avi-atKUWr5tajBWk0Htik3J/w@public.gmane.org>
Cc: kvm-devel-5NWGOfrQmneRv+LV9MX5uipxlwaOVQ5f@public.gmane.org
Subject: Re: external module sched_in event
Date: Sun, 23 Dec 2007 17:49:33 +0100 [thread overview]
Message-ID: <20071223164932.GA8483@v2.random> (raw)
In-Reply-To: <476D61E8.5000102-atKUWr5tajBWk0Htik3J/w@public.gmane.org>
On Sat, Dec 22, 2007 at 09:13:44PM +0200, Avi Kivity wrote:
> Unfortunately, this fails badly on Intel i386:
Hmm ok, there's a definite bug: I forgot that an int1 kernel->kernel
switch on x86 has no special debug stack like on x86-64. This will
have a better chance to work; I hope I got all the offsets right from
memory.... At least the offset "32" in the leal and eax + fastcall
should all be right, or I doubt it could survive the double
dereferencing. Likely the one-more-dereference didn't oops there because
you likely have >=1g of ram and there was a 25% chance of crashing due
to the lack of sched-in and 75% chance of crashing in the
one-more-dereference in a more meaningful way.
Signed-off-by: Andrea Arcangeli <andrea-l3A5Bk7waGM@public.gmane.org>
diff --git a/kernel/hack-module.awk b/kernel/hack-module.awk
index 7993aa2..5187c96 100644
--- a/kernel/hack-module.awk
+++ b/kernel/hack-module.awk
@@ -24,32 +24,6 @@
printf("MODULE_INFO(version, \"%s\");\n", version)
}
-/^static unsigned long vmcs_readl/ {
- in_vmcs_read = 1
-}
-
-/ASM_VMX_VMREAD_RDX_RAX/ && in_vmcs_read {
- printf("\tstart_special_insn();\n")
-}
-
-/return/ && in_vmcs_read {
- printf("\tend_special_insn();\n");
- in_vmcs_read = 0
-}
-
-/^static void vmcs_writel/ {
- in_vmcs_write = 1
-}
-
-/ASM_VMX_VMWRITE_RAX_RDX/ && in_vmcs_write {
- printf("\tstart_special_insn();\n")
-}
-
-/if/ && in_vmcs_write {
- printf("\tend_special_insn();\n");
- in_vmcs_write = 0
-}
-
/^static void vmx_load_host_state/ {
vmx_load_host_state = 1
}
@@ -74,15 +48,6 @@
print "\tspecial_reload_dr7();"
}
-/static void vcpu_put|static int __vcpu_run|static struct kvm_vcpu \*vmx_create_vcpu/ {
- in_tricky_func = 1
-}
-
-/preempt_disable|get_cpu/ && in_tricky_func {
- printf("\tin_special_section();\n");
- in_tricky_func = 0
-}
-
/unsigned long flags;/ && vmx_load_host_state {
print "\tunsigned long gsbase;"
}
@@ -90,4 +55,3 @@
/local_irq_save/ && vmx_load_host_state {
print "\t\tgsbase = vmcs_readl(HOST_GS_BASE);"
}
-
diff --git a/kernel/preempt.c b/kernel/preempt.c
index 8bb0405..fd6f8dc 100644
--- a/kernel/preempt.c
+++ b/kernel/preempt.c
@@ -6,8 +6,6 @@
static DEFINE_SPINLOCK(pn_lock);
static LIST_HEAD(pn_list);
-static DEFINE_PER_CPU(int, notifier_enabled);
-static DEFINE_PER_CPU(struct task_struct *, last_tsk);
#define dprintk(fmt) do { \
if (0) \
@@ -15,59 +13,95 @@ static DEFINE_PER_CPU(struct task_struct *, last_tsk);
current->pid, raw_smp_processor_id()); \
} while (0)
-static void preempt_enable_notifiers(void)
+static void preempt_enable_sched_out_notifiers(void)
{
- int cpu = raw_smp_processor_id();
-
- if (per_cpu(notifier_enabled, cpu))
- return;
-
- dprintk("\n");
- per_cpu(notifier_enabled, cpu) = 1;
asm volatile ("mov %0, %%db0" : : "r"(schedule));
- asm volatile ("mov %0, %%db7" : : "r"(0x702ul));
+ asm volatile ("mov %0, %%db7" : : "r"(0x701ul));
+#ifdef CONFIG_X86_64
+ current->thread.debugreg7 = 0ul;
+#else
+ current->thread.debugreg[7] = 0ul;
+#endif
+#ifdef TIF_DEBUG
+ clear_tsk_thread_flag(current, TIF_DEBUG);
+#endif
+}
+
+static void preempt_enable_sched_in_notifiers(void * addr)
+{
+ asm volatile ("mov %0, %%db0" : : "r"(addr));
+ asm volatile ("mov %0, %%db7" : : "r"(0x701ul));
+#ifdef CONFIG_X86_64
+ current->thread.debugreg0 = (unsigned long) addr;
+ current->thread.debugreg7 = 0x701ul;
+#else
+ current->thread.debugreg[0] = (unsigned long) addr;
+ current->thread.debugreg[7] = 0x701ul;
+#endif
+#ifdef TIF_DEBUG
+ set_tsk_thread_flag(current, TIF_DEBUG);
+#endif
}
void special_reload_dr7(void)
{
- asm volatile ("mov %0, %%db7" : : "r"(0x702ul));
+ asm volatile ("mov %0, %%db7" : : "r"(0x701ul));
}
EXPORT_SYMBOL_GPL(special_reload_dr7);
-static void preempt_disable_notifiers(void)
+static void __preempt_disable_notifiers(void)
{
- int cpu = raw_smp_processor_id();
-
- if (!per_cpu(notifier_enabled, cpu))
- return;
+ asm volatile ("mov %0, %%db7" : : "r"(0ul));
+}
- dprintk("\n");
- per_cpu(notifier_enabled, cpu) = 0;
- asm volatile ("mov %0, %%db7" : : "r"(0x400ul));
+static void preempt_disable_notifiers(void)
+{
+ __preempt_disable_notifiers();
+#ifdef CONFIG_X86_64
+ current->thread.debugreg7 = 0ul;
+#else
+ current->thread.debugreg[7] = 0ul;
+#endif
+#ifdef TIF_DEBUG
+ clear_tsk_thread_flag(current, TIF_DEBUG);
+#endif
}
-static void __attribute__((used)) preempt_notifier_trigger(void)
+static void fastcall __attribute__((used)) preempt_notifier_trigger(void *** ip)
{
struct preempt_notifier *pn;
int cpu = raw_smp_processor_id();
int found = 0;
- unsigned long flags;
dprintk(" - in\n");
//dump_stack();
- spin_lock_irqsave(&pn_lock, flags);
+ spin_lock(&pn_lock);
list_for_each_entry(pn, &pn_list, link)
if (pn->tsk == current) {
found = 1;
break;
}
- spin_unlock_irqrestore(&pn_lock, flags);
- preempt_disable_notifiers();
+ spin_unlock(&pn_lock);
+
if (found) {
- dprintk("sched_out\n");
- pn->ops->sched_out(pn, NULL);
- per_cpu(last_tsk, cpu) = NULL;
- }
+ if ((void *) *ip != schedule) {
+ dprintk("sched_in\n");
+ preempt_enable_sched_out_notifiers();
+ pn->ops->sched_in(pn, cpu);
+ } else {
+ void * sched_in_addr;
+ dprintk("sched_out\n");
+#ifdef CONFIG_X86_64
+ sched_in_addr = **(ip+3);
+#else
+ /* no special debug stack switch on x86 */
+ sched_in_addr = (void *) *(ip+3);
+#endif
+ preempt_enable_sched_in_notifiers(sched_in_addr);
+ pn->ops->sched_out(pn, NULL);
+ }
+ } else
+ __preempt_disable_notifiers();
dprintk(" - out\n");
}
@@ -104,6 +138,11 @@ asm ("pn_int1_handler: \n\t"
"pop " TMP " \n\t"
"jz .Lnotme \n\t"
SAVE_REGS "\n\t"
+#ifdef CONFIG_X86_64
+ "leaq 120(%rsp),%rdi\n\t"
+#else
+ "leal 32(%esp),%eax\n\t"
+#endif
"call preempt_notifier_trigger \n\t"
RESTORE_REGS "\n\t"
#ifdef CONFIG_X86_64
@@ -121,75 +160,28 @@ asm ("pn_int1_handler: \n\t"
#endif
);
-void in_special_section(void)
-{
- struct preempt_notifier *pn;
- int cpu = raw_smp_processor_id();
- int found = 0;
- unsigned long flags;
-
- if (per_cpu(last_tsk, cpu) == current)
- return;
-
- dprintk(" - in\n");
- spin_lock_irqsave(&pn_lock, flags);
- list_for_each_entry(pn, &pn_list, link)
- if (pn->tsk == current) {
- found = 1;
- break;
- }
- spin_unlock_irqrestore(&pn_lock, flags);
- if (found) {
- dprintk("\n");
- per_cpu(last_tsk, cpu) = current;
- pn->ops->sched_in(pn, cpu);
- preempt_enable_notifiers();
- }
- dprintk(" - out\n");
-}
-EXPORT_SYMBOL_GPL(in_special_section);
-
-void start_special_insn(void)
-{
- preempt_disable();
- in_special_section();
-}
-EXPORT_SYMBOL_GPL(start_special_insn);
-
-void end_special_insn(void)
-{
- preempt_enable();
-}
-EXPORT_SYMBOL_GPL(end_special_insn);
-
void preempt_notifier_register(struct preempt_notifier *notifier)
{
- int cpu = get_cpu();
unsigned long flags;
dprintk(" - in\n");
spin_lock_irqsave(&pn_lock, flags);
- preempt_enable_notifiers();
+ preempt_enable_sched_out_notifiers();
notifier->tsk = current;
list_add(&notifier->link, &pn_list);
spin_unlock_irqrestore(&pn_lock, flags);
- per_cpu(last_tsk, cpu) = current;
- put_cpu();
dprintk(" - out\n");
}
void preempt_notifier_unregister(struct preempt_notifier *notifier)
{
- int cpu = get_cpu();
unsigned long flags;
dprintk(" - in\n");
spin_lock_irqsave(&pn_lock, flags);
list_del(&notifier->link);
spin_unlock_irqrestore(&pn_lock, flags);
- per_cpu(last_tsk, cpu) = NULL;
preempt_disable_notifiers();
- put_cpu();
dprintk(" - out\n");
}
@@ -238,7 +230,16 @@ void preempt_notifier_sys_init(void)
static void do_disable(void *blah)
{
- preempt_disable_notifiers();
+#ifdef TIF_DEBUG
+ if (!test_tsk_thread_flag(current, TIF_DEBUG))
+#else
+#ifdef CONFIG_X86_64
+ if (!current->thread.debugreg7)
+#else
+ if (!current->thread.debugreg[7])
+#endif
+#endif
+ __preempt_disable_notifiers();
}
void preempt_notifier_sys_exit(void)
>
> > kvm: emulating preempt notifiers; do not benchmark on this machine
> > loaded kvm module (kvm-56-127-g433be51)
> > vmwrite error: reg c08 value d8 (err 3080)
> > [<f8baf9e2>] vmx_save_host_state+0x4f/0x162 [kvm_intel]
> > [<c0425803>] __cond_resched+0x25/0x3c
> > [<f91a22a4>] kvm_arch_vcpu_ioctl_run+0x16f/0x3a7 [kvm]
> > [<f919f244>] kvm_vcpu_ioctl+0xcb/0x28f [kvm]
> > [<c0421987>] enqueue_entity+0x2c0/0x2ea
> > [<c05a8340>] skb_dequeue+0x39/0x3f
> > [<c0604b6d>] unix_stream_recvmsg+0x3a2/0x4c3
> > [<c0425c82>] scheduler_tick+0x1a1/0x274
> > [<c0487329>] core_sys_select+0x21f/0x2fa
> > [<c043e9e6>] clockevents_program_event+0xb5/0xbc
> > [<c04c6853>] avc_has_perm+0x4e/0x58
> > [<c04c7174>] inode_has_perm+0x66/0x6e
> > [<c0430bed>] recalc_sigpending+0xb/0x1d
> > [<c043231d>] dequeue_signal+0xa9/0x12a
> > [<c043cb95>] getnstimeofday+0x30/0xbf
> > [<c04c7205>] file_has_perm+0x89/0x91
> > [<f919f179>] kvm_vcpu_ioctl+0x0/0x28f [kvm]
> > [<c04861b9>] do_ioctl+0x21/0xa0
> > [<c048646f>] vfs_ioctl+0x237/0x249
> > [<c04864cd>] sys_ioctl+0x4c/0x67
> > [<c0404f26>] sysenter_past_esp+0x5f/0x85
> > =======================
>
> vmwrite error means the vmcs pointer was not loaded, probably because
> the sched_in event did not fire after a vcpu migration.
>
> --
> Do not meddle in the internals of kernels, for they are subtle and quick to panic.
-------------------------------------------------------------------------
This SF.net email is sponsored by: Microsoft
Defy all challenges. Microsoft(R) Visual Studio 2005.
http://clk.atdmt.com/MRT/go/vse0120000070mrt/direct/01/
next prev parent reply other threads:[~2007-12-23 16:49 UTC|newest]
Thread overview: 10+ messages / expand[flat|nested] mbox.gz Atom feed top
2007-12-20 16:23 external module sched_in event Andrea Arcangeli
[not found] ` <20071220162353.GA3802-lysg2Xt5kKMAvxtiuMwx3w@public.gmane.org>
2007-12-22 19:13 ` Avi Kivity
[not found] ` <476D61E8.5000102-atKUWr5tajBWk0Htik3J/w@public.gmane.org>
2007-12-23 16:49 ` Andrea Arcangeli [this message]
[not found] ` <20071223164932.GA8483-lysg2Xt5kKMAvxtiuMwx3w@public.gmane.org>
2007-12-23 17:37 ` Avi Kivity
[not found] ` <476E9CE4.2060705-atKUWr5tajBWk0Htik3J/w@public.gmane.org>
2007-12-24 16:26 ` Andrea Arcangeli
[not found] ` <20071224162639.GH8483-lysg2Xt5kKMAvxtiuMwx3w@public.gmane.org>
2007-12-25 9:00 ` Avi Kivity
-- strict thread matches above, loose matches on Subject: below --
2007-12-21 17:40 Andrea Arcangeli
[not found] ` <20071221174048.GB1292-lysg2Xt5kKMAvxtiuMwx3w@public.gmane.org>
2007-12-21 17:52 ` Izik Eidus
[not found] ` <476BFD74.2040509-atKUWr5tajBWk0Htik3J/w@public.gmane.org>
2007-12-21 18:22 ` Andrea Arcangeli
2007-12-22 20:24 ` Avi Kivity
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20071223164932.GA8483@v2.random \
--to=andrea-l3a5bk7wagm@public.gmane.org \
--cc=avi-atKUWr5tajBWk0Htik3J/w@public.gmane.org \
--cc=kvm-devel-5NWGOfrQmneRv+LV9MX5uipxlwaOVQ5f@public.gmane.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.