public inbox for kvm@vger.kernel.org
 help / color / mirror / Atom feed
From: Andrea Arcangeli <andrea-l3A5Bk7waGM@public.gmane.org>
To: Avi Kivity <avi-atKUWr5tajBWk0Htik3J/w@public.gmane.org>
Cc: kvm-devel-5NWGOfrQmneRv+LV9MX5uipxlwaOVQ5f@public.gmane.org
Subject: Re: external module sched_in event
Date: Sun, 23 Dec 2007 17:49:33 +0100	[thread overview]
Message-ID: <20071223164932.GA8483@v2.random> (raw)
In-Reply-To: <476D61E8.5000102-atKUWr5tajBWk0Htik3J/w@public.gmane.org>

On Sat, Dec 22, 2007 at 09:13:44PM +0200, Avi Kivity wrote:
> Unfortunately, this fails badly on Intel i386:

Hmm ok, there's a definite bug: I forgot that an int1 kernel->kernel
switch on x86 has no special debug stack like on x86-64. This will
have a better chance to work; I hope I got all the offsets right from
memory.... At least the offset "32" in the leal and eax + fastcall
should all be right, or I doubt it could survive the double
dereferencing. Likely the one-more-dereference didn't oops there because
you likely have >=1g of ram, and there was a 25% chance of crashing due
to the lack of sched-in and a 75% chance of crashing in the
one-more-dereference in a more meaningful way.

Signed-off-by: Andrea Arcangeli <andrea-l3A5Bk7waGM@public.gmane.org>

diff --git a/kernel/hack-module.awk b/kernel/hack-module.awk
index 7993aa2..5187c96 100644
--- a/kernel/hack-module.awk
+++ b/kernel/hack-module.awk
@@ -24,32 +24,6 @@
     printf("MODULE_INFO(version, \"%s\");\n", version)
 }
 
-/^static unsigned long vmcs_readl/ {
-    in_vmcs_read = 1
-}
-
-/ASM_VMX_VMREAD_RDX_RAX/ && in_vmcs_read {
-    printf("\tstart_special_insn();\n")
-}
-
-/return/ && in_vmcs_read {
-    printf("\tend_special_insn();\n");
-    in_vmcs_read = 0
-}
-
-/^static void vmcs_writel/ {
-    in_vmcs_write = 1
-}
-
-/ASM_VMX_VMWRITE_RAX_RDX/ && in_vmcs_write {
-    printf("\tstart_special_insn();\n")
-}
-
-/if/ && in_vmcs_write {
-    printf("\tend_special_insn();\n");
-    in_vmcs_write = 0
-}
-
 /^static void vmx_load_host_state/ {
     vmx_load_host_state = 1
 }
@@ -74,15 +48,6 @@
     print "\tspecial_reload_dr7();"
 }
 
-/static void vcpu_put|static int __vcpu_run|static struct kvm_vcpu \*vmx_create_vcpu/ {
-    in_tricky_func = 1
-}
-
-/preempt_disable|get_cpu/ && in_tricky_func {
-    printf("\tin_special_section();\n");
-    in_tricky_func = 0
-}
-
 /unsigned long flags;/ &&  vmx_load_host_state {
     print "\tunsigned long gsbase;"
 }
@@ -90,4 +55,3 @@
 /local_irq_save/ &&  vmx_load_host_state {
     print "\t\tgsbase = vmcs_readl(HOST_GS_BASE);"
 }
-
diff --git a/kernel/preempt.c b/kernel/preempt.c
index 8bb0405..fd6f8dc 100644
--- a/kernel/preempt.c
+++ b/kernel/preempt.c
@@ -6,8 +6,6 @@
 
 static DEFINE_SPINLOCK(pn_lock);
 static LIST_HEAD(pn_list);
-static DEFINE_PER_CPU(int, notifier_enabled);
-static DEFINE_PER_CPU(struct task_struct *, last_tsk);
 
 #define dprintk(fmt) do {						\
 		if (0)							\
@@ -15,59 +13,95 @@ static DEFINE_PER_CPU(struct task_struct *, last_tsk);
 			       current->pid, raw_smp_processor_id());	\
 	} while (0)
 
-static void preempt_enable_notifiers(void)
+static void preempt_enable_sched_out_notifiers(void)
 {
-	int cpu = raw_smp_processor_id();
-
-	if (per_cpu(notifier_enabled, cpu))
-		return;
-
-	dprintk("\n");
-	per_cpu(notifier_enabled, cpu) = 1;
 	asm volatile ("mov %0, %%db0" : : "r"(schedule));
-	asm volatile ("mov %0, %%db7" : : "r"(0x702ul));
+	asm volatile ("mov %0, %%db7" : : "r"(0x701ul));
+#ifdef CONFIG_X86_64
+	current->thread.debugreg7 = 0ul;
+#else
+	current->thread.debugreg[7] = 0ul;
+#endif
+#ifdef TIF_DEBUG
+	clear_tsk_thread_flag(current, TIF_DEBUG);
+#endif
+}
+
+static void preempt_enable_sched_in_notifiers(void * addr)
+{
+	asm volatile ("mov %0, %%db0" : : "r"(addr));
+	asm volatile ("mov %0, %%db7" : : "r"(0x701ul));
+#ifdef CONFIG_X86_64
+	current->thread.debugreg0 = (unsigned long) addr;
+	current->thread.debugreg7 = 0x701ul;
+#else
+	current->thread.debugreg[0] = (unsigned long) addr;
+	current->thread.debugreg[7] = 0x701ul;
+#endif
+#ifdef TIF_DEBUG
+	set_tsk_thread_flag(current, TIF_DEBUG);
+#endif
 }
 
 void special_reload_dr7(void)
 {
-	asm volatile ("mov %0, %%db7" : : "r"(0x702ul));
+	asm volatile ("mov %0, %%db7" : : "r"(0x701ul));
 }
 EXPORT_SYMBOL_GPL(special_reload_dr7);
 
-static void preempt_disable_notifiers(void)
+static void __preempt_disable_notifiers(void)
 {
-	int cpu = raw_smp_processor_id();
-
-	if (!per_cpu(notifier_enabled, cpu))
-		return;
+	asm volatile ("mov %0, %%db7" : : "r"(0ul));
+}
 
-	dprintk("\n");
-	per_cpu(notifier_enabled, cpu) = 0;
-	asm volatile ("mov %0, %%db7" : : "r"(0x400ul));
+static void preempt_disable_notifiers(void)
+{
+	__preempt_disable_notifiers();
+#ifdef CONFIG_X86_64
+	current->thread.debugreg7 = 0ul;
+#else
+	current->thread.debugreg[7] = 0ul;
+#endif
+#ifdef TIF_DEBUG
+	clear_tsk_thread_flag(current, TIF_DEBUG);
+#endif
 }
 
-static void  __attribute__((used)) preempt_notifier_trigger(void)
+static void fastcall  __attribute__((used)) preempt_notifier_trigger(void *** ip)
 {
 	struct preempt_notifier *pn;
 	int cpu = raw_smp_processor_id();
 	int found = 0;
-	unsigned long flags;
 
 	dprintk(" - in\n");
 	//dump_stack();
-	spin_lock_irqsave(&pn_lock, flags);
+	spin_lock(&pn_lock);
 	list_for_each_entry(pn, &pn_list, link)
 		if (pn->tsk == current) {
 			found = 1;
 			break;
 		}
-	spin_unlock_irqrestore(&pn_lock, flags);
-	preempt_disable_notifiers();
+	spin_unlock(&pn_lock);
+
 	if (found) {
-		dprintk("sched_out\n");
-		pn->ops->sched_out(pn, NULL);
-		per_cpu(last_tsk, cpu) = NULL;
-	}
+		if ((void *) *ip != schedule) {
+			dprintk("sched_in\n");
+			preempt_enable_sched_out_notifiers();
+			pn->ops->sched_in(pn, cpu);
+		} else {
+			void * sched_in_addr;
+			dprintk("sched_out\n");
+#ifdef CONFIG_X86_64
+			sched_in_addr = **(ip+3);
+#else
+			/* no special debug stack switch on x86 */
+			sched_in_addr = (void *) *(ip+3);
+#endif
+			preempt_enable_sched_in_notifiers(sched_in_addr);
+			pn->ops->sched_out(pn, NULL);
+		}
+	} else
+		__preempt_disable_notifiers();
 	dprintk(" - out\n");
 }
 
@@ -104,6 +138,11 @@ asm ("pn_int1_handler:  \n\t"
      "pop "  TMP " \n\t"
      "jz .Lnotme \n\t"
      SAVE_REGS "\n\t"
+#ifdef CONFIG_X86_64
+     "leaq 120(%rsp),%rdi\n\t"
+#else
+     "leal 32(%esp),%eax\n\t"
+#endif
      "call preempt_notifier_trigger \n\t"
      RESTORE_REGS "\n\t"
 #ifdef CONFIG_X86_64
@@ -121,75 +160,28 @@ asm ("pn_int1_handler:  \n\t"
 #endif
 	);
 
-void in_special_section(void)
-{
-	struct preempt_notifier *pn;
-	int cpu = raw_smp_processor_id();
-	int found = 0;
-	unsigned long flags;
-
-	if (per_cpu(last_tsk, cpu) == current)
-		return;
-
-	dprintk(" - in\n");
-	spin_lock_irqsave(&pn_lock, flags);
-	list_for_each_entry(pn, &pn_list, link)
-		if (pn->tsk == current) {
-			found = 1;
-			break;
-		}
-	spin_unlock_irqrestore(&pn_lock, flags);
-	if (found) {
-		dprintk("\n");
-		per_cpu(last_tsk, cpu) = current;
-		pn->ops->sched_in(pn, cpu);
-		preempt_enable_notifiers();
-	}
-	dprintk(" - out\n");
-}
-EXPORT_SYMBOL_GPL(in_special_section);
-
-void start_special_insn(void)
-{
-	preempt_disable();
-	in_special_section();
-}
-EXPORT_SYMBOL_GPL(start_special_insn);
-
-void end_special_insn(void)
-{
-	preempt_enable();
-}
-EXPORT_SYMBOL_GPL(end_special_insn);
-
 void preempt_notifier_register(struct preempt_notifier *notifier)
 {
-	int cpu = get_cpu();
 	unsigned long flags;
 
 	dprintk(" - in\n");
 	spin_lock_irqsave(&pn_lock, flags);
-	preempt_enable_notifiers();
+	preempt_enable_sched_out_notifiers();
 	notifier->tsk = current;
 	list_add(&notifier->link, &pn_list);
 	spin_unlock_irqrestore(&pn_lock, flags);
-	per_cpu(last_tsk, cpu) = current;
-	put_cpu();
 	dprintk(" - out\n");
 }
 
 void preempt_notifier_unregister(struct preempt_notifier *notifier)
 {
-	int cpu = get_cpu();
 	unsigned long flags;
 
 	dprintk(" - in\n");
 	spin_lock_irqsave(&pn_lock, flags);
 	list_del(&notifier->link);
 	spin_unlock_irqrestore(&pn_lock, flags);
-	per_cpu(last_tsk, cpu) = NULL;
 	preempt_disable_notifiers();
-	put_cpu();
 	dprintk(" - out\n");
 }
 
@@ -238,7 +230,16 @@ void preempt_notifier_sys_init(void)
 
 static void do_disable(void *blah)
 {
-	preempt_disable_notifiers();
+#ifdef TIF_DEBUG
+	if (!test_tsk_thread_flag(current, TIF_DEBUG))
+#else
+#ifdef CONFIG_X86_64
+	if (!current->thread.debugreg7)
+#else
+	if (!current->thread.debugreg[7])
+#endif
+#endif
+		__preempt_disable_notifiers();
 }
 
 void preempt_notifier_sys_exit(void)


> 
> > kvm: emulating preempt notifiers; do not benchmark on this machine
> > loaded kvm module (kvm-56-127-g433be51)
> > vmwrite error: reg c08 value d8 (err 3080)
> >  [<f8baf9e2>] vmx_save_host_state+0x4f/0x162 [kvm_intel]
> >  [<c0425803>] __cond_resched+0x25/0x3c
> >  [<f91a22a4>] kvm_arch_vcpu_ioctl_run+0x16f/0x3a7 [kvm]
> >  [<f919f244>] kvm_vcpu_ioctl+0xcb/0x28f [kvm]
> >  [<c0421987>] enqueue_entity+0x2c0/0x2ea
> >  [<c05a8340>] skb_dequeue+0x39/0x3f
> >  [<c0604b6d>] unix_stream_recvmsg+0x3a2/0x4c3
> >  [<c0425c82>] scheduler_tick+0x1a1/0x274
> >  [<c0487329>] core_sys_select+0x21f/0x2fa
> >  [<c043e9e6>] clockevents_program_event+0xb5/0xbc
> >  [<c04c6853>] avc_has_perm+0x4e/0x58
> >  [<c04c7174>] inode_has_perm+0x66/0x6e
> >  [<c0430bed>] recalc_sigpending+0xb/0x1d
> >  [<c043231d>] dequeue_signal+0xa9/0x12a
> >  [<c043cb95>] getnstimeofday+0x30/0xbf
> >  [<c04c7205>] file_has_perm+0x89/0x91
> >  [<f919f179>] kvm_vcpu_ioctl+0x0/0x28f [kvm]
> >  [<c04861b9>] do_ioctl+0x21/0xa0
> >  [<c048646f>] vfs_ioctl+0x237/0x249
> >  [<c04864cd>] sys_ioctl+0x4c/0x67
> >  [<c0404f26>] sysenter_past_esp+0x5f/0x85
> >  =======================
> 
> vmwrite error means the vmcs pointer was not loaded, probably because
> the sched_in event did not fire after a vcpu migration.
> 
> -- 
> Do not meddle in the internals of kernels, for they are subtle and quick to panic.

-------------------------------------------------------------------------
This SF.net email is sponsored by: Microsoft
Defy all challenges. Microsoft(R) Visual Studio 2005.
http://clk.atdmt.com/MRT/go/vse0120000070mrt/direct/01/

  parent reply	other threads:[~2007-12-23 16:49 UTC|newest]

Thread overview: 10+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2007-12-20 16:23 external module sched_in event Andrea Arcangeli
     [not found] ` <20071220162353.GA3802-lysg2Xt5kKMAvxtiuMwx3w@public.gmane.org>
2007-12-22 19:13   ` Avi Kivity
     [not found]     ` <476D61E8.5000102-atKUWr5tajBWk0Htik3J/w@public.gmane.org>
2007-12-23 16:49       ` Andrea Arcangeli [this message]
     [not found]         ` <20071223164932.GA8483-lysg2Xt5kKMAvxtiuMwx3w@public.gmane.org>
2007-12-23 17:37           ` Avi Kivity
     [not found]             ` <476E9CE4.2060705-atKUWr5tajBWk0Htik3J/w@public.gmane.org>
2007-12-24 16:26               ` Andrea Arcangeli
     [not found]                 ` <20071224162639.GH8483-lysg2Xt5kKMAvxtiuMwx3w@public.gmane.org>
2007-12-25  9:00                   ` Avi Kivity
  -- strict thread matches above, loose matches on Subject: below --
2007-12-21 17:40 Andrea Arcangeli
     [not found] ` <20071221174048.GB1292-lysg2Xt5kKMAvxtiuMwx3w@public.gmane.org>
2007-12-21 17:52   ` Izik Eidus
     [not found]     ` <476BFD74.2040509-atKUWr5tajBWk0Htik3J/w@public.gmane.org>
2007-12-21 18:22       ` Andrea Arcangeli
2007-12-22 20:24       ` Avi Kivity

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20071223164932.GA8483@v2.random \
    --to=andrea-l3a5bk7wagm@public.gmane.org \
    --cc=avi-atKUWr5tajBWk0Htik3J/w@public.gmane.org \
    --cc=kvm-devel-5NWGOfrQmneRv+LV9MX5uipxlwaOVQ5f@public.gmane.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox