All of lore.kernel.org
 help / color / mirror / Atom feed
From: Keir Fraser <keir.xen@gmail.com>
To: Olaf Hering <olaf@aepfle.de>
Cc: xen-devel@lists.xensource.com
Subject: Re: Need help with fixing the Xen waitqueue feature
Date: Wed, 23 Nov 2011 21:03:39 +0000	[thread overview]
Message-ID: <CAF3102F.25730%keir.xen@gmail.com> (raw)
In-Reply-To: <CAF2F84A.2571A%keir.xen@gmail.com>

[-- Attachment #1: Type: text/plain, Size: 1645 bytes --]

On 23/11/2011 19:21, "Keir Fraser" <keir.xen@gmail.com> wrote:

> On 23/11/2011 18:31, "Olaf Hering" <olaf@aepfle.de> wrote:
> 
>> On Wed, Nov 23, Keir Fraser wrote:
>> 
>>> We have quite a big waitqueue problem actually. The current scheme of
>>> per-cpu stacks doesn't work nicely, as the stack pointer will change if a
>>> vcpu goes to sleep and then wakes up on a different cpu. This really doesn't
>>> work nicely with preempted C code, which may implement frame pointers and/or
>>> arbitrarily take the address of on-stack variables. The result will be
>>> hideous cross-stack corruptions, as these frame pointers and cached
>>> addresses of automatic variables will reference the wrong cpu's stack!
>>> Fixing or detecting this in general is not possible afaics.
>> 
>> Yes, I was thinking about that wakeup on different cpu as well.
>> As a quick fix/hack, perhaps the scheduler could make sure the vcpu
>> wakes up on the same cpu?
> 
> Could save old affinity and then vcpu_set_affinity. That will have to do for
> now. Actually it should work okay as long as toolstack doesn't mess with
> affinity meanwhile. I'll sort out a patch for this.

Attached three patches for you to try. They apply in sequence.
00: A fixed version of "domain_crash on stack overflow"
01: Reorders prepare_to_wait so that the vcpu will always be on the
waitqueue on exit (even if it has just been woken).
02: Ensures the vcpu wakes up on the same cpu that it slept on.

We need all of these. Just need testing to make sure they aren't horribly
broken. You should be able to test multi-processor host again with these.

 -- Keir

>  -- Keir
> 
>> Olaf
> 
> 


[-- Attachment #2: 00-prep-to-wait-dom-crash --]
[-- Type: application/octet-stream, Size: 2335 bytes --]

# HG changeset patch
# Parent 84b3e46aa7d24a4605c36940606e7da9679b0e7f

diff -r 84b3e46aa7d2 xen/common/wait.c
--- a/xen/common/wait.c	Wed Nov 23 12:03:37 2011 +0000
+++ b/xen/common/wait.c	Wed Nov 23 19:43:35 2011 +0000
@@ -106,13 +106,16 @@ void wake_up(struct waitqueue_head *wq)
 static void __prepare_to_wait(struct waitqueue_vcpu *wqv)
 {
     char *cpu_info = (char *)get_cpu_info();
+
     asm volatile (
 #ifdef CONFIG_X86_64
         "push %%rax; push %%rbx; push %%rcx; push %%rdx; push %%rdi; "
         "push %%rbp; push %%r8; push %%r9; push %%r10; push %%r11; "
         "push %%r12; push %%r13; push %%r14; push %%r15; call 1f; "
         "1: mov 80(%%rsp),%%rdi; mov 96(%%rsp),%%rcx; mov %%rsp,%%rsi; "
-        "sub %%rsi,%%rcx; rep movsb; mov %%rsp,%%rsi; pop %%rax; "
+        "sub %%rsi,%%rcx; cmp %3,%%rcx; jbe 2f; "
+        "xor %%esi,%%esi; jmp 3f; "
+        "2: rep movsb; mov %%rsp,%%rsi; 3: pop %%rax; "
         "pop %%r15; pop %%r14; pop %%r13; pop %%r12; "
         "pop %%r11; pop %%r10; pop %%r9; pop %%r8; "
         "pop %%rbp; pop %%rdi; pop %%rdx; pop %%rcx; pop %%rbx; pop %%rax"
@@ -120,13 +123,20 @@ static void __prepare_to_wait(struct wai
         "push %%eax; push %%ebx; push %%ecx; push %%edx; push %%edi; "
         "push %%ebp; call 1f; "
         "1: mov 8(%%esp),%%edi; mov 16(%%esp),%%ecx; mov %%esp,%%esi; "
-        "sub %%esi,%%ecx; rep movsb; mov %%esp,%%esi; pop %%eax; "
+        "sub %%esi,%%ecx; cmp %3,%%ecx; jbe 2f; "
+        "xor %%esi,%%esi; jmp 3f; "
+        "2: rep movsb; mov %%esp,%%esi; 3: pop %%eax; "
         "pop %%ebp; pop %%edi; pop %%edx; pop %%ecx; pop %%ebx; pop %%eax"
 #endif
         : "=S" (wqv->esp)
-        : "c" (cpu_info), "D" (wqv->stack)
+        : "c" (cpu_info), "D" (wqv->stack), "i" (PAGE_SIZE)
         : "memory" );
-    BUG_ON((cpu_info - (char *)wqv->esp) > PAGE_SIZE);
+
+    if ( unlikely(wqv->esp == 0) )
+    {
+        gdprintk(XENLOG_ERR, "Stack too large in %s\n", __FUNCTION__);
+        domain_crash_synchronous();
+    }
 }
 
 static void __finish_wait(struct waitqueue_vcpu *wqv)
@@ -162,6 +172,7 @@ void prepare_to_wait(struct waitqueue_he
     struct vcpu *curr = current;
     struct waitqueue_vcpu *wqv = curr->waitqueue_vcpu;
 
+    ASSERT(!in_atomic());
     ASSERT(list_empty(&wqv->list));
 
     spin_lock(&wq->lock);

[-- Attachment #3: 01-prep-to-wait-reorder --]
[-- Type: application/octet-stream, Size: 920 bytes --]

# HG changeset patch
# Parent cc055dab3606529fcabc28685c5bf0debdfc213c
diff -r cc055dab3606 -r 84628291e585 xen/common/wait.c
--- a/xen/common/wait.c	Wed Nov 23 18:01:44 2011 +0000
+++ b/xen/common/wait.c	Wed Nov 23 18:03:55 2011 +0000
@@ -107,6 +107,8 @@ static void __prepare_to_wait(struct wai
 {
     char *cpu_info = (char *)get_cpu_info();
 
+    ASSERT(wqv->esp == 0);
+
     asm volatile (
 #ifdef CONFIG_X86_64
         "push %%rax; push %%rbx; push %%rcx; push %%rdx; push %%rdi; "
@@ -173,14 +175,13 @@ void prepare_to_wait(struct waitqueue_he
     struct waitqueue_vcpu *wqv = curr->waitqueue_vcpu;
 
     ASSERT(!in_atomic());
+    __prepare_to_wait(wqv);
+
     ASSERT(list_empty(&wqv->list));
-
     spin_lock(&wq->lock);
     list_add_tail(&wqv->list, &wq->list);
     vcpu_pause_nosync(curr);
     spin_unlock(&wq->lock);
-
-    __prepare_to_wait(wqv);
 }
 
 void finish_wait(struct waitqueue_head *wq)

[-- Attachment #4: 02-waitq-set-vcpu-affinity --]
[-- Type: application/octet-stream, Size: 2130 bytes --]

# HG changeset patch
# Parent 18a1f35af1c55ddbd87bd39a9317c38ffa2a1f7a
diff -r 18a1f35af1c5 -r 0f22064d98ae xen/common/wait.c
--- a/xen/common/wait.c	Wed Nov 23 19:43:46 2011 +0000
+++ b/xen/common/wait.c	Wed Nov 23 20:02:05 2011 +0000
@@ -34,6 +34,8 @@ struct waitqueue_vcpu {
      */
     void *esp;
     char *stack;
+    cpumask_t saved_affinity;
+    unsigned int wakeup_cpu;
 #endif
 };
 
@@ -106,9 +108,19 @@ void wake_up(struct waitqueue_head *wq)
 static void __prepare_to_wait(struct waitqueue_vcpu *wqv)
 {
     char *cpu_info = (char *)get_cpu_info();
+    struct vcpu *curr = current;
 
     ASSERT(wqv->esp == 0);
 
+    /* Save current VCPU affinity; force wakeup on *this* CPU only. */
+    wqv->wakeup_cpu = smp_processor_id();
+    cpumask_copy(&wqv->saved_affinity, curr->cpu_affinity);
+    if ( vcpu_set_affinity(curr, cpumask_of(wqv->wakeup_cpu)) )
+    {
+        gdprintk(XENLOG_ERR, "Unable to set vcpu affinity\n");
+        domain_crash_synchronous();
+    }
+
     asm volatile (
 #ifdef CONFIG_X86_64
         "push %%rax; push %%rbx; push %%rcx; push %%rdx; push %%rdi; "
@@ -144,6 +156,7 @@ static void __prepare_to_wait(struct wai
 static void __finish_wait(struct waitqueue_vcpu *wqv)
 {
     wqv->esp = NULL;
+    (void)vcpu_set_affinity(current, &wqv->saved_affinity);
 }
 
 void check_wakeup_from_wait(void)
@@ -155,6 +168,20 @@ void check_wakeup_from_wait(void)
     if ( likely(wqv->esp == NULL) )
         return;
 
+    /* Check if we woke up on the wrong CPU. */
+    if ( unlikely(smp_processor_id() != wqv->wakeup_cpu) )
+    {
+        /* Re-set VCPU affinity and re-enter the scheduler. */
+        struct vcpu *curr = current;
+        cpumask_copy(&wqv->saved_affinity, curr->cpu_affinity);
+        if ( vcpu_set_affinity(curr, cpumask_of(wqv->wakeup_cpu)) )
+        {
+            gdprintk(XENLOG_ERR, "Unable to set vcpu affinity\n");
+            domain_crash_synchronous();
+        }
+        wait(); /* takes us back into the scheduler */
+    }
+
     asm volatile (
         "mov %1,%%"__OP"sp; rep movsb; jmp *(%%"__OP"sp)"
         : : "S" (wqv->stack), "D" (wqv->esp),

[-- Attachment #5: Type: text/plain, Size: 138 bytes --]

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xensource.com
http://lists.xensource.com/xen-devel

  reply	other threads:[~2011-11-23 21:03 UTC|newest]

Thread overview: 47+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
     [not found] <20111122150755.GA18727@aepfle.de>
2011-11-22 15:40 ` Need help with fixing the Xen waitqueue feature Keir Fraser
2011-11-22 15:54   ` Keir Fraser
2011-11-22 17:36   ` Olaf Hering
2011-11-22 17:42     ` Keir Fraser
2011-11-22 18:04       ` Olaf Hering
2011-11-22 21:15         ` Olaf Hering
2011-11-22 21:53           ` Keir Fraser
2011-11-23 17:00             ` Olaf Hering
2011-11-23 17:16               ` Keir Fraser
2011-11-23 18:06                 ` Olaf Hering
2011-11-23 18:23                   ` Keir Fraser
2011-11-23 18:18                 ` Keir Fraser
2011-11-23 18:31                   ` Olaf Hering
2011-11-23 19:21                     ` Keir Fraser
2011-11-23 21:03                       ` Keir Fraser [this message]
2011-11-23 22:30                         ` Olaf Hering
2011-11-23 23:12                           ` Keir Fraser
2011-11-24 10:00                             ` Olaf Hering
2011-11-25 12:56                               ` Olaf Hering
2011-11-25 18:26                                 ` Olaf Hering
2011-11-25 19:35                                   ` Keir Fraser
2011-11-24  9:15                         ` Jan Beulich
2011-11-24  9:51                           ` Keir Fraser
2011-11-24  9:58                           ` Keir Fraser
     [not found] <20111108224414.83985CF73A@homiemail-mx7.g.dreamhost.com>
2011-11-09  3:52 ` Andres Lagar-Cavilla
2011-11-09  7:09   ` Olaf Hering
2011-11-09 21:21     ` Andres Lagar-Cavilla
2011-11-22 14:34       ` George Dunlap
2011-11-09 21:30     ` Andres Lagar-Cavilla
2011-11-09 22:11       ` Olaf Hering
2011-11-10  4:29         ` Andres Lagar-Cavilla
2011-11-10  9:20           ` Jan Beulich
2011-11-10  9:26           ` Keir Fraser
2011-11-10 10:18           ` Olaf Hering
2011-11-10 12:05             ` Olaf Hering
     [not found] <20111108214540.EAEBB72C4A1@homiemail-mx8.g.dreamhost.com>
2011-11-09  3:37 ` Andres Lagar-Cavilla
2011-11-09  7:02   ` Olaf Hering
2011-11-08 21:20 Olaf Hering
2011-11-08 22:05 ` Keir Fraser
2011-11-08 22:20   ` Olaf Hering
2011-11-08 22:54     ` Keir Fraser
2011-11-11 22:56       ` Olaf Hering
2011-11-12  7:00         ` Keir Fraser
2011-11-22 11:40           ` Olaf Hering
2011-11-22 13:04             ` Keir Fraser
2011-11-22 13:54               ` Olaf Hering
2011-11-22 14:24                 ` Keir Fraser

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=CAF3102F.25730%keir.xen@gmail.com \
    --to=keir.xen@gmail.com \
    --cc=olaf@aepfle.de \
    --cc=xen-devel@lists.xensource.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.