All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] serialize suspend-resume process
@ 2008-07-31 11:09 BVK Chaitanya
  2008-07-31 11:17 ` Keir Fraser
  0 siblings, 1 reply; 16+ messages in thread
From: BVK Chaitanya @ 2008-07-31 11:09 UTC (permalink / raw)
  To: Xen-devel

[-- Attachment #1: Type: text/plain, Size: 264 bytes --]

Hi,


With current suspend-resume code, there is no way for DOM-0 to identify 
that DOM-U resume is completed.  It can potentially trigger suspend 
again before resume is completed.

Below patch makes suspend-resume process serialized.


regards,
--
bvk-chaitanya

[-- Attachment #2: serialize-suspend.patch --]
[-- Type: text/x-patch, Size: 6055 bytes --]

suspend-resume process is serialized.

Under heavy load a suspend interrupt can create a new kthread and
start suspend process before old kthread's resuming completes.  This
patch serialzes suspend-resume using a counting semaphore and a
separate kthread.  Suspend interrupts up the semaphore and suspend
kthread downs the semaphore in a while(1) loop.

diff -r c2850a7f279d drivers/xen/core/reboot.c
--- a/drivers/xen/core/reboot.c	Wed Jul 30 20:15:23 2008 +0530
+++ b/drivers/xen/core/reboot.c	Thu Jul 31 21:35:46 2008 +0530
@@ -11,6 +11,7 @@
 #include <linux/kmod.h>
 #include <linux/slab.h>
 #include <linux/workqueue.h>
+#include <linux/kthread.h>
 
 #ifdef HAVE_XEN_PLATFORM_COMPAT_H
 #include <xen/platform-compat.h>
@@ -20,8 +21,6 @@ MODULE_LICENSE("Dual BSD/GPL");
 
 #define SHUTDOWN_INVALID  -1
 #define SHUTDOWN_POWEROFF  0
-#define SHUTDOWN_SUSPEND   2
-#define SHUTDOWN_RESUMING  3
 #define SHUTDOWN_HALT      4
 
 /* Ignore multiple shutdown requests. */
@@ -32,6 +31,11 @@ static int fast_suspend;
 
 static void __shutdown_handler(void *unused);
 static DECLARE_WORK(shutdown_work, __shutdown_handler, NULL);
+
+static struct suspend_info {
+        struct semaphore nrequest; /* no. of suspend reqs */
+        atomic_t         resuming; /* is domain resuming ? */
+} suspend_info;
 
 int __xen_suspend(int fast_suspend, void (*resume_notifier)(void));
 
@@ -62,58 +66,11 @@ static int shutdown_process(void *__unus
 	return 0;
 }
 
-static void xen_resume_notifier(void)
-{
-	int old_state = xchg(&shutting_down, SHUTDOWN_RESUMING);
-	BUG_ON(old_state != SHUTDOWN_SUSPEND);
-}
-
-static int xen_suspend(void *__unused)
-{
-	int err, old_state;
-
-	daemonize("suspend");
-	err = set_cpus_allowed(current, cpumask_of_cpu(0));
-	if (err) {
-		printk(KERN_ERR "Xen suspend can't run on CPU0 (%d)\n", err);
-		goto fail;
-	}
-
-	do {
-		err = __xen_suspend(fast_suspend, xen_resume_notifier);
-		if (err) {
-			printk(KERN_ERR "Xen suspend failed (%d)\n", err);
-			goto fail;
-		}
-		old_state = cmpxchg(
-			&shutting_down, SHUTDOWN_RESUMING, SHUTDOWN_INVALID);
-	} while (old_state == SHUTDOWN_SUSPEND);
-
-	switch (old_state) {
-	case SHUTDOWN_INVALID:
-	case SHUTDOWN_SUSPEND:
-		BUG();
-	case SHUTDOWN_RESUMING:
-		break;
-	default:
-		schedule_work(&shutdown_work);
-		break;
-	}
-
-	return 0;
-
- fail:
-	old_state = xchg(&shutting_down, SHUTDOWN_INVALID);
-	BUG_ON(old_state != SHUTDOWN_SUSPEND);
-	return 0;
-}
-
 static void __shutdown_handler(void *unused)
 {
 	int err;
 
-	err = kernel_thread((shutting_down == SHUTDOWN_SUSPEND) ?
-			    xen_suspend : shutdown_process,
+	err = kernel_thread(shutdown_process,
 			    NULL, CLONE_FS | CLONE_FILES);
 
 	if (err < 0) {
@@ -131,8 +88,8 @@ static void shutdown_handler(struct xenb
 	struct xenbus_transaction xbt;
 	int err, old_state, new_state = SHUTDOWN_INVALID;
 
-	if ((shutting_down != SHUTDOWN_INVALID) &&
-	    (shutting_down != SHUTDOWN_RESUMING))
+	if ((shutting_down != SHUTDOWN_INVALID) ||
+            atomic_read(&suspend_info.resuming))
 		return;
 
  again:
@@ -155,12 +112,12 @@ static void shutdown_handler(struct xenb
 		goto again;
 	}
 
-	if (strcmp(str, "poweroff") == 0)
-		new_state = SHUTDOWN_POWEROFF;
+        if (strcmp(str, "suspend") == 0)
+                up(&suspend_info.nrequest); /* backward compatibility */
 	else if (strcmp(str, "reboot") == 0)
 		ctrl_alt_del();
-	else if (strcmp(str, "suspend") == 0)
-		new_state = SHUTDOWN_SUSPEND;
+	else if (strcmp(str, "poweroff") == 0)
+		new_state = SHUTDOWN_POWEROFF;
 	else if (strcmp(str, "halt") == 0)
 		new_state = SHUTDOWN_HALT;
 	else
@@ -168,10 +125,8 @@ static void shutdown_handler(struct xenb
 
 	if (new_state != SHUTDOWN_INVALID) {
 		old_state = xchg(&shutting_down, new_state);
-		if (old_state == SHUTDOWN_INVALID)
-			schedule_work(&shutdown_work);
-		else
-			BUG_ON(old_state != SHUTDOWN_RESUMING);
+                BUG_ON(old_state != SHUTDOWN_INVALID);
+                schedule_work(&shutdown_work);
 	}
 
 	kfree(str);
@@ -218,10 +173,41 @@ static struct xenbus_watch sysrq_watch =
 	.callback = sysrq_handler
 };
 
+static void suspend_resume_notifier(void)
+{
+        atomic_set(&suspend_info.resuming, 1);
+        return;
+}
+
+static int suspend_machine(void *unused)
+{
+        int err = 0;
+        struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 };
+
+        err = set_cpus_allowed(current, cpumask_of_cpu(0));
+        if (err) {
+                printk(KERN_ERR "Suspend thread couldn't switch to CPU0 (%d)\n", err);
+                return err;
+        }
+
+        sched_setscheduler(current, SCHED_FIFO, &param);
+
+        while (1) {
+                /* decrement no. of suspend requests */
+                down(&suspend_info.nrequest);
+
+                err = __xen_suspend(fast_suspend, &suspend_resume_notifier);
+                if (err)
+                        printk(KERN_ERR "Domain suspend failed (%d)\n", err);
+
+                atomic_set(&suspend_info.resuming, 0);
+        }
+}
+
 static irqreturn_t suspend_int(int irq, void* dev_id, struct pt_regs *ptregs)
 {
-	shutting_down = SHUTDOWN_SUSPEND;
-	schedule_work(&shutdown_work);
+        /* increment no. of suspend requests */
+        up(&suspend_info.nrequest);
 
 	return IRQ_HANDLED;
 }
@@ -251,6 +237,7 @@ static int setup_shutdown_watcher(void)
 static int setup_shutdown_watcher(void)
 {
 	int err;
+        struct task_struct *t = NULL;
 
 	xenbus_scanf(XBT_NIL, "control",
 		     "platform-feature-multiprocessor-suspend",
@@ -267,6 +254,15 @@ static int setup_shutdown_watcher(void)
 		printk(KERN_ERR "Failed to set sysrq watcher\n");
 		return err;
 	}
+
+        sema_init(&suspend_info.nrequest, 0);
+        atomic_set(&suspend_info.resuming, 0);
+        t = kthread_create(suspend_machine, &suspend_info, "ksuspend");
+        if (IS_ERR(t)) {
+                printk(KERN_ERR "Suspend thread creation failed\n");
+                return PTR_ERR(t);
+        }
+        wake_up_process(t);
 
 	/* suspend event channel */
 	err = setup_suspend_evtchn();

[-- Attachment #3: Type: text/plain, Size: 138 bytes --]

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xensource.com
http://lists.xensource.com/xen-devel

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH] serialize suspend-resume process
  2008-07-31 11:09 [PATCH] serialize suspend-resume process BVK Chaitanya
@ 2008-07-31 11:17 ` Keir Fraser
  2008-07-31 11:57   ` BVK Chaitanya
  0 siblings, 1 reply; 16+ messages in thread
From: Keir Fraser @ 2008-07-31 11:17 UTC (permalink / raw)
  To: BVK Chaitanya, Xen-devel

On 31/7/08 12:09, "BVK Chaitanya" <bayapuneni_chaitanya@symantec.com> wrote:

> With current suspend-resume code, there is no way for DOM-0 to identify
> that DOM-U resume is completed.  It can potentially trigger suspend
> again before resume is completed.
> 
> Below patch makes suspend-resume process serialized.

It's already supposed to be serialised, and there's a loop in xen_suspend()
to deal with up to one queued suspend request. Your patch may deal with an
arbitrary number of queued suspend requests, but a sane dom0 control stack
should never issue more than one suspend request in a suspend-resume cycle.

 -- Keir

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH] serialize suspend-resume process
  2008-07-31 11:17 ` Keir Fraser
@ 2008-07-31 11:57   ` BVK Chaitanya
  2008-07-31 12:23     ` Keir Fraser
  0 siblings, 1 reply; 16+ messages in thread
From: BVK Chaitanya @ 2008-07-31 11:57 UTC (permalink / raw)
  To: Keir Fraser; +Cc: Xen-devel

Keir Fraser wrote:
> On 31/7/08 12:09, "BVK Chaitanya" <bayapuneni_chaitanya@symantec.com> wrote:
>>
>> Below patch makes suspend-resume process serialized.
> 
> It's already supposed to be serialised, and there's a loop in xen_suspend()
> to deal with up to one queued suspend request. Your patch may deal with an
> arbitrary number of queued suspend requests, but a sane dom0 control stack
> should never issue more than one suspend request in a suspend-resume cycle.
> 

Yes, but there must be a way for dom0 to know that suspend-resume cycle 
is completed.  Currently dom0 only gets suspend completed notification 
-- as part of shutdown hypercall -- but no resume 
(xenbus_suspend_cancel) completed notification.

If dom0 issues second suspend request before resume is completed a _new_ 
kthread is started and will proceed with xen_suspend in parallel.  I saw 
this hitting BUG_ON in netfront_accelerator_add_watch.

Am i missing anything?


regards,
--
bvk-chaitanya

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH] serialize suspend-resume process
  2008-07-31 11:57   ` BVK Chaitanya
@ 2008-07-31 12:23     ` Keir Fraser
  2008-07-31 13:04       ` BVK Chaitanya
  0 siblings, 1 reply; 16+ messages in thread
From: Keir Fraser @ 2008-07-31 12:23 UTC (permalink / raw)
  To: BVK Chaitanya; +Cc: Xen-devel

On 31/7/08 12:57, "BVK Chaitanya" <bayapuneni_chaitanya@symantec.com> wrote:

> Yes, but there must be a way for dom0 to know that suspend-resume cycle
> is completed.  Currently dom0 only gets suspend completed notification
> -- as part of shutdown hypercall -- but no resume
> (xenbus_suspend_cancel) completed notification.
> 
> If dom0 issues second suspend request before resume is completed a _new_
> kthread is started and will proceed with xen_suspend in parallel.  I saw
> this hitting BUG_ON in netfront_accelerator_add_watch.

That isn't true. xen_suspend() can only be re-entered after it has switched
the 'shutting_down' variable to SHUTDOWN_INVALID. At this point resume work
is completed (except perhaps for resuming some PV devices via xenbus, which
is done asynchronously).

Your BUG_ON() may be fixed by linux-2.6.18-xen.hg, changeset 622.

 -- Keir

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH] serialize suspend-resume process
  2008-07-31 12:23     ` Keir Fraser
@ 2008-07-31 13:04       ` BVK Chaitanya
  2008-07-31 13:07         ` Keir Fraser
  0 siblings, 1 reply; 16+ messages in thread
From: BVK Chaitanya @ 2008-07-31 13:04 UTC (permalink / raw)
  To: Keir Fraser; +Cc: Xen-devel

Keir Fraser wrote:
> On 31/7/08 12:57, "BVK Chaitanya" <bayapuneni_chaitanya@symantec.com> wrote:
>> If dom0 issues second suspend request before resume is completed a _new_
>> kthread is started and will proceed with xen_suspend in parallel.  I saw
>> this hitting BUG_ON in netfront_accelerator_add_watch.
> 
> That isn't true. xen_suspend() can only be re-entered after it has switched
> the 'shutting_down' variable to SHUTDOWN_INVALID. At this point resume work
> is completed (except perhaps for resuming some PV devices via xenbus, which
> is done asynchronously).
> 

This is the case if suspend is invoked through xenbus interface.

With suspend event channel in place, i see that suspend request doesn't 
go through the shutdown_handler function when suspend is triggered over 
event channel.


regards,
--
bvk-chaitanya

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH] serialize suspend-resume process
  2008-07-31 13:04       ` BVK Chaitanya
@ 2008-07-31 13:07         ` Keir Fraser
  2008-07-31 13:34           ` BVK Chaitanya
  0 siblings, 1 reply; 16+ messages in thread
From: Keir Fraser @ 2008-07-31 13:07 UTC (permalink / raw)
  To: BVK Chaitanya; +Cc: Xen-devel




On 31/7/08 14:04, "BVK Chaitanya" <bayapuneni_chaitanya@symantec.com> wrote:

>> That isn't true. xen_suspend() can only be re-entered after it has switched
>> the 'shutting_down' variable to SHUTDOWN_INVALID. At this point resume work
>> is completed (except perhaps for resuming some PV devices via xenbus, which
>> is done asynchronously).
>> 
> 
> This is the case if suspend is invoked through xenbus interface.
> 
> With suspend event channel in place, i see that suspend request doesn't
> go through the shutdown_handler function when suspend is triggered over
> event channel.

Oh, I agree that the shutdown_handler() can be re-entered! But it will *not*
trigger multiple invocations of xen_suspend() -- note it stores away the
suspend request by performing xchg(&shutting_down, ...) but it does *not*
immediately trigger a suspend unless old_state==SHUTDOWN_INVALID (in which
case there is no active current invocation of xen_suspend()).

 -- Keir

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH] serialize suspend-resume process
  2008-07-31 13:07         ` Keir Fraser
@ 2008-07-31 13:34           ` BVK Chaitanya
  2008-07-31 13:46             ` Keir Fraser
  0 siblings, 1 reply; 16+ messages in thread
From: BVK Chaitanya @ 2008-07-31 13:34 UTC (permalink / raw)
  To: Keir Fraser; +Cc: Xen-devel

Keir Fraser wrote:
> 
> On 31/7/08 14:04, "BVK Chaitanya" <bayapuneni_chaitanya@symantec.com> wrote:
> 
>> With suspend event channel in place, i see that suspend request doesn't
>> go through the shutdown_handler function when suspend is triggered over
>> event channel.
> 
> Oh, I agree that the shutdown_handler() can be re-entered! But it will *not*
> trigger multiple invocations of xen_suspend() -- note it stores away the
> suspend request by performing xchg(&shutting_down, ...) but it does *not*
> immediately trigger a suspend unless old_state==SHUTDOWN_INVALID (in which
> case there is no active current invocation of xen_suspend()).
> 


In my tree there are two shutdown_handler functions.

     shutdown_handler
     __shutdown_handler

First one is called through xenbus interface and ensures that 
xen_suspend is serialized.  I agree about this.

Second one is called through suspend_int (handler for suspend event 
channel) as below:



     static irqreturn_t suspend_int(int irq, void* dev_id,
                                    struct pt_regs *ptregs)
     {
         shutting_down = SHUTDOWN_SUSPEND;
         schedule_work(&shutdown_work);

         return IRQ_HANDLED;
     }



where shutdown_work & its callback are:



     static DECLARE_WORK(shutdown_work, __shutdown_handler, NULL);

     static void __shutdown_handler(void *unused)
     {
         int err;

         err = kernel_thread((shutting_down == SHUTDOWN_SUSPEND) ?
                             xen_suspend : shutdown_process,
                             NULL, CLONE_FS | CLONE_FILES);

         if (err < 0) {
                 printk(KERN_WARNING "Error creating shutdown"
                        " process (%d): "
                        "retrying...\n", -err);
                 schedule_delayed_work(&shutdown_work, HZ/2);
         }
     }



This second function creates a thread and calls xen_suspend without 
looking for shutting_down variable's value.


I will check my tree again and will get back to you.


--
bvk-chaitanya

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH] serialize suspend-resume process
  2008-07-31 13:34           ` BVK Chaitanya
@ 2008-07-31 13:46             ` Keir Fraser
  2008-07-31 14:10               ` BVK Chaitanya
  0 siblings, 1 reply; 16+ messages in thread
From: Keir Fraser @ 2008-07-31 13:46 UTC (permalink / raw)
  To: BVK Chaitanya; +Cc: Xen-devel

On 31/7/08 14:34, "BVK Chaitanya" <bayapuneni_chaitanya@symantec.com> wrote:

> This second function creates a thread and calls xen_suspend without
> looking for shutting_down variable's value.
> 
> 
> I will check my tree again and will get back to you.

Oh dear, well that's just broken... I will fix it, thanks!

 -- Keir

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH] serialize suspend-resume process
  2008-07-31 13:46             ` Keir Fraser
@ 2008-07-31 14:10               ` BVK Chaitanya
  2008-07-31 14:12                 ` Neil Turton
  2008-07-31 14:36                 ` Keir Fraser
  0 siblings, 2 replies; 16+ messages in thread
From: BVK Chaitanya @ 2008-07-31 14:10 UTC (permalink / raw)
  To: Keir Fraser; +Cc: Xen-devel

Keir Fraser wrote:
> On 31/7/08 14:34, "BVK Chaitanya" <bayapuneni_chaitanya@symantec.com> wrote:
> 
>> This second function creates a thread and calls xen_suspend without
>> looking for shutting_down variable's value.
>>
> 
> Oh dear, well that's just broken... I will fix it, thanks!
> 

Great!


BTW, If 622 was what Kieran sent, I am not sure it is really necessary 
or not.  (I couldn't pull 622.)


regards,
--
bvk-chaitanya

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH] serialize suspend-resume process
  2008-07-31 14:10               ` BVK Chaitanya
@ 2008-07-31 14:12                 ` Neil Turton
  2008-07-31 14:36                 ` Keir Fraser
  1 sibling, 0 replies; 16+ messages in thread
From: Neil Turton @ 2008-07-31 14:12 UTC (permalink / raw)
  To: BVK Chaitanya; +Cc: Xen-devel, Keir Fraser

BVK Chaitanya wrote:
> BTW, If 622 was what Kieran sent, I am not sure it is really necessary 
> or not.  (I couldn't pull 622.)

The patch which Kieran sent does fix a bug, so I think it is necessary.
 It might not be the bug you were seeing though.

Cheers, Neil.

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH] serialize suspend-resume process
  2008-07-31 14:10               ` BVK Chaitanya
  2008-07-31 14:12                 ` Neil Turton
@ 2008-07-31 14:36                 ` Keir Fraser
  2008-07-31 15:27                   ` BVK Chaitanya
  1 sibling, 1 reply; 16+ messages in thread
From: Keir Fraser @ 2008-07-31 14:36 UTC (permalink / raw)
  To: BVK Chaitanya; +Cc: Xen-devel

On 31/7/08 15:10, "BVK Chaitanya" <bayapuneni_chaitanya@symantec.com> wrote:

>> Oh dear, well that's just broken... I will fix it, thanks!
>> 
> 
> Great!
> 
> 
> BTW, If 622 was what Kieran sent, I am not sure it is really necessary
> or not.  (I couldn't pull 622.)

Should be fixed by changeset 623. Try pulling from the staging tree:
 http://xenbits.xensource.com/staging/linux-2.6.18-xen.hg

It should be there within a few minutes.

 -- Keir

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH] serialize suspend-resume process
  2008-07-31 15:27                   ` BVK Chaitanya
@ 2008-07-31 15:24                     ` Keir Fraser
  2008-08-01  5:31                       ` BVK Chaitanya
  0 siblings, 1 reply; 16+ messages in thread
From: Keir Fraser @ 2008-07-31 15:24 UTC (permalink / raw)
  To: BVK Chaitanya; +Cc: Xen-devel




On 31/7/08 16:27, "BVK Chaitanya" <bayapuneni_chaitanya@symantec.com> wrote:

> It is doing a busy-wait loop till suspend-resume cycle completes, if
> any.  I found that it may go more than 25 msec sometimes.

It's not a busy-wait loop. As long as shutdown_state does not change under
its feet, it will complete in no more than two iterations.

 -- Keir

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH] serialize suspend-resume process
  2008-07-31 14:36                 ` Keir Fraser
@ 2008-07-31 15:27                   ` BVK Chaitanya
  2008-07-31 15:24                     ` Keir Fraser
  0 siblings, 1 reply; 16+ messages in thread
From: BVK Chaitanya @ 2008-07-31 15:27 UTC (permalink / raw)
  To: Keir Fraser; +Cc: Xen-devel

Hi Keir,

diff -r 44e3ace9a1f1 drivers/xen/core/reboot.c
--- a/drivers/xen/core/reboot.c	Thu Jul 31 09:46:58 2008 +0100
+++ b/drivers/xen/core/reboot.c	Fri Aug 01 04:14:43 2008 +0530
@@ -108,6 +116,31 @@ static int xen_suspend(void *__unused)
  	return 0;
  }

+static void switch_shutdown_state(int new_state)
+{
+	int prev_state, old_state = SHUTDOWN_INVALID;
+
+	/* We only drive shutdown_state into an active state. */
+	if (new_state == SHUTDOWN_INVALID)
+		return;
+
+	do {
+		/* We drop this transition if already in an active state. */
+		if ((old_state != SHUTDOWN_INVALID) &&
+		    (old_state != SHUTDOWN_RESUMING))
+			return;
+		/* Attempt to transition. */
+		prev_state = old_state;
+		old_state = cmpxchg(&shutting_down, old_state, new_state);
+	} while (old_state != prev_state);
+
+	/* Either we kick off the work, or we leave it to xen_suspend(). */
+	if (old_state == SHUTDOWN_INVALID)
+		schedule_work(&shutdown_work);
+	else
+		BUG_ON(old_state != SHUTDOWN_RESUMING);
+}
+
  static void __shutdown_handler(void *unused)
  {
  	int err;



It is doing a busy-wait loop till suspend-resume cycle completes, if 
any.  I found that it may go more than 25 msec sometimes.




@@ -220,26 +247,24 @@ static struct xenbus_watch sysrq_watch =

  static irqreturn_t suspend_int(int irq, void* dev_id, struct pt_regs 
*ptregs)
  {
-	shutting_down = SHUTDOWN_SUSPEND;
-	schedule_work(&shutdown_work);
-
+	switch_shutdown_state(SHUTDOWN_SUSPEND);
  	return IRQ_HANDLED;
  }



And you are calling the above busy-wait loop from within an irq handler. 
AFAIK it runs in interrupt-context and might cause dead-lock on 
uniprocessor setups (especially if underlying process context belongs to 
suspend thread itself).



regards,
--
bvk-chaitanya

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH] serialize suspend-resume process
  2008-07-31 15:24                     ` Keir Fraser
@ 2008-08-01  5:31                       ` BVK Chaitanya
  2008-08-01  5:58                         ` BVK Chaitanya
  0 siblings, 1 reply; 16+ messages in thread
From: BVK Chaitanya @ 2008-08-01  5:31 UTC (permalink / raw)
  To: Keir Fraser; +Cc: Xen-devel

Keir Fraser wrote:
> 
> On 31/7/08 16:27, "BVK Chaitanya" <bayapuneni_chaitanya@symantec.com> wrote:
> 
>> It is doing a busy-wait loop till suspend-resume cycle completes, if
>> any.  I found that it may go more than 25 msec sometimes.
> 
> It's not a busy-wait loop. As long as shutdown_state does not change under
> its feet, it will complete in no more than two iterations.
> 

Yep, my mistake :-(


I still have a concern, but it may not be important for xen-3.3 as of 
now.  Let me explain:

Dom0 can trigger a suspend-resume cycle and can wait for suspended 
notification back through event channel (and subscribe domctl).  When 
dom0 is done with its checkpoint-ing or any work it can resume the domU.

But resuming the domU doesn't complete suspend-resume cycle. 
Suspend-resume cycle is completed only after domU completes the 
xenbus_suspend_cancel function (I saw it taking more than 25msec.) 
Suspend requests sent during this suspend-cancel time are _lost_.

If we assume dom0 shouldn't send suspend requests during suspend-cancel, 
there must be some way for dom0 to know when suspend-cancel is 
completed.  AFAIK this doesn't exists in the current state.


Does it clarify my concern?  Shall i bring this issue back after xen-3.3 
is release work is done?


--
bvk-chaitanya

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH] serialize suspend-resume process
  2008-08-01  5:31                       ` BVK Chaitanya
@ 2008-08-01  5:58                         ` BVK Chaitanya
  2008-08-01  8:01                           ` Keir Fraser
  0 siblings, 1 reply; 16+ messages in thread
From: BVK Chaitanya @ 2008-08-01  5:58 UTC (permalink / raw)
  To: Keir Fraser; +Cc: Xen-devel

BVK Chaitanya wrote:
> Keir Fraser wrote:
>>
>> On 31/7/08 16:27, "BVK Chaitanya" <bayapuneni_chaitanya@symantec.com> 
>> wrote:
>>
>>> It is doing a busy-wait loop till suspend-resume cycle completes, if
>>> any.  I found that it may go more than 25 msec sometimes.
>>
>> It's not a busy-wait loop. As long as shutdown_state does not change 
>> under
>> its feet, it will complete in no more than two iterations.
>>
> 
> Yep, my mistake :-(
> 

My mistake again, forget my last message.  It doesn't loose any suspend 
requests.


I should say its a nice piece of code; brain teasing after a long time :-)


regards,
--
bvk-chaitanya

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH] serialize suspend-resume process
  2008-08-01  5:58                         ` BVK Chaitanya
@ 2008-08-01  8:01                           ` Keir Fraser
  0 siblings, 0 replies; 16+ messages in thread
From: Keir Fraser @ 2008-08-01  8:01 UTC (permalink / raw)
  To: BVK Chaitanya; +Cc: Xen-devel

On 1/8/08 06:58, "BVK Chaitanya" <bayapuneni_chaitanya@symantec.com> wrote:

>> Yep, my mistake :-(
>> 
> 
> My mistake again, forget my last message.  It doesn't loose any suspend
> requests.
> 
> I should say its a nice piece of code; brain teasing after a long time :-)

My PhD involved building data structures with cmpxchg, so I'm quite handy at
it. ;-)

 -- Keir

^ permalink raw reply	[flat|nested] 16+ messages in thread

end of thread, other threads:[~2008-08-01  8:01 UTC | newest]

Thread overview: 16+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2008-07-31 11:09 [PATCH] serialize suspend-resume process BVK Chaitanya
2008-07-31 11:17 ` Keir Fraser
2008-07-31 11:57   ` BVK Chaitanya
2008-07-31 12:23     ` Keir Fraser
2008-07-31 13:04       ` BVK Chaitanya
2008-07-31 13:07         ` Keir Fraser
2008-07-31 13:34           ` BVK Chaitanya
2008-07-31 13:46             ` Keir Fraser
2008-07-31 14:10               ` BVK Chaitanya
2008-07-31 14:12                 ` Neil Turton
2008-07-31 14:36                 ` Keir Fraser
2008-07-31 15:27                   ` BVK Chaitanya
2008-07-31 15:24                     ` Keir Fraser
2008-08-01  5:31                       ` BVK Chaitanya
2008-08-01  5:58                         ` BVK Chaitanya
2008-08-01  8:01                           ` Keir Fraser

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.