xen-devel.lists.xenproject.org archive mirror
 help / color / mirror / Atom feed
* [PATCH 2/2] reap the blktapctl thread and notify the tapdisk backend driver to release resource like memory..
@ 2010-05-05  3:22 James Song
  2010-05-06 16:01 ` Ian Jackson
  0 siblings, 1 reply; 7+ messages in thread
From: James Song @ 2010-05-05  3:22 UTC (permalink / raw)
  To: ian.jackson, xen-devel, Jim Fehlig


[-- Attachment #1.1: Type: text/plain, Size: 2995 bytes --]

Hi,
 
For this issue I had initial discussion thread before, more detail, please see :
http://lists.xensource.com/archives/html/xen-devel/2010-04/msg01140.html.
 
I write a new patch for this issue, which modified qemu code.  So Ian, could you take a look this patch,too.
 
thanks,
-James (Song Wei)
 
Signed-off-by: James ( Song Wei ) <jsong@novell.com>
 
diff -r 2cc9ec0cf061 i386-dm/helper2.c
--- a/i386-dm/helper2.c Fri Apr 30 17:41:45 2010 +0100
+++ b/i386-dm/helper2.c Wed May 05 10:57:33 2010 +0800
@@ -550,12 +550,27 @@
 
 int xen_pause_requested;
 
+int finish_work_shutdown(int timeout)
+{
+    extern int shutdown_requested;
+    extern int connected_disks;
+    if (shutdown_requested){
+        while(connected_disks > 0 &&  timeout > 0){
+            main_loop_wait(1);
+            timeout--;
+        }
+    }
+    raise(SIGKILL);
+    return 1;
+}
+    
 int main_loop(void)
 {
     CPUState *env = cpu_single_env;
     int evtchn_fd = xce_handle == -1 ? -1 : xc_evtchn_fd(xce_handle);
     char *qemu_file;
     fd_set fds;
+    extern int shutdown_requested;
 
     main_loop_prepare();
 
@@ -571,9 +586,12 @@
     qemu_set_fd_handler(xenstore_fd(), xenstore_process_event, NULL, NULL);
 
     while (1) {
-        while (!(vm_running && xen_pause_requested))
+        while (!(vm_running && xen_pause_requested)){
+            if (shutdown_requested)
+                finish_work_shutdown(2000); //timeout 5s
             /* Wait up to 10 msec. */
             main_loop_wait(10);
+       }
 
         fprintf(logfile, "device model saving state\n");
 
@@ -595,6 +613,9 @@
             FD_SET(xenstore_fd(), &fds);
             if (select(xenstore_fd() + 1, &fds, NULL, NULL, NULL) > 0)
                 xenstore_process_event(NULL);
+            if (shutdown_requested)
+                finish_work_shutdown(2000);
+
         }
 
         xenstore_record_dm_state("running");
diff -r 2cc9ec0cf061 vl.c
--- a/vl.c Fri Apr 30 17:41:45 2010 +0100
+++ b/vl.c Wed May 05 10:57:33 2010 +0800
@@ -3614,7 +3614,7 @@
 
 static QEMUResetEntry *first_reset_entry;
 static int reset_requested;
-static int shutdown_requested;
+int shutdown_requested;
 static int powerdown_requested;
 
 int qemu_shutdown_requested(void)
@@ -4699,9 +4699,11 @@
 
     memset(&act, 0, sizeof(act));
     act.sa_handler = termsig_handler;
+#ifndef CONFIG_DM
     sigaction(SIGINT,  &act, NULL);
+    sigaction(SIGTERM, &act, NULL);
+#endif
     sigaction(SIGHUP,  &act, NULL);
-    sigaction(SIGTERM, &act, NULL);
 }
 
 #endif
@@ -5851,11 +5853,9 @@
     register_savevm_live("ram", 0, 3, ram_save_live, NULL, ram_load, NULL);
 
 #ifndef _WIN32
-#ifndef CONFIG_DM
     /* must be after terminal init, SDL library changes signal handlers */
     termsig_setup();
 #endif
-#endif
 
     /* Maintain compatibility with multiple stdio monitors */
     if (!strcmp(monitor_device,"stdio")) {
 
 
 

 

[-- Attachment #1.2: Type: text/html, Size: 5422 bytes --]

[-- Attachment #2: wait_for_tapdisk_close.diff --]
[-- Type: text/plain, Size: 2612 bytes --]

diff -r 2cc9ec0cf061 i386-dm/helper2.c
--- a/i386-dm/helper2.c	Fri Apr 30 17:41:45 2010 +0100
+++ b/i386-dm/helper2.c	Wed May 05 10:57:33 2010 +0800
@@ -550,12 +550,27 @@
 
 int xen_pause_requested;
 
+int finish_work_shutdown(int timeout)
+{
+    extern int shutdown_requested;
+    extern int connected_disks;
+    if (shutdown_requested){
+        while(connected_disks > 0 &&  timeout > 0){
+            main_loop_wait(1);
+            timeout--;
+        }
+    }
+    raise(SIGKILL);
+    return 1;
+}
+    
 int main_loop(void)
 {
     CPUState *env = cpu_single_env;
     int evtchn_fd = xce_handle == -1 ? -1 : xc_evtchn_fd(xce_handle);
     char *qemu_file;
     fd_set fds;
+    extern int shutdown_requested;
 
     main_loop_prepare();
 
@@ -571,9 +586,12 @@
     qemu_set_fd_handler(xenstore_fd(), xenstore_process_event, NULL, NULL);
 
     while (1) {
-        while (!(vm_running && xen_pause_requested))
+        while (!(vm_running && xen_pause_requested)){
+            if (shutdown_requested)
+                finish_work_shutdown(2000); //timeout 5s
             /* Wait up to 10 msec. */
             main_loop_wait(10);
+       }
 
         fprintf(logfile, "device model saving state\n");
 
@@ -595,6 +613,9 @@
             FD_SET(xenstore_fd(), &fds);
             if (select(xenstore_fd() + 1, &fds, NULL, NULL, NULL) > 0)
                 xenstore_process_event(NULL);
+            if (shutdown_requested)
+                finish_work_shutdown(2000);
+
         }
 
         xenstore_record_dm_state("running");
diff -r 2cc9ec0cf061 vl.c
--- a/vl.c	Fri Apr 30 17:41:45 2010 +0100
+++ b/vl.c	Wed May 05 10:57:33 2010 +0800
@@ -3614,7 +3614,7 @@
 
 static QEMUResetEntry *first_reset_entry;
 static int reset_requested;
-static int shutdown_requested;
+int shutdown_requested;
 static int powerdown_requested;
 
 int qemu_shutdown_requested(void)
@@ -4699,9 +4699,11 @@
 
     memset(&act, 0, sizeof(act));
     act.sa_handler = termsig_handler;
+#ifndef CONFIG_DM
     sigaction(SIGINT,  &act, NULL);
+    sigaction(SIGTERM, &act, NULL);
+#endif
     sigaction(SIGHUP,  &act, NULL);
-    sigaction(SIGTERM, &act, NULL);
 }
 
 #endif
@@ -5851,11 +5853,9 @@
     register_savevm_live("ram", 0, 3, ram_save_live, NULL, ram_load, NULL);
 
 #ifndef _WIN32
-#ifndef CONFIG_DM
     /* must be after terminal init, SDL library changes signal handlers */
     termsig_setup();
 #endif
-#endif
 
     /* Maintain compatibility with multiple stdio monitors */
     if (!strcmp(monitor_device,"stdio")) {

[-- Attachment #3: Type: text/plain, Size: 138 bytes --]

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xensource.com
http://lists.xensource.com/xen-devel

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH 2/2] reap the blktapctl thread and notify the tapdisk backend driver to release resource like memory..
  2010-05-05  3:22 [PATCH 2/2] reap the blktapctl thread and notify the tapdisk backend driver to release resource like memory James Song
@ 2010-05-06 16:01 ` Ian Jackson
  2010-05-07  7:20   ` Jan Beulich
  0 siblings, 1 reply; 7+ messages in thread
From: Ian Jackson @ 2010-05-06 16:01 UTC (permalink / raw)
  To: James Song; +Cc: Jim Fehlig, xen-devel

James Song writes ("[Xen-devel] [PATCH 2/2] reap the blktapctl thread and notify the tapdisk backend driver to release resource like memory.."):
> I write a new patch for this issue, which modified qemu code.  So
> Ian, could you take a look this patch,too.

As far as I can see the effect here is to catch SIGINT in order to do
some kind of cleanup.  I don't think that is right.  qemu could quite
easily crash (and does!) so it is not right for cleanup to happen in
qemu.

If there is any cleanup that needs doing it needs to be done by qemu's
caller.

Reading the message you refer to, surely it should be the job of the
toolstack (xend or libxl) to ensure that the backends are instructed
to do all necessary releasing ?

Ian.

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH 2/2] reap the blktapctl thread and notify the tapdisk backend driver to release resource like memory..
  2010-05-06 16:01 ` Ian Jackson
@ 2010-05-07  7:20   ` Jan Beulich
  2010-05-07 17:32     ` Ian Jackson
  0 siblings, 1 reply; 7+ messages in thread
From: Jan Beulich @ 2010-05-07  7:20 UTC (permalink / raw)
  To: Ian Jackson, James Song; +Cc: Jim Fehlig, xen-devel

>>> Ian Jackson <Ian.Jackson@eu.citrix.com> 06.05.10 18:01 >>>
>James Song writes ("[Xen-devel] [PATCH 2/2] reap the blktapctl thread and notify the tapdisk backend driver to release resource like memory.."):
>> I write a new patch for this issue, which modified qemu code.  So
>> Ian, could you take a look this patch,too.
>
>As far as I can see the effect here is to catch SIGINT in order to do
>some kind of cleanup.  I don't think that is right.  qemu could quite
>easily crash (and does!) so it is not right for cleanup to happen in
>qemu.
>
>If there is any cleanup that needs doing it needs to be done by qemu's
>caller.
>
>Reading the message you refer to, surely it should be the job of the
>toolstack (xend or libxl) to ensure that the backends are instructed
>to do all necessary releasing ?

No (or not only): The cleanup done here is to close open file handles
and/or mmap-s associated with blktap. You may have seen the kernel
side patches to allow the system as a whole to recover from that
state (particularly when qemu-dm crashes), but in general I consider
it bad practice for an application to keep open huge amounts of
mapped memory when getting orderly terminated.

"Orderly" in the qemu-dm case unfortunately means being terminated
by a signal, hence the signal should be intercepted by qemu
(otherwise, i.e. in the current state) the design seems broken to me.

Having said that doesn't mean that I agree to the blktap-centric
approach taken by the patch. Imo global cleanup should be
performed by qemu-dm upon being terminated - the question just is
whether such code already exists (and just needs to be hooked up),
or whether that part is missing altogether and needs to be written
from scratch.

Jan

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH 2/2] reap the blktapctl thread and notify the tapdisk backend driver to release resource like memory..
  2010-05-07  7:20   ` Jan Beulich
@ 2010-05-07 17:32     ` Ian Jackson
  2010-05-10  7:06       ` Jan Beulich
  0 siblings, 1 reply; 7+ messages in thread
From: Ian Jackson @ 2010-05-07 17:32 UTC (permalink / raw)
  To: Jan Beulich; +Cc: Jim Fehlig, xen-devel, James Song

Jan Beulich writes ("Re: [Xen-devel] [PATCH 2/2] reap the blktapctl thread and notify the tapdisk backend driver to release resource like memory.."):
> Ian Jackson <Ian.Jackson@eu.citrix.com> 06.05.10 18:01
> >Reading the message you refer to, surely it should be the job of the
> >toolstack (xend or libxl) to ensure that the backends are instructed
> >to do all necessary releasing ?
> 
> No (or not only): The cleanup done here is to close open file handles
> and/or mmap-s associated with blktap. You may have seen the kernel
> side patches to allow the system as a whole to recover from that
> state (particularly when qemu-dm crashes), but in general I consider
> it bad practice for an application to keep open huge amounts of
> mapped memory when getting orderly terminated.

Uh ?  I can't see anything at all wrong with letting the kernel do the
cleanup of memory mapped by and fds held by qemu.

The kernel already needs to have that code and if it's wrong or
incomplete (which you don't seem to be suggesting) then the system is
already broken; whereas if it's correct and complete then there is no
need for qemu to do anything.

In fact however there is allegedly some bug somewhere which this patch
is supposed to deal with, but I can't really see the connection.

> "Orderly" in the qemu-dm case unfortunately means being terminated
> by a signal, hence the signal should be intercepted by qemu
> (otherwise, i.e. in the current state) the design seems broken to me.

I think in general we should be aiming for crash-only software.
  http://dslab.epfl.ch/pubs/crashonly/crashonly.pdf
It's much much more reliable, as well as meaning we need to write less
code (and thus fewer bugs).

> Having said that doesn't mean that I agree to the blktap-centric
> approach taken by the patch. Imo global cleanup should be
> performed by qemu-dm upon being terminated - the question just is
> whether such code already exists (and just needs to be hooked up),
> or whether that part is missing altogether and needs to be written
> from scratch.

I can't see that there is anything that qemu should be relied upon to
do on its own termination.  If it can't be relied on to do it then we
need code elsewhere to do it (which we already have), and then there
is no need for qemu to have any code for it.

Ian.

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH 2/2] reap the blktapctl thread and notify the tapdisk backend driver to release resource like memory..
  2010-05-07 17:32     ` Ian Jackson
@ 2010-05-10  7:06       ` Jan Beulich
  2010-05-10 11:30         ` Ian Jackson
  0 siblings, 1 reply; 7+ messages in thread
From: Jan Beulich @ 2010-05-10  7:06 UTC (permalink / raw)
  To: Ian Jackson; +Cc: Jim Fehlig, xen-devel, James Song

>>> Ian Jackson <Ian.Jackson@eu.citrix.com> 07.05.10 19:32 >>>
>In fact however there is allegedly some bug somewhere which this patch
>is supposed to deal with, but I can't really see the connection.

The bug was with the blktap kernel driver not being able to clean up
after an unclean exit of qemu. We had reports of this only for 3.4
and 4.0 (and I wonder how no-one else noticed this, when the bug
was introduced about a year ago, even before blktap2 got added),
yet the problematic blktap code also existed in those systems that
we ship with 3.2.3 and 3.3.1, hence either no-one ever noticed the
problem on those platforms, or there must be a behavioral
difference of qemu (i.e. cleaning up after itself in earlier versions).

I fully agree that the kernel should (or really has to) properly clean
up after any uncleanly exiting application, yet ...

>I think in general we should be aiming for crash-only software.
>  http://dslab.epfl.ch/pubs/crashonly/crashonly.pdf 
>It's much much more reliable, as well as meaning we need to write less
>code (and thus fewer bugs).

... I can see a philosophical point in this discussion (but I don't
agree that this is the only sensible position).

Jan

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH 2/2] reap the blktapctl thread and  notify the tapdisk backend driver to release resource like  memory..
  2010-05-10  7:06       ` Jan Beulich
@ 2010-05-10 11:30         ` Ian Jackson
  2010-05-10 12:10           ` Keir Fraser
  0 siblings, 1 reply; 7+ messages in thread
From: Ian Jackson @ 2010-05-10 11:30 UTC (permalink / raw)
  To: Jan Beulich; +Cc: Jim Fehlig, xen-devel@lists.xensource.com, James Song

Jan Beulich writes ("Re: [Xen-devel] [PATCH 2/2] reap the blktapctl thread and	 notify the tapdisk backend driver to release resource like	 memory.."):
> The bug was with the blktap kernel driver not being able to clean up
> after an unclean exit of qemu. [....]

This is a serious kernel bug which absolutely must be fixed.  There is
no way the userland toolstack can promise that qemu won't just die.

Ian.

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH 2/2] reap the blktapctl thread and notify the tapdisk backend driver to release resource like  memory..
  2010-05-10 11:30         ` Ian Jackson
@ 2010-05-10 12:10           ` Keir Fraser
  0 siblings, 0 replies; 7+ messages in thread
From: Keir Fraser @ 2010-05-10 12:10 UTC (permalink / raw)
  To: Ian Jackson, Jan Beulich
  Cc: Jim Fehlig, xen-devel@lists.xensource.com, James Song

On 10/05/2010 12:30, "Ian Jackson" <Ian.Jackson@eu.citrix.com> wrote:

> Jan Beulich writes ("Re: [Xen-devel] [PATCH 2/2] reap the blktapctl thread
> and  notify the tapdisk backend driver to release resource like  memory.."):
>> The bug was with the blktap kernel driver not being able to clean up
>> after an unclean exit of qemu. [....]
> 
> This is a serious kernel bug which absolutely must be fixed.  There is
> no way the userland toolstack can promise that qemu won't just die.

Well it depends who/what sets up the tap context. Is it actually qemu, or is
the initial setup done by xend and then qemu merely plumbed into it?

 -- Keir

^ permalink raw reply	[flat|nested] 7+ messages in thread

end of thread, other threads:[~2010-05-10 12:10 UTC | newest]

Thread overview: 7+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2010-05-05  3:22 [PATCH 2/2] reap the blktapctl thread and notify the tapdisk backend driver to release resource like memory James Song
2010-05-06 16:01 ` Ian Jackson
2010-05-07  7:20   ` Jan Beulich
2010-05-07 17:32     ` Ian Jackson
2010-05-10  7:06       ` Jan Beulich
2010-05-10 11:30         ` Ian Jackson
2010-05-10 12:10           ` Keir Fraser

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).