Linux Container Development
 help / color / mirror / Atom feed
* [PATCH 0/2] cryo: Re-enable checkpointing of thread area
@ 2008-06-11 14:13 Benjamin Thery
  2008-06-11 14:14 ` [PATCH 1/2] cryo: re-enable " Benjamin Thery
                   ` (2 more replies)
  0 siblings, 3 replies; 5+ messages in thread
From: Benjamin Thery @ 2008-06-11 14:13 UTC (permalink / raw)
  To: Serge E. Hallyn; +Cc: Containers, Benjamin Thery

I found the cause of one of the general protection faults I saw with
my test program and I finally managed to completely restart (a very 
dumb) program for the first time!

My program was failing (GPF) at restart in glibc code. After some 
debugging I found the failures occur on SINGLE_THREAD_P calls 
(eg. glibc/sysdeps/posix/system.c:__libc_system()).

I suspected a problem with nptl and remembered the comments in cr.c 
("for redhat 9.0, NPTL") and in cr.txt ("Support linuxthreads, but not 
NPTL."). I uncommented this code that checkpoints the thread area 
(don't ask me what it is) and, voila, my program restarted!

It doesn't solve everything: I still have issues restarting the 'sleep'
program.

Benjamin

-- 

^ permalink raw reply	[flat|nested] 5+ messages in thread

* [PATCH 1/2] cryo: re-enable checkpointing of thread area
  2008-06-11 14:13 [PATCH 0/2] cryo: Re-enable checkpointing of thread area Benjamin Thery
@ 2008-06-11 14:14 ` Benjamin Thery
       [not found]   ` <20080611141408.977819123-4vkkeT0zb4ZEtYaxpPmRp1aPQRlvutdw@public.gmane.org>
  2008-06-11 14:14 ` [PATCH 2/2] cryo: minimal test program Benjamin Thery
       [not found] ` <20080611141350.541711754-4vkkeT0zb4ZEtYaxpPmRp1aPQRlvutdw@public.gmane.org>
  2 siblings, 1 reply; 5+ messages in thread
From: Benjamin Thery @ 2008-06-11 14:14 UTC (permalink / raw)
  To: Serge E. Hallyn; +Cc: Containers, Benjamin Thery

This patch re-enables the code that checkpoints (and restores) the thread
area (ldt) using ptrace_get_thread_area(). This seems to improve the
situation a lot on systems with NPTL: it solved one of the general
protection faults I had when restarting a program.

Signed-off-by: Benjamin Thery <benjamin.thery-6ktuUTfB/bM@public.gmane.org>
---
 cr.c |   17 ++++++++---------
 1 file changed, 8 insertions(+), 9 deletions(-)

Index: cryodev/cr.c
===================================================================
--- cryodev.orig/cr.c
+++ cryodev/cr.c
@@ -24,7 +24,7 @@
 #include <signal.h>
 #include <errno.h>
 
-#include <asm/ldt.h>	/* for redhat 9.0, NPTL */
+#include <asm/ldt.h>	/* for NPTL */
 
 #include "utils.h"
 #include "sci.h"
@@ -513,14 +513,12 @@ static int save_process_data(pid_t pid, 
 		return 0;
 	}
 
-	/* This is required in redhat9 */
-#if 0
+	/* This is required for NPTL */
 	{
-		modify_ldt_t ldt;
+		struct user_desc ldt;
 		if (ptrace_get_thread_area(pid, &ldt) == 0)
 			write_item(fd, "ldt", (void *)&ldt, sizeof(ldt));
 	}
-#endif
 
 	snprintf(fname, sizeof(fname), "/proc/%u/exe", pid);
 	memset(exe, 0, sizeof(exe));
@@ -1237,7 +1235,7 @@ static int process_restart(int fd, int m
 	char *exe = NULL, *cwd = NULL, *sargv = NULL, *senv = NULL;
 	struct user_regs_struct *regs = NULL;
 	struct user_fpregs_struct *fpregs = NULL;
-	//modify_ldt_t *ldt = NULL;
+	struct user_desc *ldt = NULL;
 	int *exitsig = NULL;
 	sigset_t *sigmask = NULL, *sigpend = NULL;
 	struct sigaction *sigact = NULL;
@@ -1262,7 +1260,7 @@ static int process_restart(int fd, int m
 			Free(senv);
 			Free(regs);
 			Free(fpregs);
-			//Free(ldt);
+			Free(ldt);
 			Free(sigact);
 			Free(sigmask);
 			Free(sigpend);
@@ -1276,7 +1274,7 @@ static int process_restart(int fd, int m
 		else ITEM_SET(cwd, char);
 		else ITEM_SET(regs, struct user_regs_struct);
 		else ITEM_SET(fpregs, struct user_fpregs_struct);
-		//else ITEM_SET(ldt, modify_ldt_t);
+		else ITEM_SET(ldt, struct user_desc);
 		else ITEM_SET(sigact, struct sigaction);
 		else ITEM_SET(sigmask, sigset_t);
 		else ITEM_SET(sigpend, sigset_t);
@@ -1304,7 +1302,8 @@ static int process_restart(int fd, int m
 				ERROR("lh_hash_add(%p, %u, %p)\n", &hpid, (unsigned)*pid, (void *)npid);
 				return -1;
 			}
-			//if (ldt) ptrace_set_thread_area(npid, ldt);
+			if (ldt)
+				ptrace_set_thread_area(npid, ldt);
 			if (cwd) PT_CHDIR(npid, cwd);
 			restore_fd(fd, npid);
 		} else if (ITEM_IS("SOCK")) {

-- 

^ permalink raw reply	[flat|nested] 5+ messages in thread

* [PATCH 2/2] cryo: minimal test program
  2008-06-11 14:13 [PATCH 0/2] cryo: Re-enable checkpointing of thread area Benjamin Thery
  2008-06-11 14:14 ` [PATCH 1/2] cryo: re-enable " Benjamin Thery
@ 2008-06-11 14:14 ` Benjamin Thery
       [not found] ` <20080611141350.541711754-4vkkeT0zb4ZEtYaxpPmRp1aPQRlvutdw@public.gmane.org>
  2 siblings, 0 replies; 5+ messages in thread
From: Benjamin Thery @ 2008-06-11 14:14 UTC (permalink / raw)
  To: Serge E. Hallyn; +Cc: Containers, Benjamin Thery

This is the dumb test program I managed to restart after I re-enabled
the checkpointing of thread area stuff.

Signed-off-by: Benjamin Thery <benjamin.thery-6ktuUTfB/bM@public.gmane.org>
--
---
 tests/Makefile  |    2 +-
 tests/compute.c |   17 +++++++++++++++++
 2 files changed, 18 insertions(+), 1 deletion(-)

Index: cryodev/tests/Makefile
===================================================================
--- cryodev.orig/tests/Makefile
+++ cryodev/tests/Makefile
@@ -1,4 +1,4 @@
-TARGETS = sleep mksysvipc pause_asm
+TARGETS = sleep mksysvipc pause_asm compute
 
 CFLAGS = -static
 
Index: cryodev/tests/compute.c
===================================================================
--- /dev/null
+++ cryodev/tests/compute.c
@@ -0,0 +1,17 @@
+#include <stdio.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+int main()
+{
+	int i = 0;
+	double f = 0;
+
+	printf("Running as %d\n", getpid());
+	while (i<1000000000) {
+		f = i / 0.000234567;
+		if (i%10000000 == 0)
+			printf("i is %d (pid %d)\n", i, getpid());
+		i++;
+	}
+}

-- 

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH 0/2] cryo: Re-enable checkpointing of thread area
       [not found] ` <20080611141350.541711754-4vkkeT0zb4ZEtYaxpPmRp1aPQRlvutdw@public.gmane.org>
@ 2008-06-11 14:41   ` Benjamin Thery
  0 siblings, 0 replies; 5+ messages in thread
From: Benjamin Thery @ 2008-06-11 14:41 UTC (permalink / raw)
  To: Serge E. Hallyn; +Cc: Containers

Benjamin Thery wrote:
> I found the cause of one of the general protection faults I saw with
> my test program and I finally managed to completely restart (a very 
> dumb) program for the first time!
> 
> My program was failing (GPF) at restart in glibc code. After some 
> debugging I found the failures occur on SINGLE_THREAD_P calls 
> (eg. glibc/sysdeps/posix/system.c:__libc_system()).
> 
> I suspected a problem with nptl and remembered the comments in cr.c 
> ("for redhat 9.0, NPTL") and in cr.txt ("Support linuxthreads, but not 
> NPTL."). I uncommented this code that checkpoints the thread area 
> (don't ask me what it is) and, voila, my program restarted!
> 
> It doesn't solve everything: I still have issues restarting the 'sleep'
> program.

I spoke too soon... in fact I have no more issues with the sleep or
mksysvipc programs. They both restart fine now. :)

Benjamin


-- 
B e n j a m i n   T h e r y  - BULL/DT/Open Software R&D

    http://www.bull.com

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH 1/2] cryo: re-enable checkpointing of thread area
       [not found]   ` <20080611141408.977819123-4vkkeT0zb4ZEtYaxpPmRp1aPQRlvutdw@public.gmane.org>
@ 2008-06-11 15:28     ` Serge E. Hallyn
  0 siblings, 0 replies; 5+ messages in thread
From: Serge E. Hallyn @ 2008-06-11 15:28 UTC (permalink / raw)
  To: Benjamin Thery; +Cc: Containers

Quoting Benjamin Thery (benjamin.thery-6ktuUTfB/bM@public.gmane.org):
> This patch re-enable the code that checkpoints (and restore) and thread
> area (ldt) using ptrace_get_thread_area(). This is seem to improve the 
> situation a lot on systems with NPTL: it solved one of the general 
> protection fault I had when restarting a program.
> 
> Signed-off-by: Benjamin Thery <benjamin.thery-6ktuUTfB/bM@public.gmane.org>

Benjamin, you rock.  This fixes my kvm image as well.

Nadia, Suka, could you confirm that this does *not* break cryo on your
systems?

Patch added to git tree.

thanks,
-serge

> ---
>  cr.c |   17 ++++++++---------
>  1 file changed, 8 insertions(+), 9 deletions(-)
> 
> Index: cryodev/cr.c
> ===================================================================
> --- cryodev.orig/cr.c
> +++ cryodev/cr.c
> @@ -24,7 +24,7 @@
>  #include <signal.h>
>  #include <errno.h>
> 
> -#include <asm/ldt.h>	/* for redhat 9.0, NPTL */
> +#include <asm/ldt.h>	/* for NPTL */
> 
>  #include "utils.h"
>  #include "sci.h"
> @@ -513,14 +513,12 @@ static int save_process_data(pid_t pid, 
>  		return 0;
>  	}
> 
> -	/* This is required in redhat9 */
> -#if 0
> +	/* This is required for NPTL */
>  	{
> -		modify_ldt_t ldt;
> +		struct user_desc ldt;
>  		if (ptrace_get_thread_area(pid, &ldt) == 0)
>  			write_item(fd, "ldt", (void *)&ldt, sizeof(ldt));
>  	}
> -#endif
> 
>  	snprintf(fname, sizeof(fname), "/proc/%u/exe", pid);
>  	memset(exe, 0, sizeof(exe));
> @@ -1237,7 +1235,7 @@ static int process_restart(int fd, int m
>  	char *exe = NULL, *cwd = NULL, *sargv = NULL, *senv = NULL;
>  	struct user_regs_struct *regs = NULL;
>  	struct user_fpregs_struct *fpregs = NULL;
> -	//modify_ldt_t *ldt = NULL;
> +	struct user_desc *ldt = NULL;
>  	int *exitsig = NULL;
>  	sigset_t *sigmask = NULL, *sigpend = NULL;
>  	struct sigaction *sigact = NULL;
> @@ -1262,7 +1260,7 @@ static int process_restart(int fd, int m
>  			Free(senv);
>  			Free(regs);
>  			Free(fpregs);
> -			//Free(ldt);
> +			Free(ldt);
>  			Free(sigact);
>  			Free(sigmask);
>  			Free(sigpend);
> @@ -1276,7 +1274,7 @@ static int process_restart(int fd, int m
>  		else ITEM_SET(cwd, char);
>  		else ITEM_SET(regs, struct user_regs_struct);
>  		else ITEM_SET(fpregs, struct user_fpregs_struct);
> -		//else ITEM_SET(ldt, modify_ldt_t);
> +		else ITEM_SET(ldt, struct user_desc);
>  		else ITEM_SET(sigact, struct sigaction);
>  		else ITEM_SET(sigmask, sigset_t);
>  		else ITEM_SET(sigpend, sigset_t);
> @@ -1304,7 +1302,8 @@ static int process_restart(int fd, int m
>  				ERROR("lh_hash_add(%p, %u, %p)\n", &hpid, (unsigned)*pid, (void *)npid);
>  				return -1;
>  			}
> -			//if (ldt) ptrace_set_thread_area(npid, ldt);
> +			if (ldt)
> +				ptrace_set_thread_area(npid, ldt);
>  			if (cwd) PT_CHDIR(npid, cwd);
>  			restore_fd(fd, npid);
>  		} else if (ITEM_IS("SOCK")) {
> 
> -- 

^ permalink raw reply	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2008-06-11 15:28 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2008-06-11 14:13 [PATCH 0/2] cryo: Re-enable checkpointing of thread area Benjamin Thery
2008-06-11 14:14 ` [PATCH 1/2] cryo: re-enable " Benjamin Thery
     [not found]   ` <20080611141408.977819123-4vkkeT0zb4ZEtYaxpPmRp1aPQRlvutdw@public.gmane.org>
2008-06-11 15:28     ` Serge E. Hallyn
2008-06-11 14:14 ` [PATCH 2/2] cryo: minimal test program Benjamin Thery
     [not found] ` <20080611141350.541711754-4vkkeT0zb4ZEtYaxpPmRp1aPQRlvutdw@public.gmane.org>
2008-06-11 14:41   ` [PATCH 0/2] cryo: Re-enable checkpointing of thread area Benjamin Thery

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox