All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 0/2] cryo: Re-enable checkpointing of thread area
@ 2008-06-11 14:13 Benjamin Thery
  2008-06-11 14:14 ` [PATCH 1/2] cryo: re-enable " Benjamin Thery
                   ` (2 more replies)
  0 siblings, 3 replies; 5+ messages in thread
From: Benjamin Thery @ 2008-06-11 14:13 UTC (permalink / raw)
  To: Serge E. Hallyn; +Cc: Containers, Benjamin Thery

I found the cause of one of the general protection faults I saw with
my test program and I finally managed to completely restart (a very 
dumb) program for the first time!

My program was failing (GPF) at restart in glibc code. After some 
debugging I found the failures occur on SINGLE_THREAD_P calls 
(eg. glibc/sysdeps/posix/system.c:__libc_system()).

I suspected a problem with nptl and remembered the comments in cr.c 
("for redhat 9.0, NPTL") and in cr.txt ("Support linuxthreads, but not 
NPTL."). I uncommented this code that checkpoints the thread area 
(don't ask me what it is) and, voila, my program restarted!

It doesn't solve everything: I still have issues restarting the 'sleep'
program.

Benjamin

-- 

^ permalink raw reply	[flat|nested] 5+ messages in thread

* [PATCH 1/2] cryo: re-enable checkpointing of thread area
  2008-06-11 14:13 [PATCH 0/2] cryo: Re-enable checkpointing of thread area Benjamin Thery
@ 2008-06-11 14:14 ` Benjamin Thery
       [not found]   ` <20080611141408.977819123-4vkkeT0zb4ZEtYaxpPmRp1aPQRlvutdw@public.gmane.org>
  2008-06-11 14:14 ` [PATCH 2/2] cryo: minimal test program Benjamin Thery
       [not found] ` <20080611141350.541711754-4vkkeT0zb4ZEtYaxpPmRp1aPQRlvutdw@public.gmane.org>
  2 siblings, 1 reply; 5+ messages in thread
From: Benjamin Thery @ 2008-06-11 14:14 UTC (permalink / raw)
  To: Serge E. Hallyn; +Cc: Containers, Benjamin Thery

This patch re-enables the code that checkpoints (and restores) the thread
area (ldt) using ptrace_get_thread_area(). This seems to improve the
situation a lot on systems with NPTL: it solved one of the general
protection faults I had when restarting a program.

Signed-off-by: Benjamin Thery <benjamin.thery-6ktuUTfB/bM@public.gmane.org>
---
 cr.c |   17 ++++++++---------
 1 file changed, 8 insertions(+), 9 deletions(-)

Index: cryodev/cr.c
===================================================================
--- cryodev.orig/cr.c
+++ cryodev/cr.c
@@ -24,7 +24,7 @@
 #include <signal.h>
 #include <errno.h>
 
-#include <asm/ldt.h>	/* for redhat 9.0, NPTL */
+#include <asm/ldt.h>	/* for NPTL */
 
 #include "utils.h"
 #include "sci.h"
@@ -513,14 +513,12 @@ static int save_process_data(pid_t pid, 
 		return 0;
 	}
 
-	/* This is required in redhat9 */
-#if 0
+	/* This is required for NPTL */
 	{
-		modify_ldt_t ldt;
+		struct user_desc ldt;
 		if (ptrace_get_thread_area(pid, &ldt) == 0)
 			write_item(fd, "ldt", (void *)&ldt, sizeof(ldt));
 	}
-#endif
 
 	snprintf(fname, sizeof(fname), "/proc/%u/exe", pid);
 	memset(exe, 0, sizeof(exe));
@@ -1237,7 +1235,7 @@ static int process_restart(int fd, int m
 	char *exe = NULL, *cwd = NULL, *sargv = NULL, *senv = NULL;
 	struct user_regs_struct *regs = NULL;
 	struct user_fpregs_struct *fpregs = NULL;
-	//modify_ldt_t *ldt = NULL;
+	struct user_desc *ldt = NULL;
 	int *exitsig = NULL;
 	sigset_t *sigmask = NULL, *sigpend = NULL;
 	struct sigaction *sigact = NULL;
@@ -1262,7 +1260,7 @@ static int process_restart(int fd, int m
 			Free(senv);
 			Free(regs);
 			Free(fpregs);
-			//Free(ldt);
+			Free(ldt);
 			Free(sigact);
 			Free(sigmask);
 			Free(sigpend);
@@ -1276,7 +1274,7 @@ static int process_restart(int fd, int m
 		else ITEM_SET(cwd, char);
 		else ITEM_SET(regs, struct user_regs_struct);
 		else ITEM_SET(fpregs, struct user_fpregs_struct);
-		//else ITEM_SET(ldt, modify_ldt_t);
+		else ITEM_SET(ldt, struct user_desc);
 		else ITEM_SET(sigact, struct sigaction);
 		else ITEM_SET(sigmask, sigset_t);
 		else ITEM_SET(sigpend, sigset_t);
@@ -1304,7 +1302,8 @@ static int process_restart(int fd, int m
 				ERROR("lh_hash_add(%p, %u, %p)\n", &hpid, (unsigned)*pid, (void *)npid);
 				return -1;
 			}
-			//if (ldt) ptrace_set_thread_area(npid, ldt);
+			if (ldt)
+				ptrace_set_thread_area(npid, ldt);
 			if (cwd) PT_CHDIR(npid, cwd);
 			restore_fd(fd, npid);
 		} else if (ITEM_IS("SOCK")) {

-- 

^ permalink raw reply	[flat|nested] 5+ messages in thread

* [PATCH 2/2] cryo: minimal test program
  2008-06-11 14:13 [PATCH 0/2] cryo: Re-enable checkpointing of thread area Benjamin Thery
  2008-06-11 14:14 ` [PATCH 1/2] cryo: re-enable " Benjamin Thery
@ 2008-06-11 14:14 ` Benjamin Thery
       [not found] ` <20080611141350.541711754-4vkkeT0zb4ZEtYaxpPmRp1aPQRlvutdw@public.gmane.org>
  2 siblings, 0 replies; 5+ messages in thread
From: Benjamin Thery @ 2008-06-11 14:14 UTC (permalink / raw)
  To: Serge E. Hallyn; +Cc: Containers, Benjamin Thery

This is the dumb test program I managed to restart after I re-enabled
the checkpointing of thread area stuff.

Signed-off-by: Benjamin Thery <benjamin.thery-6ktuUTfB/bM@public.gmane.org>
--
---
 tests/Makefile  |    2 +-
 tests/compute.c |   17 +++++++++++++++++
 2 files changed, 18 insertions(+), 1 deletion(-)

Index: cryodev/tests/Makefile
===================================================================
--- cryodev.orig/tests/Makefile
+++ cryodev/tests/Makefile
@@ -1,4 +1,4 @@
-TARGETS = sleep mksysvipc pause_asm
+TARGETS = sleep mksysvipc pause_asm compute
 
 CFLAGS = -static
 
Index: cryodev/tests/compute.c
===================================================================
--- /dev/null
+++ cryodev/tests/compute.c
@@ -0,0 +1,17 @@
+#include <stdio.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+int main()
+{
+	int i = 0;
+	double f = 0;
+
+	printf("Running as %d\n", getpid());
+	while (i<1000000000) {
+		f = i / 0.000234567;
+		if (i%10000000 == 0)
+			printf("i is %d (pid %d)\n", i, getpid());
+		i++;
+	}
+}

-- 

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH 0/2] cryo: Re-enable checkpointing of thread area
       [not found] ` <20080611141350.541711754-4vkkeT0zb4ZEtYaxpPmRp1aPQRlvutdw@public.gmane.org>
@ 2008-06-11 14:41   ` Benjamin Thery
  0 siblings, 0 replies; 5+ messages in thread
From: Benjamin Thery @ 2008-06-11 14:41 UTC (permalink / raw)
  To: Serge E. Hallyn; +Cc: Containers

Benjamin Thery wrote:
> I found the cause of one of the general protection faults I saw with
> my test program and I finally managed to completely restart (a very 
> dumb) program for the first time!
> 
> My program was failing (GPF) at restart in glibc code. After some 
> debugging I found the failures occur on SINGLE_THREAD_P calls 
> (eg. glibc/sysdeps/posix/system.c:__libc_system()).
> 
> I suspected a problem with nptl and remembered the comments in cr.c 
> ("for redhat 9.0, NPTL") and in cr.txt ("Support linuxthreads, but not 
> NPTL."). I uncommented this code that checkpoints the thread area 
> (don't ask me what it is) and, voila, my program restarted!
> 
> It doesn't solve everything: I still have issues restarting the 'sleep'
> program.

I spoke too fast... in fact I have no more issues with the sleep or
mksysvipc programs. They both restart fine now. :)

Benjamin


-- 
B e n j a m i n   T h e r y  - BULL/DT/Open Software R&D

    http://www.bull.com

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH 1/2] cryo: re-enable checkpointing of thread area
       [not found]   ` <20080611141408.977819123-4vkkeT0zb4ZEtYaxpPmRp1aPQRlvutdw@public.gmane.org>
@ 2008-06-11 15:28     ` Serge E. Hallyn
  0 siblings, 0 replies; 5+ messages in thread
From: Serge E. Hallyn @ 2008-06-11 15:28 UTC (permalink / raw)
  To: Benjamin Thery; +Cc: Containers

Quoting Benjamin Thery (benjamin.thery-6ktuUTfB/bM@public.gmane.org):
> This patch re-enables the code that checkpoints (and restores) the thread
> area (ldt) using ptrace_get_thread_area(). This seems to improve the
> situation a lot on systems with NPTL: it solved one of the general
> protection faults I had when restarting a program.
> 
> Signed-off-by: Benjamin Thery <benjamin.thery-6ktuUTfB/bM@public.gmane.org>

Benjamin, you rock.  This fixes my kvm image as well.

Nadia, Suka, could you confirm that this does *not* break cryo on your
systems?

Patch added to git tree.

thanks,
-serge

> ---
>  cr.c |   17 ++++++++---------
>  1 file changed, 8 insertions(+), 9 deletions(-)
> 
> Index: cryodev/cr.c
> ===================================================================
> --- cryodev.orig/cr.c
> +++ cryodev/cr.c
> @@ -24,7 +24,7 @@
>  #include <signal.h>
>  #include <errno.h>
> 
> -#include <asm/ldt.h>	/* for redhat 9.0, NPTL */
> +#include <asm/ldt.h>	/* for NPTL */
> 
>  #include "utils.h"
>  #include "sci.h"
> @@ -513,14 +513,12 @@ static int save_process_data(pid_t pid, 
>  		return 0;
>  	}
> 
> -	/* This is required in redhat9 */
> -#if 0
> +	/* This is required for NPTL */
>  	{
> -		modify_ldt_t ldt;
> +		struct user_desc ldt;
>  		if (ptrace_get_thread_area(pid, &ldt) == 0)
>  			write_item(fd, "ldt", (void *)&ldt, sizeof(ldt));
>  	}
> -#endif
> 
>  	snprintf(fname, sizeof(fname), "/proc/%u/exe", pid);
>  	memset(exe, 0, sizeof(exe));
> @@ -1237,7 +1235,7 @@ static int process_restart(int fd, int m
>  	char *exe = NULL, *cwd = NULL, *sargv = NULL, *senv = NULL;
>  	struct user_regs_struct *regs = NULL;
>  	struct user_fpregs_struct *fpregs = NULL;
> -	//modify_ldt_t *ldt = NULL;
> +	struct user_desc *ldt = NULL;
>  	int *exitsig = NULL;
>  	sigset_t *sigmask = NULL, *sigpend = NULL;
>  	struct sigaction *sigact = NULL;
> @@ -1262,7 +1260,7 @@ static int process_restart(int fd, int m
>  			Free(senv);
>  			Free(regs);
>  			Free(fpregs);
> -			//Free(ldt);
> +			Free(ldt);
>  			Free(sigact);
>  			Free(sigmask);
>  			Free(sigpend);
> @@ -1276,7 +1274,7 @@ static int process_restart(int fd, int m
>  		else ITEM_SET(cwd, char);
>  		else ITEM_SET(regs, struct user_regs_struct);
>  		else ITEM_SET(fpregs, struct user_fpregs_struct);
> -		//else ITEM_SET(ldt, modify_ldt_t);
> +		else ITEM_SET(ldt, struct user_desc);
>  		else ITEM_SET(sigact, struct sigaction);
>  		else ITEM_SET(sigmask, sigset_t);
>  		else ITEM_SET(sigpend, sigset_t);
> @@ -1304,7 +1302,8 @@ static int process_restart(int fd, int m
>  				ERROR("lh_hash_add(%p, %u, %p)\n", &hpid, (unsigned)*pid, (void *)npid);
>  				return -1;
>  			}
> -			//if (ldt) ptrace_set_thread_area(npid, ldt);
> +			if (ldt)
> +				ptrace_set_thread_area(npid, ldt);
>  			if (cwd) PT_CHDIR(npid, cwd);
>  			restore_fd(fd, npid);
>  		} else if (ITEM_IS("SOCK")) {
> 
> -- 

^ permalink raw reply	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2008-06-11 15:28 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2008-06-11 14:13 [PATCH 0/2] cryo: Re-enable checkpointing of thread area Benjamin Thery
2008-06-11 14:14 ` [PATCH 1/2] cryo: re-enable " Benjamin Thery
     [not found]   ` <20080611141408.977819123-4vkkeT0zb4ZEtYaxpPmRp1aPQRlvutdw@public.gmane.org>
2008-06-11 15:28     ` Serge E. Hallyn
2008-06-11 14:14 ` [PATCH 2/2] cryo: minimal test program Benjamin Thery
     [not found] ` <20080611141350.541711754-4vkkeT0zb4ZEtYaxpPmRp1aPQRlvutdw@public.gmane.org>
2008-06-11 14:41   ` [PATCH 0/2] cryo: Re-enable checkpointing of thread area Benjamin Thery

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.