All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH RFC] s390: let tasks know to restart syscalls after sys_restart()
@ 2010-01-28 23:35 ` Serge E. Hallyn
  0 siblings, 0 replies; 8+ messages in thread
From: Serge E. Hallyn @ 2010-01-28 23:35 UTC (permalink / raw)
  To: Linux Containers; +Cc: linux-s390-u79uwXL29TY76Z2rM5mHXA

(This is a patch against the checkpoint/restart kernel tree at
http://git.ncl.cs.columbia.edu/?p=linux-cr.git;a=shortlog;h=refs/heads/ckpt-v19-rc2.9)

On x86, do_signal() leaves -516 in eax while it freezes, which
sys_restart() can use to detect that it should restart the
syscall which was interrupted by a signal (or the freezer).

On s390, gprs[2] gets tweaked to -EINTR (-4) instead, leaving
us no reliable way to tell whether should be restarted.  If the
task is checkpointed here and then restarted, then the last part
of do_signal() won't be done, and the interrupted syscall won't
be restarted.

This patch defines TIF_RESTARTBLOCK as a thread flag showing that
the thread expects to be frozen while kicked out of a restartable
syscall by a signal.

The TIF_RESTARTBLOCK flag is only set for the duration of the
get get_signal_to_deliver() which is where the task may get
frozen.  We also set it in sys_restart() if the checkpointed task
had had TIF_RESTARTBLOCK set.  It will get cleared if upon exiting
sys_restart() we jump to sysc_sigpending.  If instead we jump back
to do_signal(), then TIF_RESTARTBLOCK will stay set again until
after get_signal_to_deliver() so that if it immediately freezes and
is re-checkpointed, the resulting second checkpoint image again
will have TIF_RESTARTBLOCK set.

To test this, add a dummy handler for SIGUSR1 to cr_tests/sleep/sleeptest.c,
open two terminals, and do:

Terminal 1:                             Terminal :
mount -t cgroup -o freezer freezer /cgroup
mkdir /cgroup/1 /cgroup/2
echo $$ > /cgroup/1/tasks
./sleeptest

				    echo FROZEN > /cgroup/1/freezer.state
				    checkpoint `pidof sleeptest` > /tmp/out
				    thaw

restart -F /cgroup/2 < /tmp/out

				    kill -USR1 `pidof sleeptest`;
				    echo THAWED > /cgroup/2/freezer.state ;
				    sleep 0.3;
				    echo FROZEN > /cgroup/2/freezer.state;
				    checkpoint `pidof sleeptest` > /tmp/out2;
				    echo THAWED > /cgroup/2/freezer.state

restart < /tmp/out2

Without this patch, the first restart will immediately exit.  Without
re-setting TIF_RESTARTBLOCK in sys_restart(), the second restart will
immediately exit.  With this full patch, it will restart the sleep
correctly.

Cc: linux-s390-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
Signed-off-by: Serge E. Hallyn <serue-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>
---
 arch/s390/include/asm/checkpoint_hdr.h |    5 +++
 arch/s390/include/asm/thread_info.h    |    2 +
 arch/s390/kernel/checkpoint.c          |   52 +++++++++++++++++++++++++++++++-
 arch/s390/kernel/entry64.S             |    2 +
 arch/s390/kernel/signal.c              |    6 ++++
 5 files changed, 66 insertions(+), 1 deletions(-)

diff --git a/arch/s390/include/asm/checkpoint_hdr.h b/arch/s390/include/asm/checkpoint_hdr.h
index a8c2a3d..fd8be2a 100644
--- a/arch/s390/include/asm/checkpoint_hdr.h
+++ b/arch/s390/include/asm/checkpoint_hdr.h
@@ -76,6 +76,11 @@ struct ckpt_hdr_cpu {
 	__u8 instruction_fetch;
 };
 
+struct ckpt_hdr_thread {
+	struct ckpt_hdr h;
+	__u64 thread_info_flags;
+};
+
 struct ckpt_hdr_mm_context {
 	struct ckpt_hdr h;
 	unsigned long vdso_base;
diff --git a/arch/s390/include/asm/thread_info.h b/arch/s390/include/asm/thread_info.h
index 07eb61b..6ef11b2 100644
--- a/arch/s390/include/asm/thread_info.h
+++ b/arch/s390/include/asm/thread_info.h
@@ -100,6 +100,7 @@ static inline struct thread_info *current_thread_info(void)
 #define TIF_MEMDIE		19
 #define TIF_RESTORE_SIGMASK	20	/* restore signal mask in do_signal() */
 #define TIF_FREEZE		21	/* thread is freezing for suspend */
+#define TIF_RESTARTBLOCK	23	/* use restart block after sys_restart */
 
 #define _TIF_NOTIFY_RESUME	(1<<TIF_NOTIFY_RESUME)
 #define _TIF_RESTORE_SIGMASK	(1<<TIF_RESTORE_SIGMASK)
@@ -116,6 +117,7 @@ static inline struct thread_info *current_thread_info(void)
 #define _TIF_POLLING_NRFLAG	(1<<TIF_POLLING_NRFLAG)
 #define _TIF_31BIT		(1<<TIF_31BIT)
 #define _TIF_FREEZE		(1<<TIF_FREEZE)
+#define _TIF_RESTARTBLOCK	(1<<TIF_RESTARTBLOCK)
 
 #endif /* __KERNEL__ */
 
diff --git a/arch/s390/kernel/checkpoint.c b/arch/s390/kernel/checkpoint.c
index 60ba04d..092fd87 100644
--- a/arch/s390/kernel/checkpoint.c
+++ b/arch/s390/kernel/checkpoint.c
@@ -65,6 +65,16 @@ static void s390_copy_regs(int op, struct ckpt_hdr_cpu *h,
 		BUG_ON(h->gprs[2] < 0);
 		h->gprs[2] = 0;
 	}
+
+	/*
+	 * if checkpoint was taken while interrupted from restartable
+	 * system call, then restore_thread() will have already reset
+	 * TIF_RESTARTBLOCK, but it did not set gprs[2].  It's easier to
+	 * do that here where gprs registers are all restored.
+	 */
+	if (op == CKPT_RST && test_thread_flag(TIF_RESTARTBLOCK))
+		regs->gprs[2] = __NR_restart_syscall;
+
 	CKPT_COPY_ARRAY(op, h->fprs, thr->fp_regs.fprs, NUM_FPRS);
 	CKPT_COPY_ARRAY(op, h->acrs, thr->acrs, NUM_ACRS);
 	CKPT_COPY_ARRAY(op, h->per_control_regs,
@@ -83,12 +93,24 @@ static void s390_mm(int op, struct ckpt_hdr_mm_context *h,
 
 int checkpoint_thread(struct ckpt_ctx *ctx, struct task_struct *t)
 {
+	struct ckpt_hdr_thread *h;
+	int ret;
+
 	/* we will eventually support this, as we do on x86-64 */
 	if (test_tsk_thread_flag(t, TIF_31BIT)) {
 		ckpt_err(ctx, -EINVAL, "checkpoint of 31-bit task\n");
 		return -EINVAL;
 	}
-	return 0;
+
+	h = ckpt_hdr_get_type(ctx, sizeof(*h), CKPT_HDR_THREAD);
+	if (!h)
+		return -ENOMEM;
+
+	h->thread_info_flags = task_thread_info(t)->flags;
+	ret = ckpt_write_obj(ctx, &h->h);
+	ckpt_hdr_put(ctx, h);
+
+	return ret;
 }
 
 /* dump the cpu state and registers of a given task */
@@ -148,11 +170,39 @@ int checkpoint_mm_context(struct ckpt_ctx *ctx, struct mm_struct *mm)
 
 int restore_thread(struct ckpt_ctx *ctx)
 {
+	struct ckpt_hdr_thread *h;
+
 	/* a 31-bit task cannot call sys_restart right now */
 	if (test_thread_flag(TIF_31BIT)) {
 		ckpt_err(ctx, -EINVAL, "restart from 31-bit task\n");
 		return -EINVAL;
 	}
+
+	h = ckpt_read_obj_type(ctx, sizeof(*h), CKPT_HDR_THREAD);
+	if (IS_ERR(h))
+		return PTR_ERR(h);
+
+	/*
+	 * if the checkpointed task was frozen in a syscall with
+	 * -ERESTART_RESTARTBLOCK (switched to -EINTR during do_signal()
+	 * before try_to_freeze() happened) then after restart we need
+	 * to call __NR_restart_syscall to continue.  Fix up here.
+	 * In other words, we're doing the rest of the fixup which the
+	 * end of do_signal(), which won't now be run, would have done.
+	 *
+	 * We also re-set TIF_RESTARTBLOCK in case we hit do_signal
+	 * right after sys_restart() instead of sysc_restart, since
+	 * we would enter do_signal() with different inital conditions
+	 * than usual for being "inside" a restartable syscall.
+	 */
+	if (h->thread_info_flags & _TIF_RESTARTBLOCK) {
+		set_thread_flag(TIF_RESTART_SVC);
+		set_thread_flag(TIF_RESTARTBLOCK);
+	}
+
+	/* need to do something with TIF_RESTORE_SIGMASK ? */
+
+	ckpt_hdr_put(ctx, h);
 	return 0;
 }
 
diff --git a/arch/s390/kernel/entry64.S b/arch/s390/kernel/entry64.S
index 9aff1d4..798ea9c 100644
--- a/arch/s390/kernel/entry64.S
+++ b/arch/s390/kernel/entry64.S
@@ -58,6 +58,7 @@ _TIF_WORK_INT = (_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | _TIF_NEED_RESCHED | \
 		 _TIF_MCCK_PENDING)
 _TIF_SYSCALL = (_TIF_SYSCALL_TRACE>>8 | _TIF_SYSCALL_AUDIT>>8 | \
 		_TIF_SECCOMP>>8 | _TIF_SYSCALL_TRACEPOINT>>8)
+CLR_TIF_RESTARTBLOCK = (255 - (_TIF_RESTARTBLOCK>>16))
 
 #define BASED(name) name-system_call(%r13)
 
@@ -338,6 +339,7 @@ sysc_mcck_pending:
 #
 sysc_sigpending:
 	ni	__TI_flags+7(%r9),255-_TIF_SINGLE_STEP # clear TIF_SINGLE_STEP
+	ni	__TI_flags+6(%r9),CLR_TIF_RESTARTBLOCK # clear TIF_RESTARTBLOCK
 	la	%r2,SP_PTREGS(%r15)	# load pt_regs
 	brasl	%r14,do_signal		# call do_signal
 	tm	__TI_flags+7(%r9),_TIF_RESTART_SVC
diff --git a/arch/s390/kernel/signal.c b/arch/s390/kernel/signal.c
index 6b4fef8..503fd09 100644
--- a/arch/s390/kernel/signal.c
+++ b/arch/s390/kernel/signal.c
@@ -458,6 +458,8 @@ void do_signal(struct pt_regs *regs)
 			regs->psw.addr = restart_addr;
 			break;
 		case -ERESTART_RESTARTBLOCK:
+			/* if checkpointed here, set needs_restart */
+			set_thread_flag(TIF_RESTARTBLOCK);
 			regs->gprs[2] = -EINTR;
 		}
 		regs->svcnr = 0;	/* Don't deal with this again. */
@@ -467,6 +469,10 @@ void do_signal(struct pt_regs *regs)
 	   the debugger may change all our registers ... */
 	signr = get_signal_to_deliver(&info, &ka, regs, NULL);
 
+	/* we've passed the try_to_freeze() in get_signal_to_deliver(), won't
+	 * be checkpointed before end of do_signal() */
+	clear_thread_flag(TIF_RESTARTBLOCK);
+
 	/* Depending on the signal settings we may need to revert the
 	   decision to restart the system call. */
 	if (signr > 0 && regs->psw.addr == restart_addr) {
-- 
1.6.1

^ permalink raw reply related	[flat|nested] 8+ messages in thread

* [PATCH RFC] s390: let tasks know to restart syscalls after
@ 2010-01-28 23:35 ` Serge E. Hallyn
  0 siblings, 0 replies; 8+ messages in thread
From: Serge E. Hallyn @ 2010-01-28 23:35 UTC (permalink / raw)
  To: linux-s390

(This is a patch against the checkpoint/restart kernel tree at
http://git.ncl.cs.columbia.edu/?p=linux-cr.git;a=shortlog;h=refs/heads/ckpt-v19-rc2.9)

On x86, do_signal() leaves -516 in eax while it freezes, which
sys_restart() can use to detect that it should restart the
syscall which was interrupted by a signal (or the freezer).

On s390, gprs[2] gets tweaked to -EINTR (-4) instead, leaving
us no reliable way to tell whether should be restarted.  If the
task is checkpointed here and then restarted, then the last part
of do_signal() won't be done, and the interrupted syscall won't
be restarted.

This patch defines TIF_RESTARTBLOCK as a thread flag showing that
the thread expects to be frozen while kicked out of a restartable
syscall by a signal.

The TIF_RESTARTBLOCK flag is only set for the duration of the
get get_signal_to_deliver() which is where the task may get
frozen.  We also set it in sys_restart() if the checkpointed task
had had TIF_RESTARTBLOCK set.  It will get cleared if upon exiting
sys_restart() we jump to sysc_sigpending.  If instead we jump back
to do_signal(), then TIF_RESTARTBLOCK will stay set again until
after get_signal_to_deliver() so that if it immediately freezes and
is re-checkpointed, the resulting second checkpoint image again
will have TIF_RESTARTBLOCK set.

To test this, add a dummy handler for SIGUSR1 to cr_tests/sleep/sleeptest.c,
open two terminals, and do:

Terminal 1:                             Terminal :
mount -t cgroup -o freezer freezer /cgroup
mkdir /cgroup/1 /cgroup/2
echo $$ > /cgroup/1/tasks
./sleeptest

				    echo FROZEN > /cgroup/1/freezer.state
				    checkpoint `pidof sleeptest` > /tmp/out
				    thaw

restart -F /cgroup/2 < /tmp/out

				    kill -USR1 `pidof sleeptest`;
				    echo THAWED > /cgroup/2/freezer.state ;
				    sleep 0.3;
				    echo FROZEN > /cgroup/2/freezer.state;
				    checkpoint `pidof sleeptest` > /tmp/out2;
				    echo THAWED > /cgroup/2/freezer.state

restart < /tmp/out2

Without this patch, the first restart will immediately exit.  Without
re-setting TIF_RESTARTBLOCK in sys_restart(), the second restart will
immediately exit.  With this full patch, it will restart the sleep
correctly.

Cc: linux-s390@vger.kernel.org
Signed-off-by: Serge E. Hallyn <serue@us.ibm.com>
---
 arch/s390/include/asm/checkpoint_hdr.h |    5 +++
 arch/s390/include/asm/thread_info.h    |    2 +
 arch/s390/kernel/checkpoint.c          |   52 +++++++++++++++++++++++++++++++-
 arch/s390/kernel/entry64.S             |    2 +
 arch/s390/kernel/signal.c              |    6 ++++
 5 files changed, 66 insertions(+), 1 deletions(-)

diff --git a/arch/s390/include/asm/checkpoint_hdr.h b/arch/s390/include/asm/checkpoint_hdr.h
index a8c2a3d..fd8be2a 100644
--- a/arch/s390/include/asm/checkpoint_hdr.h
+++ b/arch/s390/include/asm/checkpoint_hdr.h
@@ -76,6 +76,11 @@ struct ckpt_hdr_cpu {
 	__u8 instruction_fetch;
 };
 
+struct ckpt_hdr_thread {
+	struct ckpt_hdr h;
+	__u64 thread_info_flags;
+};
+
 struct ckpt_hdr_mm_context {
 	struct ckpt_hdr h;
 	unsigned long vdso_base;
diff --git a/arch/s390/include/asm/thread_info.h b/arch/s390/include/asm/thread_info.h
index 07eb61b..6ef11b2 100644
--- a/arch/s390/include/asm/thread_info.h
+++ b/arch/s390/include/asm/thread_info.h
@@ -100,6 +100,7 @@ static inline struct thread_info *current_thread_info(void)
 #define TIF_MEMDIE		19
 #define TIF_RESTORE_SIGMASK	20	/* restore signal mask in do_signal() */
 #define TIF_FREEZE		21	/* thread is freezing for suspend */
+#define TIF_RESTARTBLOCK	23	/* use restart block after sys_restart */
 
 #define _TIF_NOTIFY_RESUME	(1<<TIF_NOTIFY_RESUME)
 #define _TIF_RESTORE_SIGMASK	(1<<TIF_RESTORE_SIGMASK)
@@ -116,6 +117,7 @@ static inline struct thread_info *current_thread_info(void)
 #define _TIF_POLLING_NRFLAG	(1<<TIF_POLLING_NRFLAG)
 #define _TIF_31BIT		(1<<TIF_31BIT)
 #define _TIF_FREEZE		(1<<TIF_FREEZE)
+#define _TIF_RESTARTBLOCK	(1<<TIF_RESTARTBLOCK)
 
 #endif /* __KERNEL__ */
 
diff --git a/arch/s390/kernel/checkpoint.c b/arch/s390/kernel/checkpoint.c
index 60ba04d..092fd87 100644
--- a/arch/s390/kernel/checkpoint.c
+++ b/arch/s390/kernel/checkpoint.c
@@ -65,6 +65,16 @@ static void s390_copy_regs(int op, struct ckpt_hdr_cpu *h,
 		BUG_ON(h->gprs[2] < 0);
 		h->gprs[2] = 0;
 	}
+
+	/*
+	 * if checkpoint was taken while interrupted from restartable
+	 * system call, then restore_thread() will have already reset
+	 * TIF_RESTARTBLOCK, but it did not set gprs[2].  It's easier to
+	 * do that here where gprs registers are all restored.
+	 */
+	if (op == CKPT_RST && test_thread_flag(TIF_RESTARTBLOCK))
+		regs->gprs[2] = __NR_restart_syscall;
+
 	CKPT_COPY_ARRAY(op, h->fprs, thr->fp_regs.fprs, NUM_FPRS);
 	CKPT_COPY_ARRAY(op, h->acrs, thr->acrs, NUM_ACRS);
 	CKPT_COPY_ARRAY(op, h->per_control_regs,
@@ -83,12 +93,24 @@ static void s390_mm(int op, struct ckpt_hdr_mm_context *h,
 
 int checkpoint_thread(struct ckpt_ctx *ctx, struct task_struct *t)
 {
+	struct ckpt_hdr_thread *h;
+	int ret;
+
 	/* we will eventually support this, as we do on x86-64 */
 	if (test_tsk_thread_flag(t, TIF_31BIT)) {
 		ckpt_err(ctx, -EINVAL, "checkpoint of 31-bit task\n");
 		return -EINVAL;
 	}
-	return 0;
+
+	h = ckpt_hdr_get_type(ctx, sizeof(*h), CKPT_HDR_THREAD);
+	if (!h)
+		return -ENOMEM;
+
+	h->thread_info_flags = task_thread_info(t)->flags;
+	ret = ckpt_write_obj(ctx, &h->h);
+	ckpt_hdr_put(ctx, h);
+
+	return ret;
 }
 
 /* dump the cpu state and registers of a given task */
@@ -148,11 +170,39 @@ int checkpoint_mm_context(struct ckpt_ctx *ctx, struct mm_struct *mm)
 
 int restore_thread(struct ckpt_ctx *ctx)
 {
+	struct ckpt_hdr_thread *h;
+
 	/* a 31-bit task cannot call sys_restart right now */
 	if (test_thread_flag(TIF_31BIT)) {
 		ckpt_err(ctx, -EINVAL, "restart from 31-bit task\n");
 		return -EINVAL;
 	}
+
+	h = ckpt_read_obj_type(ctx, sizeof(*h), CKPT_HDR_THREAD);
+	if (IS_ERR(h))
+		return PTR_ERR(h);
+
+	/*
+	 * if the checkpointed task was frozen in a syscall with
+	 * -ERESTART_RESTARTBLOCK (switched to -EINTR during do_signal()
+	 * before try_to_freeze() happened) then after restart we need
+	 * to call __NR_restart_syscall to continue.  Fix up here.
+	 * In other words, we're doing the rest of the fixup which the
+	 * end of do_signal(), which won't now be run, would have done.
+	 *
+	 * We also re-set TIF_RESTARTBLOCK in case we hit do_signal
+	 * right after sys_restart() instead of sysc_restart, since
+	 * we would enter do_signal() with different inital conditions
+	 * than usual for being "inside" a restartable syscall.
+	 */
+	if (h->thread_info_flags & _TIF_RESTARTBLOCK) {
+		set_thread_flag(TIF_RESTART_SVC);
+		set_thread_flag(TIF_RESTARTBLOCK);
+	}
+
+	/* need to do something with TIF_RESTORE_SIGMASK ? */
+
+	ckpt_hdr_put(ctx, h);
 	return 0;
 }
 
diff --git a/arch/s390/kernel/entry64.S b/arch/s390/kernel/entry64.S
index 9aff1d4..798ea9c 100644
--- a/arch/s390/kernel/entry64.S
+++ b/arch/s390/kernel/entry64.S
@@ -58,6 +58,7 @@ _TIF_WORK_INT = (_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | _TIF_NEED_RESCHED | \
 		 _TIF_MCCK_PENDING)
 _TIF_SYSCALL = (_TIF_SYSCALL_TRACE>>8 | _TIF_SYSCALL_AUDIT>>8 | \
 		_TIF_SECCOMP>>8 | _TIF_SYSCALL_TRACEPOINT>>8)
+CLR_TIF_RESTARTBLOCK = (255 - (_TIF_RESTARTBLOCK>>16))
 
 #define BASED(name) name-system_call(%r13)
 
@@ -338,6 +339,7 @@ sysc_mcck_pending:
 #
 sysc_sigpending:
 	ni	__TI_flags+7(%r9),255-_TIF_SINGLE_STEP # clear TIF_SINGLE_STEP
+	ni	__TI_flags+6(%r9),CLR_TIF_RESTARTBLOCK # clear TIF_RESTARTBLOCK
 	la	%r2,SP_PTREGS(%r15)	# load pt_regs
 	brasl	%r14,do_signal		# call do_signal
 	tm	__TI_flags+7(%r9),_TIF_RESTART_SVC
diff --git a/arch/s390/kernel/signal.c b/arch/s390/kernel/signal.c
index 6b4fef8..503fd09 100644
--- a/arch/s390/kernel/signal.c
+++ b/arch/s390/kernel/signal.c
@@ -458,6 +458,8 @@ void do_signal(struct pt_regs *regs)
 			regs->psw.addr = restart_addr;
 			break;
 		case -ERESTART_RESTARTBLOCK:
+			/* if checkpointed here, set needs_restart */
+			set_thread_flag(TIF_RESTARTBLOCK);
 			regs->gprs[2] = -EINTR;
 		}
 		regs->svcnr = 0;	/* Don't deal with this again. */
@@ -467,6 +469,10 @@ void do_signal(struct pt_regs *regs)
 	   the debugger may change all our registers ... */
 	signr = get_signal_to_deliver(&info, &ka, regs, NULL);
 
+	/* we've passed the try_to_freeze() in get_signal_to_deliver(), won't
+	 * be checkpointed before end of do_signal() */
+	clear_thread_flag(TIF_RESTARTBLOCK);
+
 	/* Depending on the signal settings we may need to revert the
 	   decision to restart the system call. */
 	if (signr > 0 && regs->psw.addr == restart_addr) {
-- 
1.6.1

^ permalink raw reply related	[flat|nested] 8+ messages in thread

* Re: [PATCH RFC] s390: let tasks know to restart syscalls after sys_restart()
       [not found] ` <20100128233528.GA10140-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>
@ 2010-02-05  3:29   ` Oren Laadan
       [not found]     ` <4B6B908F.10709-eQaUEPhvms7ENvBUuze7eA@public.gmane.org>
  0 siblings, 1 reply; 8+ messages in thread
From: Oren Laadan @ 2010-02-05  3:29 UTC (permalink / raw)
  To: Serge E. Hallyn; +Cc: Linux Containers, linux-s390-u79uwXL29TY76Z2rM5mHXA



Serge E. Hallyn wrote:
> (This is a patch against the checkpoint/restart kernel tree at
> http://git.ncl.cs.columbia.edu/?p=linux-cr.git;a=shortlog;h=refs/heads/ckpt-v19-rc2.9)
> 
> On x86, do_signal() leaves -516 in eax while it freezes, which
> sys_restart() can use to detect that it should restart the
> syscall which was interrupted by a signal (or the freezer).
> 
> On s390, gprs[2] gets tweaked to -EINTR (-4) instead, leaving
> us no reliable way to tell whether should be restarted.  If the
> task is checkpointed here and then restarted, then the last part
> of do_signal() won't be done, and the interrupted syscall won't
> be restarted.
> 
> This patch defines TIF_RESTARTBLOCK as a thread flag showing that
> the thread expects to be frozen while kicked out of a restartable
> syscall by a signal.
> 
> The TIF_RESTARTBLOCK flag is only set for the duration of the
> get get_signal_to_deliver() which is where the task may get
> frozen.  We also set it in sys_restart() if the checkpointed task
> had had TIF_RESTARTBLOCK set.  It will get cleared if upon exiting
> sys_restart() we jump to sysc_sigpending.  If instead we jump back
> to do_signal(), then TIF_RESTARTBLOCK will stay set again until
> after get_signal_to_deliver() so that if it immediately freezes and
> is re-checkpointed, the resulting second checkpoint image again
> will have TIF_RESTARTBLOCK set.

Two comments:

1) This note will be lost once we fold this patch into a clean
patchset. Can you please make it part of the code ?

2) Maybe I'm missing something, but I'm not convinced. Can you
elaborate on why this is correct in different cases ?  Also, in
particular with respect to the post-signal-sent snippet in the
signal handling code:

        if (retval == -ERESTART_RESTARTBLOCK
            && regs->psw.addr == continue_addr) {

                regs->gprs[2] = __NR_restart_syscall;

                set_thread_flag(TIF_RESTART_SVC);

        }


Would it do what you expect after a restart ?  (restart modifies
the psw.addr)

Oren.

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH RFC] s390: let tasks know to restart syscalls after sys_restart()
       [not found]     ` <4B6B908F.10709-eQaUEPhvms7ENvBUuze7eA@public.gmane.org>
@ 2010-02-09 17:12       ` Serge E. Hallyn
       [not found]         ` <20100209171232.GA21899-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>
  0 siblings, 1 reply; 8+ messages in thread
From: Serge E. Hallyn @ 2010-02-09 17:12 UTC (permalink / raw)
  To: Oren Laadan; +Cc: Linux Containers, linux-s390-u79uwXL29TY76Z2rM5mHXA

Quoting Oren Laadan (orenl-eQaUEPhvms7ENvBUuze7eA@public.gmane.org):
> 
> 
> Serge E. Hallyn wrote:
> > (This is a patch against the checkpoint/restart kernel tree at
> > http://git.ncl.cs.columbia.edu/?p=linux-cr.git;a=shortlog;h=refs/heads/ckpt-v19-rc2.9)
> > 
> > On x86, do_signal() leaves -516 in eax while it freezes, which
> > sys_restart() can use to detect that it should restart the
> > syscall which was interrupted by a signal (or the freezer).
> > 
> > On s390, gprs[2] gets tweaked to -EINTR (-4) instead, leaving
> > us no reliable way to tell whether should be restarted.  If the
> > task is checkpointed here and then restarted, then the last part
> > of do_signal() won't be done, and the interrupted syscall won't
> > be restarted.
> > 
> > This patch defines TIF_RESTARTBLOCK as a thread flag showing that
> > the thread expects to be frozen while kicked out of a restartable
> > syscall by a signal.
> > 
> > The TIF_RESTARTBLOCK flag is only set for the duration of the
> > get get_signal_to_deliver() which is where the task may get
> > frozen.  We also set it in sys_restart() if the checkpointed task
> > had had TIF_RESTARTBLOCK set.  It will get cleared if upon exiting
> > sys_restart() we jump to sysc_sigpending.  If instead we jump back
> > to do_signal(), then TIF_RESTARTBLOCK will stay set again until
> > after get_signal_to_deliver() so that if it immediately freezes and
> > is re-checkpointed, the resulting second checkpoint image again
> > will have TIF_RESTARTBLOCK set.
> 
> Two comments:
> 
> 1) This note will be lost once we fold this patch into a clean
> patchset. Can you please make it part of the code ?

Sure, good idea.

> 2) Maybe I'm missing something, but I'm not convinced. Can you
> elaborate on why this is correct in different cases ?  Also, in
> particular with respect to the post-signal-sent snippet in the
> signal handling code:
> 
>         if (retval == -ERESTART_RESTARTBLOCK
>             && regs->psw.addr == continue_addr) {
> 
>                 regs->gprs[2] = __NR_restart_syscall;
> 
>                 set_thread_flag(TIF_RESTART_SVC);
> 
>         }
> 
> 
> Would it do what you expect after a restart ?  (restart modifies
> the psw.addr)

I don't understand the question.  After sys_restart(), we won't be
returning to this kernel code.  We'll either immediately call
restart_syscall(), or, if a signal was delivered before sys_restart(),
completed, then do_signal() will start again from the top.

In the first case we're doing exactly what we wanted to.

In that second case, we enter do_signal with very different
initial conditions than the checkpointed case:  regs->svcnr is 0,
so none of the gprs[2] or svcnr or psw-addr tweaking that
would have happened the first time will happen.  We'll just
handle the signal (if any), then, upon exit of do_signal,
proceed again with regs->gprs[2] == __NR_restart_syscall.

But, since thread_info_flags->TIF_RESTARTBLOCK is set,
if we get frozen and checkpointed again during the
get_signal_to_deliver(), a restart of that image should
be exactly the same as a restart of the current image.

(That, at least, is my intent and understanding :)

-serge

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH RFC] s390: let tasks know to restart syscalls after sys_restart()
       [not found]         ` <20100209171232.GA21899-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>
@ 2010-02-09 17:41           ` Oren Laadan
       [not found]             ` <4B719E56.7000009-eQaUEPhvms7ENvBUuze7eA@public.gmane.org>
  0 siblings, 1 reply; 8+ messages in thread
From: Oren Laadan @ 2010-02-09 17:41 UTC (permalink / raw)
  To: Serge E. Hallyn; +Cc: Linux Containers, linux-s390-u79uwXL29TY76Z2rM5mHXA



Serge E. Hallyn wrote:
> Quoting Oren Laadan (orenl-eQaUEPhvms7ENvBUuze7eA@public.gmane.org):
>>
>> Serge E. Hallyn wrote:
>>> (This is a patch against the checkpoint/restart kernel tree at
>>> http://git.ncl.cs.columbia.edu/?p=linux-cr.git;a=shortlog;h=refs/heads/ckpt-v19-rc2.9)
>>>
>>> On x86, do_signal() leaves -516 in eax while it freezes, which
>>> sys_restart() can use to detect that it should restart the
>>> syscall which was interrupted by a signal (or the freezer).
>>>
>>> On s390, gprs[2] gets tweaked to -EINTR (-4) instead, leaving
>>> us no reliable way to tell whether should be restarted.  If the
>>> task is checkpointed here and then restarted, then the last part
>>> of do_signal() won't be done, and the interrupted syscall won't
>>> be restarted.
>>>
>>> This patch defines TIF_RESTARTBLOCK as a thread flag showing that
>>> the thread expects to be frozen while kicked out of a restartable
>>> syscall by a signal.
>>>
>>> The TIF_RESTARTBLOCK flag is only set for the duration of the
>>> get get_signal_to_deliver() which is where the task may get
>>> frozen.  We also set it in sys_restart() if the checkpointed task
>>> had had TIF_RESTARTBLOCK set.  It will get cleared if upon exiting
>>> sys_restart() we jump to sysc_sigpending.  If instead we jump back
>>> to do_signal(), then TIF_RESTARTBLOCK will stay set again until
>>> after get_signal_to_deliver() so that if it immediately freezes and
>>> is re-checkpointed, the resulting second checkpoint image again
>>> will have TIF_RESTARTBLOCK set.
>> Two comments:
>>
>> 1) This note will be lost once we fold this patch into a clean
>> patchset. Can you please make it part of the code ?
> 
> Sure, good idea.
> 
>> 2) Maybe I'm missing something, but I'm not convinced. Can you
>> elaborate on why this is correct in different cases ?  Also, in
>> particular with respect to the post-signal-sent snippet in the
>> signal handling code:
>>
>>         if (retval == -ERESTART_RESTARTBLOCK
>>             && regs->psw.addr == continue_addr) {
>>
>>                 regs->gprs[2] = __NR_restart_syscall;
>>
>>                 set_thread_flag(TIF_RESTART_SVC);
>>
>>         }
>>
>>
>> Would it do what you expect after a restart ?  (restart modifies
>> the psw.addr)
> 
> I don't understand the question.  After sys_restart(), we won't be
> returning to this kernel code.  We'll either immediately call
> restart_syscall(), or, if a signal was delivered before sys_restart(),
> completed, then do_signal() will start again from the top.

Ok, I re-read the code: let's look at these cases:

case 1: checkpointee wasn't in syscall -- no problem.

case 2: checkpointee was in syscall, no signal pending; when it was
frozen, regs->svcnr became 0, and that's what we save, so on restart
we won't enter that snippet again. Again, no problem.

case 3: checkpointee was in syscall, signal pending;
case 4: checkpointee was in syscall, signal received at restart;
look at this snippet:

        if (signr > 0 && regs->psw.addr == restart_addr) {
                if (retval == -ERESTARTNOHAND
                    || (retval == -ERESTARTSYS
                         && !(current->sighand->action[signr-1].sa.sa_flags
                              & SA_RESTART))) {
                        regs->gprs[2] = -EINTR;
                        regs->psw.addr = continue_addr;
                }
        }

Because svcnr is/was 0, neither restart_addr nor continue_addr
were setup, so this condition is always false, which I think is
wrong. In particular if the original return value was one of
these two. Also, if the signal arrives _after_ the restart
completes... ?

case 5: receives a signal during restart -- restart should fail.

Oren.

> 
> In the first case we're doing exactly what we wanted to.
> 
> In that second case, we enter do_signal with very different
> initial conditions than the checkpointed case:  regs->svcnr is 0,
> so none of the gprs[2] or svcnr or psw-addr tweaking that
> would have happened the first time will happen.  We'll just
> handle the signal (if any), then, upon exit of do_signal,
> proceed again with regs->gprs[2] == __NR_restart_syscall.
> 
> But, since thread_info_flags->TIF_RESTARTBLOCK is set,
> if we get frozen and checkpointed again during the
> get_signal_to_deliver(), a restart of that image should
> be exactly the same as a restart of the current image.
> 
> (That, at least, is my intent and understanding :)
> 
> -serge
> 

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH RFC] s390: let tasks know to restart syscalls after sys_restart()
       [not found]             ` <4B719E56.7000009-eQaUEPhvms7ENvBUuze7eA@public.gmane.org>
@ 2010-02-09 20:16               ` Serge E. Hallyn
       [not found]                 ` <20100209201643.GB29094-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>
  0 siblings, 1 reply; 8+ messages in thread
From: Serge E. Hallyn @ 2010-02-09 20:16 UTC (permalink / raw)
  To: Oren Laadan; +Cc: Linux Containers, linux-s390-u79uwXL29TY76Z2rM5mHXA

Quoting Oren Laadan (orenl-eQaUEPhvms7ENvBUuze7eA@public.gmane.org):
> 
> 
> Serge E. Hallyn wrote:
> > Quoting Oren Laadan (orenl-eQaUEPhvms7ENvBUuze7eA@public.gmane.org):
> >>
> >> Serge E. Hallyn wrote:
> >>> (This is a patch against the checkpoint/restart kernel tree at
> >>> http://git.ncl.cs.columbia.edu/?p=linux-cr.git;a=shortlog;h=refs/heads/ckpt-v19-rc2.9)
> >>>
> >>> On x86, do_signal() leaves -516 in eax while it freezes, which
> >>> sys_restart() can use to detect that it should restart the
> >>> syscall which was interrupted by a signal (or the freezer).
> >>>
> >>> On s390, gprs[2] gets tweaked to -EINTR (-4) instead, leaving
> >>> us no reliable way to tell whether should be restarted.  If the
> >>> task is checkpointed here and then restarted, then the last part
> >>> of do_signal() won't be done, and the interrupted syscall won't
> >>> be restarted.
> >>>
> >>> This patch defines TIF_RESTARTBLOCK as a thread flag showing that
> >>> the thread expects to be frozen while kicked out of a restartable
> >>> syscall by a signal.
> >>>
> >>> The TIF_RESTARTBLOCK flag is only set for the duration of the
> >>> get get_signal_to_deliver() which is where the task may get
> >>> frozen.  We also set it in sys_restart() if the checkpointed task
> >>> had had TIF_RESTARTBLOCK set.  It will get cleared if upon exiting
> >>> sys_restart() we jump to sysc_sigpending.  If instead we jump back
> >>> to do_signal(), then TIF_RESTARTBLOCK will stay set again until
> >>> after get_signal_to_deliver() so that if it immediately freezes and
> >>> is re-checkpointed, the resulting second checkpoint image again
> >>> will have TIF_RESTARTBLOCK set.
> >> Two comments:
> >>
> >> 1) This note will be lost once we fold this patch into a clean
> >> patchset. Can you please make it part of the code ?
> > 
> > Sure, good idea.
> > 
> >> 2) Maybe I'm missing something, but I'm not convinced. Can you
> >> elaborate on why this is correct in different cases ?  Also, in
> >> particular with respect to the post-signal-sent snippet in the
> >> signal handling code:
> >>
> >>         if (retval == -ERESTART_RESTARTBLOCK
> >>             && regs->psw.addr == continue_addr) {
> >>
> >>                 regs->gprs[2] = __NR_restart_syscall;
> >>
> >>                 set_thread_flag(TIF_RESTART_SVC);
> >>
> >>         }
> >>
> >>
> >> Would it do what you expect after a restart ?  (restart modifies
> >> the psw.addr)
> > 
> > I don't understand the question.  After sys_restart(), we won't be
> > returning to this kernel code.  We'll either immediately call
> > restart_syscall(), or, if a signal was delivered before sys_restart(),
> > completed, then do_signal() will start again from the top.
> 
> Ok, I re-read the code: let's look at these cases:
> 
> case 1: checkpointee wasn't in syscall -- no problem.
> 
> case 2: checkpointee was in syscall, no signal pending; when it was
> frozen, regs->svcnr became 0, and that's what we save, so on restart
> we won't enter that snippet again. Again, no problem.
> 
> case 3: checkpointee was in syscall, signal pending;
> case 4: checkpointee was in syscall, signal received at restart;
> look at this snippet:
> 
>         if (signr > 0 && regs->psw.addr == restart_addr) {
>                 if (retval == -ERESTARTNOHAND
>                     || (retval == -ERESTARTSYS
>                          && !(current->sighand->action[signr-1].sa.sa_flags
>                               & SA_RESTART))) {
>                         regs->gprs[2] = -EINTR;
>                         regs->psw.addr = continue_addr;
>                 }
>         }
> 
> Because svcnr is/was 0, neither restart_addr nor continue_addr
> were setup, so this condition is always false, which I think is
> wrong.

I've been focusing on the ERESTART_RESTARTBLOCK case.  Can
we agree that all cases appear to be handled correctly there?

For the ERESTARTSYS/ERESTARTNOHAND case, I'm probably not
doing the right thing.  For a single checkpoint, since either
there was no real signal (freezer) or it didn't get handled
before checkpoint,  psw.addr gets checkpointed and restored
as restart_addr, which is the right thing.  (since signr is
not >0, we would have kept the values the same after
get_signal_to_deliver()).

But if a real signal gets delivered upon exit of sys_restart(),
then I think I do think we'll end up doing the wrong thing -
we'll restart the interrupted system call with the orig_gpr2,
so we'll pretend the signal did not get delivered, rather
than proceed past the call to the system call (in userspace)
with return value -EINTR.  (Just how wrong is that?)

This is all dense enough that it may be worth thinking of
a different way to handle it, but I'm not sure what that
way would be.  The challenge is finding a *simple*, reliable
way to detect what the the initial conditions to do_signal()
where, based on the register/thread_info values as they are
at do_signal()->get_signal_to_deliver()->try_to_freeze(),
given the ways the values get swapped in the block above
the get_signal_to_deliver() call.

The simplest thing by far would be if we could safely
move the get_signal_to_deliver() call before the

	if (regs->svcnr) {
		continue_addr = regs->psw.addr;
		...

block.  I assume there are entry_64.S-related reasons why
we cannot?

> Also, if the signal arrives _after_ the restart
> completes... ?

> case 5: receives a signal during restart -- restart should fail.
> 
> Oren.
> 
> > 
> > In the first case we're doing exactly what we wanted to.
> > 
> > In that second case, we enter do_signal with very different
> > initial conditions than the checkpointed case:  regs->svcnr is 0,
> > so none of the gprs[2] or svcnr or psw-addr tweaking that
> > would have happened the first time will happen.  We'll just
> > handle the signal (if any), then, upon exit of do_signal,
> > proceed again with regs->gprs[2] == __NR_restart_syscall.
> > 
> > But, since thread_info_flags->TIF_RESTARTBLOCK is set,
> > if we get frozen and checkpointed again during the
> > get_signal_to_deliver(), a restart of that image should
> > be exactly the same as a restart of the current image.
> > 
> > (That, at least, is my intent and understanding :)
> > 
> > -serge
> > 

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH RFC] s390: let tasks know to restart syscalls after sys_restart()
  2010-02-09 20:16               ` Serge E. Hallyn
@ 2010-02-09 21:12                     ` Oren Laadan
  0 siblings, 0 replies; 8+ messages in thread
From: Oren Laadan @ 2010-02-09 21:12 UTC (permalink / raw)
  To: Serge E. Hallyn; +Cc: Linux Containers, linux-s390-u79uwXL29TY76Z2rM5mHXA



Serge E. Hallyn wrote:
> Quoting Oren Laadan (orenl-eQaUEPhvms7ENvBUuze7eA@public.gmane.org):
>>
>> Serge E. Hallyn wrote:
>>> Quoting Oren Laadan (orenl-eQaUEPhvms7ENvBUuze7eA@public.gmane.org):
>>>> Serge E. Hallyn wrote:
>>>>> (This is a patch against the checkpoint/restart kernel tree at
>>>>> http://git.ncl.cs.columbia.edu/?p=linux-cr.git;a=shortlog;h=refs/heads/ckpt-v19-rc2.9)
>>>>>
>>>>> On x86, do_signal() leaves -516 in eax while it freezes, which
>>>>> sys_restart() can use to detect that it should restart the
>>>>> syscall which was interrupted by a signal (or the freezer).
>>>>>
>>>>> On s390, gprs[2] gets tweaked to -EINTR (-4) instead, leaving
>>>>> us no reliable way to tell whether should be restarted.  If the
>>>>> task is checkpointed here and then restarted, then the last part
>>>>> of do_signal() won't be done, and the interrupted syscall won't
>>>>> be restarted.
>>>>>
>>>>> This patch defines TIF_RESTARTBLOCK as a thread flag showing that
>>>>> the thread expects to be frozen while kicked out of a restartable
>>>>> syscall by a signal.
>>>>>
>>>>> The TIF_RESTARTBLOCK flag is only set for the duration of the
>>>>> get get_signal_to_deliver() which is where the task may get
>>>>> frozen.  We also set it in sys_restart() if the checkpointed task
>>>>> had had TIF_RESTARTBLOCK set.  It will get cleared if upon exiting
>>>>> sys_restart() we jump to sysc_sigpending.  If instead we jump back
>>>>> to do_signal(), then TIF_RESTARTBLOCK will stay set again until
>>>>> after get_signal_to_deliver() so that if it immediately freezes and
>>>>> is re-checkpointed, the resulting second checkpoint image again
>>>>> will have TIF_RESTARTBLOCK set.
>>>> Two comments:
>>>>
>>>> 1) This note will be lost once we fold this patch into a clean
>>>> patchset. Can you please make it part of the code ?
>>> Sure, good idea.
>>>
>>>> 2) Maybe I'm missing something, but I'm not convinced. Can you
>>>> elaborate on why this is correct in different cases ?  Also, in
>>>> particular with respect to the post-signal-sent snippet in the
>>>> signal handling code:
>>>>
>>>>         if (retval == -ERESTART_RESTARTBLOCK
>>>>             && regs->psw.addr == continue_addr) {
>>>>
>>>>                 regs->gprs[2] = __NR_restart_syscall;
>>>>
>>>>                 set_thread_flag(TIF_RESTART_SVC);
>>>>
>>>>         }
>>>>
>>>>
>>>> Would it do what you expect after a restart ?  (restart modifies
>>>> the psw.addr)
>>> I don't understand the question.  After sys_restart(), we won't be
>>> returning to this kernel code.  We'll either immediately call
>>> restart_syscall(), or, if a signal was delivered before sys_restart(),
>>> completed, then do_signal() will start again from the top.
>> Ok, I re-read the code: let's look at these cases:
>>
>> case 1: checkpointee wasn't in syscall -- no problem.
>>
>> case 2: checkpointee was in syscall, no signal pending; when it was
>> frozen, regs->svcnr became 0, and that's what we save, so on restart
>> we won't enter that snippet again. Again, no problem.
>>
>> case 3: checkpointee was in syscall, signal pending;
>> case 4: checkpointee was in syscall, signal received at restart;
>> look at this snippet:
>>
>>         if (signr > 0 && regs->psw.addr == restart_addr) {
>>                 if (retval == -ERESTARTNOHAND
>>                     || (retval == -ERESTARTSYS
>>                          && !(current->sighand->action[signr-1].sa.sa_flags
>>                               & SA_RESTART))) {
>>                         regs->gprs[2] = -EINTR;
>>                         regs->psw.addr = continue_addr;
>>                 }
>>         }
>>
>> Because svcnr is/was 0, neither restart_addr nor continue_addr
>> were setup, so this condition is always false, which I think is
>> wrong.
> 
> I've been focusing on the ERESTART_RESTARTBLOCK case.  Can
> we agree that all cases appear to be handled correctly there?

Errr... everytime I need to go and read all the code from
scratch to convince myself... and I my mind just context
switched :(

> 
> For the ERESTARTSYS/ERESTARTNOHAND case, I'm probably not
> doing the right thing.  For a single checkpoint, since either
> there was no real signal (freezer) or it didn't get handled
> before checkpoint,  psw.addr gets checkpointed and restored
> as restart_addr, which is the right thing.  (since signr is
> not >0, we would have kept the values the same after
> get_signal_to_deliver()).
> 
> But if a real signal gets delivered upon exit of sys_restart(),
> then I think I do think we'll end up doing the wrong thing -
> we'll restart the interrupted system call with the orig_gpr2,
> so we'll pretend the signal did not get delivered, rather
> than proceed past the call to the system call (in userspace)
> with return value -EINTR.  (Just how wrong is that?)
> 
> This is all dense enough that it may be worth thinking of
> a different way to handle it, but I'm not sure what that
> way would be.  The challenge is finding a *simple*, reliable
> way to detect what the the initial conditions to do_signal()
> where, based on the register/thread_info values as they are
> at do_signal()->get_signal_to_deliver()->try_to_freeze(),
> given the ways the values get swapped in the block above
> the get_signal_to_deliver() call.
> 
> The simplest thing by far would be if we could safely
> move the get_signal_to_deliver() call before the
> 
> 	if (regs->svcnr) {
> 		continue_addr = regs->psw.addr;
> 		...
> 
> block.  I assume there are entry_64.S-related reasons why
> we cannot?

Yes, that's what I was thinking. And I don't know enough of
s390 to understand why we could not.

Either that or record the state before it is modified in
dedicated checkpoint-related fields (e.g. per thread).

Oren.

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH RFC] s390: let tasks know to restart syscalls after sys_restart()
@ 2010-02-09 21:12                     ` Oren Laadan
  0 siblings, 0 replies; 8+ messages in thread
From: Oren Laadan @ 2010-02-09 21:12 UTC (permalink / raw)
  To: linux-s390



Serge E. Hallyn wrote:
> Quoting Oren Laadan (orenl@cs.columbia.edu):
>>
>> Serge E. Hallyn wrote:
>>> Quoting Oren Laadan (orenl@cs.columbia.edu):
>>>> Serge E. Hallyn wrote:
>>>>> (This is a patch against the checkpoint/restart kernel tree at
>>>>> http://git.ncl.cs.columbia.edu/?p=linux-cr.git;a=shortlog;h=refs/heads/ckpt-v19-rc2.9)
>>>>>
>>>>> On x86, do_signal() leaves -516 in eax while it freezes, which
>>>>> sys_restart() can use to detect that it should restart the
>>>>> syscall which was interrupted by a signal (or the freezer).
>>>>>
>>>>> On s390, gprs[2] gets tweaked to -EINTR (-4) instead, leaving
>>>>> us no reliable way to tell whether should be restarted.  If the
>>>>> task is checkpointed here and then restarted, then the last part
>>>>> of do_signal() won't be done, and the interrupted syscall won't
>>>>> be restarted.
>>>>>
>>>>> This patch defines TIF_RESTARTBLOCK as a thread flag showing that
>>>>> the thread expects to be frozen while kicked out of a restartable
>>>>> syscall by a signal.
>>>>>
>>>>> The TIF_RESTARTBLOCK flag is only set for the duration of the
>>>>> get get_signal_to_deliver() which is where the task may get
>>>>> frozen.  We also set it in sys_restart() if the checkpointed task
>>>>> had had TIF_RESTARTBLOCK set.  It will get cleared if upon exiting
>>>>> sys_restart() we jump to sysc_sigpending.  If instead we jump back
>>>>> to do_signal(), then TIF_RESTARTBLOCK will stay set again until
>>>>> after get_signal_to_deliver() so that if it immediately freezes and
>>>>> is re-checkpointed, the resulting second checkpoint image again
>>>>> will have TIF_RESTARTBLOCK set.
>>>> Two comments:
>>>>
>>>> 1) This note will be lost once we fold this patch into a clean
>>>> patchset. Can you please make it part of the code ?
>>> Sure, good idea.
>>>
>>>> 2) Maybe I'm missing something, but I'm not convinced. Can you
>>>> elaborate on why this is correct in different cases ?  Also, in
>>>> particular with respect to the post-signal-sent snippet in the
>>>> signal handling code:
>>>>
>>>>         if (retval == -ERESTART_RESTARTBLOCK
>>>>             && regs->psw.addr == continue_addr) {
>>>>
>>>>                 regs->gprs[2] = __NR_restart_syscall;
>>>>
>>>>                 set_thread_flag(TIF_RESTART_SVC);
>>>>
>>>>         }
>>>>
>>>>
>>>> Would it do what you expect after a restart ?  (restart modifies
>>>> the psw.addr)
>>> I don't understand the question.  After sys_restart(), we won't be
>>> returning to this kernel code.  We'll either immediately call
>>> restart_syscall(), or, if a signal was delivered before sys_restart(),
>>> completed, then do_signal() will start again from the top.
>> Ok, I re-read the code: let's look at these cases:
>>
>> case 1: checkpointee wasn't in syscall -- no problem.
>>
>> case 2: checkpointee was in syscall, no signal pending; when it was
>> frozen, regs->svcnr became 0, and that's what we save, so on restart
>> we won't enter that snippet again. Again, no problem.
>>
>> case 3: checkpointee was in syscall, signal pending;
>> case 4: checkpointee was in syscall, signal received at restart;
>> look at this snippet:
>>
>>         if (signr > 0 && regs->psw.addr == restart_addr) {
>>                 if (retval == -ERESTARTNOHAND
>>                     || (retval == -ERESTARTSYS
>>                          && !(current->sighand->action[signr-1].sa.sa_flags
>>                               & SA_RESTART))) {
>>                         regs->gprs[2] = -EINTR;
>>                         regs->psw.addr = continue_addr;
>>                 }
>>         }
>>
>> Because svcnr is/was 0, neither restart_addr nor continue_addr
>> were setup, so this condition is always false, which I think is
>> wrong.
> 
> I've been focusing on the ERESTART_RESTARTBLOCK case.  Can
> we agree that all cases appear to be handled correctly there?

Errr... everytime I need to go and read all the code from
scratch to convince myself... and I my mind just context
switched :(

> 
> For the ERESTARTSYS/ERESTARTNOHAND case, I'm probably not
> doing the right thing.  For a single checkpoint, since either
> there was no real signal (freezer) or it didn't get handled
> before checkpoint,  psw.addr gets checkpointed and restored
> as restart_addr, which is the right thing.  (since signr is
> not >0, we would have kept the values the same after
> get_signal_to_deliver()).
> 
> But if a real signal gets delivered upon exit of sys_restart(),
> then I think I do think we'll end up doing the wrong thing -
> we'll restart the interrupted system call with the orig_gpr2,
> so we'll pretend the signal did not get delivered, rather
> than proceed past the call to the system call (in userspace)
> with return value -EINTR.  (Just how wrong is that?)
> 
> This is all dense enough that it may be worth thinking of
> a different way to handle it, but I'm not sure what that
> way would be.  The challenge is finding a *simple*, reliable
> way to detect what the the initial conditions to do_signal()
> where, based on the register/thread_info values as they are
> at do_signal()->get_signal_to_deliver()->try_to_freeze(),
> given the ways the values get swapped in the block above
> the get_signal_to_deliver() call.
> 
> The simplest thing by far would be if we could safely
> move the get_signal_to_deliver() call before the
> 
> 	if (regs->svcnr) {
> 		continue_addr = regs->psw.addr;
> 		...
> 
> block.  I assume there are entry_64.S-related reasons why
> we cannot?

Yes, that's what I was thinking. And I don't know enough of
s390 to understand why we could not.

Either that or record the state before it is modified in
dedicated checkpoint-related fields (e.g. per thread).

Oren.

^ permalink raw reply	[flat|nested] 8+ messages in thread

end of thread, other threads:[~2010-02-09 21:12 UTC | newest]

Thread overview: 8+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2010-01-28 23:35 [PATCH RFC] s390: let tasks know to restart syscalls after sys_restart() Serge E. Hallyn
2010-01-28 23:35 ` [PATCH RFC] s390: let tasks know to restart syscalls after Serge E. Hallyn
     [not found] ` <20100128233528.GA10140-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>
2010-02-05  3:29   ` [PATCH RFC] s390: let tasks know to restart syscalls after sys_restart() Oren Laadan
     [not found]     ` <4B6B908F.10709-eQaUEPhvms7ENvBUuze7eA@public.gmane.org>
2010-02-09 17:12       ` Serge E. Hallyn
     [not found]         ` <20100209171232.GA21899-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>
2010-02-09 17:41           ` Oren Laadan
     [not found]             ` <4B719E56.7000009-eQaUEPhvms7ENvBUuze7eA@public.gmane.org>
2010-02-09 20:16               ` Serge E. Hallyn
     [not found]                 ` <20100209201643.GB29094-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>
2010-02-09 21:12                   ` Oren Laadan
2010-02-09 21:12                     ` Oren Laadan

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.