All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] c/r: [signal 1/3] blocked and template for shared signals
@ 2009-07-23 14:48 Oren Laadan
       [not found] ` <1248360514-20710-1-git-send-email-orenl-RdfvBDnrOixBDgjK7y7TUQ@public.gmane.org>
  0 siblings, 1 reply; 11+ messages in thread
From: Oren Laadan @ 2009-07-23 14:48 UTC (permalink / raw)
  To: containers-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA

This patch adds checkpoint/restart of blocked signals mask
(t->blocked) and a template for shared signals (t->signal).

Because t->signal sharing is tied to threads, we ensure proper sharing
of t->signal (struct signal_struct) for threads only.

Access to t->signal is protected by locking t->sighand->lock.
Therefore, the usual checkpoint_obj() invoking the callback
checkpoint_signal(ctx, signal) is insufficient because the task
pointer is unavailable.

Instead, handling of t->signal sharing is explicit using helpers
like ckpt_obj_lookup_add(), ckpt_obj_fetch() and ckpt_obj_insert().
The actual state is saved (if needed) _after_ the task_objs data.

To prevent tasks from handling restored signals during restart,
set their mask to block all signals and only restore the original
mask at the very end (before the last sync point).

Introduce per-task pointer 'ckpt_data' to temporary store data
for restore actions that are deferred to the end (like restoring
the signal block mask).

Signed-off-by: Oren Laadan <orenl-eQaUEPhvms7ENvBUuze7eA@public.gmane.org>
---
 checkpoint/objhash.c           |    9 +++
 checkpoint/process.c           |   65 ++++++++++++++++++++++-
 checkpoint/signal.c            |  114 ++++++++++++++++++++++++++++++++++++---
 include/linux/checkpoint.h     |    6 ++
 include/linux/checkpoint_hdr.h |   14 +++++-
 5 files changed, 197 insertions(+), 11 deletions(-)

diff --git a/checkpoint/objhash.c b/checkpoint/objhash.c
index da43bf4..32cf1ff 100644
--- a/checkpoint/objhash.c
+++ b/checkpoint/objhash.c
@@ -301,6 +301,15 @@ static struct ckpt_obj_ops ckpt_obj_ops[] = {
 		.checkpoint = checkpoint_sighand,
 		.restore = restore_sighand,
 	},
+	/* signal object */
+	{
+		.obj_name = "SIGNAL",
+		.obj_type = CKPT_OBJ_SIGNAL,
+		.ref_drop = obj_no_drop,
+		.ref_grab = obj_no_grab,
+		.checkpoint = checkpoint_bad,
+		.restore = restore_bad,
+	},
 	/* ns object */
 	{
 		.obj_name = "NSPROXY",
diff --git a/checkpoint/process.c b/checkpoint/process.c
index d76ab2c..40b2580 100644
--- a/checkpoint/process.c
+++ b/checkpoint/process.c
@@ -181,7 +181,8 @@ static int checkpoint_task_objs(struct ckpt_ctx *ctx, struct task_struct *t)
 	int files_objref;
 	int mm_objref;
 	int sighand_objref;
-	int ret;
+	int signal_objref;
+	int first, ret;
 
 	/*
 	 * Shared objects may have dependencies among them: task->mm
@@ -224,14 +225,37 @@ static int checkpoint_task_objs(struct ckpt_ctx *ctx, struct task_struct *t)
 		return sighand_objref;
 	}
 
+	/*
+	 * Handle t->signal differently because the checkpoint method
+	 * for t->signal needs access to owning task_struct to access
+	 * t->sighand (to lock/unlock). First explicitly determine if
+	 * need to save, and only below invoke checkpoint_obj_signal()
+	 * if needed.
+	 */
+	signal_objref = ckpt_obj_lookup_add(ctx, t->signal,
+					    CKPT_OBJ_SIGNAL, &first);
+	ckpt_debug("signal: objref %d\n", signal_objref);
+	if (signal_objref < 0)
+		return signal_objref;
+
 	h = ckpt_hdr_get_type(ctx, sizeof(*h), CKPT_HDR_TASK_OBJS);
 	if (!h)
 		return -ENOMEM;
 	h->files_objref = files_objref;
 	h->mm_objref = mm_objref;
 	h->sighand_objref = sighand_objref;
+	h->signal_objref = signal_objref;
 	ret = ckpt_write_obj(ctx, &h->h);
 	ckpt_hdr_put(ctx, h);
+	if (ret < 0)
+		return ret;
+
+	/* actually save t->signal, if need to */
+	if (first)
+		ret = checkpoint_obj_signal(ctx, t);
+	if (ret < 0)
+		ckpt_write_err(ctx, "task %d (%s), signal_struct: %d",
+			       task_pid_vnr(t), t->comm, ret);
 
 	return ret;
 }
@@ -374,6 +398,10 @@ int checkpoint_task(struct ckpt_ctx *ctx, struct task_struct *t)
 		goto out;
 	ret = checkpoint_cpu(ctx, t);
 	ckpt_debug("cpu %d\n", ret);
+	if (ret < 0)
+		goto out;
+	ret = checkpoint_task_signal(ctx, t);
+	ckpt_debug("task-signal %d\n", ret);
  out:
 	return ret;
 }
@@ -547,6 +575,11 @@ static int restore_task_objs(struct ckpt_ctx *ctx)
 
 	ret = restore_obj_sighand(ctx, h->sighand_objref);
 	ckpt_debug("sighand: ret %d (%p)\n", ret, current->sighand);
+	if (ret < 0)
+		goto out;
+
+	ret = restore_obj_signal(ctx, h->signal_objref);
+	ckpt_debug("signal: ret %d (%p)\n", ret, current->signal);
  out:
 	ckpt_hdr_put(ctx, h);
 	return ret;
@@ -684,11 +717,37 @@ int restore_restart_block(struct ckpt_ctx *ctx)
 	return ret;
 }
 
+/* pre_restore_task - prepare the task for restore */
+static int pre_restore_task(struct ckpt_ctx *ctx)
+{
+	sigset_t sigset;
+
+	/*
+	 * Block task's signals to avoid interruptions due to signals,
+	 * say, from restored timers, file descriptors etc. Signals
+	 * will be unblocked when restore completes.
+	 *
+	 * NOTE: tasks with file descriptors set to send a SIGKILL as
+	 * i/o notification may fail the restart if a signal occurs
+	 * before that task completed its restore. FIX ?
+	 */
+	sigfillset(&sigset);
+	sigdelset(&sigset, SIGKILL);
+	sigdelset(&sigset, SIGSTOP);
+	sigprocmask(SIG_SETMASK, &sigset, NULL);
+
+	return 0;
+}
+
 /* read the entire state of the current task */
 int restore_task(struct ckpt_ctx *ctx)
 {
 	int ret;
 
+	ret = pre_restore_task(ctx);
+	if (ret < 0)
+		goto out;
+
 	ret = restore_task_struct(ctx);
 	ckpt_debug("task %d\n", ret);
 	if (ret < 0)
@@ -716,6 +775,10 @@ int restore_task(struct ckpt_ctx *ctx)
 		goto out;
 	ret = restore_creds(ctx);
 	ckpt_debug("creds: ret %d\n", ret);
+	if (ret < 0)
+		goto out;
+
+	ret = restore_task_signal(ctx);
  out:
 	return ret;
 }
diff --git a/checkpoint/signal.c b/checkpoint/signal.c
index 506476b..70df7c4 100644
--- a/checkpoint/signal.c
+++ b/checkpoint/signal.c
@@ -28,7 +28,7 @@ static inline void load_sigset(sigset_t *sigset, struct ckpt_hdr_sigset *h)
 }
 
 /***********************************************************************
- * Checkpoint
+ * sighand checkpoint/collect/restart
  */
 
 int do_checkpoint_sighand(struct ckpt_ctx *ctx, struct sighand_struct *sighand)
@@ -81,10 +81,6 @@ int checkpoint_obj_sighand(struct ckpt_ctx *ctx, struct task_struct *t)
 	return objref;
 }
 
-/***********************************************************************
- * Collect
- */
-
 int ckpt_collect_sighand(struct ckpt_ctx *ctx, struct task_struct *t)
 {
 	struct sighand_struct *sighand;
@@ -101,10 +97,6 @@ int ckpt_collect_sighand(struct ckpt_ctx *ctx, struct task_struct *t)
 	return ret;
 }
 
-/***********************************************************************
- * Restart
- */
-
 struct sighand_struct *do_restore_sighand(struct ckpt_ctx *ctx)
 {
 	struct ckpt_hdr_sighand *h;
@@ -168,3 +160,107 @@ int restore_obj_sighand(struct ckpt_ctx *ctx, int sighand_objref)
 
 	return 0;
 }
+
+/***********************************************************************
+ * signal checkpoint/restart
+ */
+
+static int checkpoint_signal(struct ckpt_ctx *ctx, struct task_struct *t)
+{
+	struct ckpt_hdr_signal *h;
+	int ret;
+
+	h = ckpt_hdr_get_type(ctx, sizeof(*h), CKPT_HDR_SIGNAL);
+	if (!h)
+		return -ENOMEM;
+
+	/* fill in later */
+
+	ret = ckpt_write_obj(ctx, &h->h);
+	ckpt_hdr_put(ctx, h);
+	return ret;
+}
+
+int checkpoint_obj_signal(struct ckpt_ctx *ctx, struct task_struct *t)
+{
+	BUG_ON(t->flags & PF_EXITING);
+	return checkpoint_signal(ctx, t);
+}
+
+static int restore_signal(struct ckpt_ctx *ctx)
+{
+	struct ckpt_hdr_signal *h;
+
+	h = ckpt_read_obj_type(ctx, sizeof(*h), CKPT_HDR_SIGNAL);
+	if (IS_ERR(h))
+		return PTR_ERR(h);
+
+	/* fill in later */
+
+	ckpt_hdr_put(ctx, h);
+	return 0;
+}
+
+int restore_obj_signal(struct ckpt_ctx *ctx, int signal_objref)
+{
+	struct signal_struct *signal;
+	int ret = 0;
+
+	signal = ckpt_obj_fetch(ctx, signal_objref, CKPT_OBJ_SIGNAL);
+	if (!IS_ERR(signal)) {
+		/*
+		 * signal_struct is already shared properly as it is
+		 * tied to thread groups. Since thread relationships
+		 * are already restore now, t->signal must match.
+		 */
+		if (signal != current->signal)
+			ret = -EINVAL;
+	} else if (PTR_ERR(signal) == -EINVAL) {
+		/* first timer: add to hash and restore our t->signal */
+		ret = ckpt_obj_insert(ctx, current->signal,
+				      signal_objref, CKPT_OBJ_SIGNAL);
+		if (ret >= 0)
+			ret = restore_signal(ctx);
+	} else {
+		ret = PTR_ERR(signal);
+	}
+
+	return ret;
+}
+
+int checkpoint_task_signal(struct ckpt_ctx *ctx, struct task_struct *t)
+{
+	struct ckpt_hdr_signal_task *h;
+	int ret;
+
+	h = ckpt_hdr_get_type(ctx, sizeof(*h), CKPT_HDR_SIGNAL_TASK);
+	if (!h)
+		return -ENOMEM;
+
+	fill_sigset(&h->blocked, &t->blocked);
+
+	ret = ckpt_write_obj(ctx, &h->h);
+	ckpt_hdr_put(ctx, h);
+	return ret;
+}
+
+int restore_task_signal(struct ckpt_ctx *ctx)
+{
+	struct ckpt_hdr_signal_task *h;
+	sigset_t blocked;
+
+	h = ckpt_read_obj_type(ctx, sizeof(*h), CKPT_HDR_SIGNAL_TASK);
+	if (IS_ERR(h))
+		return PTR_ERR(h);
+
+	load_sigset(&blocked, &h->blocked);
+	/* silently remove SIGKILL, SIGSTOP */
+	sigdelset(&blocked, SIGKILL);
+	sigdelset(&blocked, SIGSTOP);
+
+	sigprocmask(SIG_SETMASK, &blocked, NULL);
+	recalc_sigpending();
+
+	ckpt_hdr_put(ctx, h);
+	return 0;
+}
diff --git a/include/linux/checkpoint.h b/include/linux/checkpoint.h
index 60d1116..d977d68 100644
--- a/include/linux/checkpoint.h
+++ b/include/linux/checkpoint.h
@@ -230,6 +230,12 @@ extern int ckpt_collect_sighand(struct ckpt_ctx *ctx, struct task_struct *t);
 extern int checkpoint_sighand(struct ckpt_ctx *ctx, void *ptr);
 extern void *restore_sighand(struct ckpt_ctx *ctx);
 
+extern int checkpoint_obj_signal(struct ckpt_ctx *ctx, struct task_struct *t);
+extern int restore_obj_signal(struct ckpt_ctx *ctx, int signal_objref);
+
+extern int checkpoint_task_signal(struct ckpt_ctx *ctx, struct task_struct *t);
+extern int restore_task_signal(struct ckpt_ctx *ctx);
+
 /* useful macros to copy fields and buffers to/from ckpt_hdr_xxx structures */
 #define CKPT_CPT 1
 #define CKPT_RST 2
diff --git a/include/linux/checkpoint_hdr.h b/include/linux/checkpoint_hdr.h
index 4b85956..63afdc7 100644
--- a/include/linux/checkpoint_hdr.h
+++ b/include/linux/checkpoint_hdr.h
@@ -87,6 +87,8 @@ enum {
 	CKPT_HDR_IPC_SEM,
 
 	CKPT_HDR_SIGHAND = 601,
+	CKPT_HDR_SIGNAL,
+	CKPT_HDR_SIGNAL_TASK,
 
 	CKPT_HDR_TAIL = 9001,
 
@@ -115,6 +117,7 @@ enum obj_type {
 	CKPT_OBJ_FILE,
 	CKPT_OBJ_MM,
 	CKPT_OBJ_SIGHAND,
+	CKPT_OBJ_SIGNAL,
 	CKPT_OBJ_NS,
 	CKPT_OBJ_UTS_NS,
 	CKPT_OBJ_IPC_NS,
@@ -209,7 +212,6 @@ struct ckpt_hdr_task {
 	__u32 compat_robust_futex_list; /* a compat __user ptr */
 	__u32 robust_futex_head_len;
 	__u64 robust_futex_list; /* a __user ptr */
-
 } __attribute__((aligned(8)));
 
 /* Posix capabilities */
@@ -285,6 +287,7 @@ struct ckpt_hdr_task_objs {
 	__s32 files_objref;
 	__s32 mm_objref;
 	__s32 sighand_objref;
+	__s32 signal_objref;
 } __attribute__((aligned(8)));
 
 /* restart blocks */
@@ -427,6 +430,15 @@ struct ckpt_hdr_sighand {
 	struct ckpt_hdr_sigaction action[0];
 } __attribute__((aligned(8)));
 
+struct ckpt_hdr_signal {
+	struct ckpt_hdr h;
+} __attribute__((aligned(8)));
+
+struct ckpt_hdr_signal_task {
+	struct ckpt_hdr h;
+	struct ckpt_hdr_sigset blocked;
+} __attribute__((aligned(8)));
+
 /* ipc commons */
 struct ckpt_hdr_ipcns {
 	struct ckpt_hdr h;
-- 
1.6.0.4

^ permalink raw reply related	[flat|nested] 11+ messages in thread

* [PATCH] c/r: [signal 2/3] checkpoint/restart of rlimit
       [not found] ` <1248360514-20710-1-git-send-email-orenl-RdfvBDnrOixBDgjK7y7TUQ@public.gmane.org>
@ 2009-07-23 14:48   ` Oren Laadan
       [not found]     ` <1248360514-20710-2-git-send-email-orenl-RdfvBDnrOixBDgjK7y7TUQ@public.gmane.org>
  2009-07-23 14:48   ` [PATCH] c/r: [signal 3/3] pending signals (private, shared) Oren Laadan
  1 sibling, 1 reply; 11+ messages in thread
From: Oren Laadan @ 2009-07-23 14:48 UTC (permalink / raw)
  To: containers-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA

This patch adds checkpoint and restart of rlimit information
that is part of shared signal_struct.

Signed-off-by: Oren Laadan <orenl-eQaUEPhvms7ENvBUuze7eA@public.gmane.org>
---
 checkpoint/checkpoint.c        |    2 ++
 checkpoint/restart.c           |    3 +++
 checkpoint/signal.c            |   27 +++++++++++++++++++++++----
 include/linux/checkpoint_hdr.h |   17 +++++++++++++++++
 include/linux/resource.h       |    4 ++++
 kernel/sys.c                   |   39 +++++++++++++++++++++++----------------
 6 files changed, 72 insertions(+), 20 deletions(-)

diff --git a/checkpoint/checkpoint.c b/checkpoint/checkpoint.c
index 1522e6f..64c2d4d 100644
--- a/checkpoint/checkpoint.c
+++ b/checkpoint/checkpoint.c
@@ -199,6 +199,8 @@ static void fill_kernel_const(struct ckpt_hdr_const *h)
 	h->uts_version_len = sizeof(uts->version);
 	h->uts_machine_len = sizeof(uts->machine);
 	h->uts_domainname_len = sizeof(uts->domainname);
+	/* rlimit */
+	h->rlimit_nlimits = RLIM_NLIMITS;
 }
 
 /* write the checkpoint header */
diff --git a/checkpoint/restart.c b/checkpoint/restart.c
index 65cafd9..e80df05 100644
--- a/checkpoint/restart.c
+++ b/checkpoint/restart.c
@@ -317,6 +317,9 @@ static int check_kernel_const(struct ckpt_hdr_const *h)
 		return -EINVAL;
 	if (h->uts_domainname_len != sizeof(uts->domainname))
 		return -EINVAL;
+	/* rlimit */
+	if (h->rlimit_nlimits != RLIM_NLIMITS)
+		return -EINVAL;
 
 	return 0;
 }
diff --git a/checkpoint/signal.c b/checkpoint/signal.c
index 70df7c4..b3f1d3e 100644
--- a/checkpoint/signal.c
+++ b/checkpoint/signal.c
@@ -14,6 +14,7 @@
 #include <linux/sched.h>
 #include <linux/signal.h>
 #include <linux/errno.h>
+#include <linux/resource.h>
 #include <linux/checkpoint.h>
 #include <linux/checkpoint_hdr.h>
 
@@ -168,13 +169,22 @@ int restore_obj_sighand(struct ckpt_ctx *ctx, int sighand_objref)
 static int checkpoint_signal(struct ckpt_ctx *ctx, struct task_struct *t)
 {
 	struct ckpt_hdr_signal *h;
+	struct signal_struct *signal;
+	struct rlimit *rlim;
 	int ret;
 
 	h = ckpt_hdr_get_type(ctx, sizeof(*h), CKPT_HDR_SIGNAL);
 	if (!h)
 		return -ENOMEM;
 
-	/* fill in later */
+	signal = t->signal;
+	rlim = signal->rlim;
+
+	/* rlimit */
+	for (i = 0; i < RLIM_NLIMITS; i++) {
+		h->rlim[i].rlim_cur = rlim[i].rlim_cur;
+		h->rlim[i].rlim_max = rlim[i].rlim_max;
+	}
 
 	ret = ckpt_write_obj(ctx, &h->h);
 	ckpt_hdr_put(ctx, h);
@@ -190,15 +200,24 @@ int checkpoint_obj_signal(struct ckpt_ctx *ctx, struct task_struct *t)
 static int restore_signal(struct ckpt_ctx *ctx)
 {
 	struct ckpt_hdr_signal *h;
+	struct rlimit rlim;
+	int i, ret;
 
 	h = ckpt_read_obj_type(ctx, sizeof(*h), CKPT_HDR_SIGNAL);
 	if (IS_ERR(h))
 		return PTR_ERR(h);
 
-	/* fill in later */
-
+	/* rlimit */
+	for (i = 0; i < RLIM_NLIMITS; i++) {
+		rlim.rlim_cur = h->rlim[i].rlim_cur;
+		rlim.rlim_max = h->rlim[i].rlim_max;
+		ret = do_setrlimit(i, &rlim);
+		if (ret < 0)
+			break;
+	}
+ out:
 	ckpt_hdr_put(ctx, h);
-	return 0;
+	return ret;
 }
 
 int restore_obj_signal(struct ckpt_ctx *ctx, int signal_objref)
diff --git a/include/linux/checkpoint_hdr.h b/include/linux/checkpoint_hdr.h
index 63afdc7..0fd83b0 100644
--- a/include/linux/checkpoint_hdr.h
+++ b/include/linux/checkpoint_hdr.h
@@ -143,6 +143,8 @@ struct ckpt_hdr_const {
 	__u16 uts_version_len;
 	__u16 uts_machine_len;
 	__u16 uts_domainname_len;
+	/* rlimit */
+	__u16 rlimit_nlimits;
 } __attribute__((aligned(8)));
 
 /* checkpoint image header */
@@ -430,8 +432,23 @@ struct ckpt_hdr_sighand {
 	struct ckpt_hdr_sigaction action[0];
 } __attribute__((aligned(8)));
 
+struct ckpt_hdr_rlimit {
+	__u64 rlim_cur;
+	__u64 rlim_max;
+} __attribute__((aligned(8)));
+
+/* cannot include <linux/resource.h> from userspace, so define: */
+#define CKPT_RLIM_NLIMITS  16
+#ifdef __KERNEL__
+#include <linux/resource.h>
+#if CKPT_RLIM_NLIMITS != RLIM_NLIMITS
+#error CKPT_RLIM_NLIMIT size is wrong per asm-generic/resource.h
+#endif
+#endif
+
 struct ckpt_hdr_signal {
 	struct ckpt_hdr h;
+	struct ckpt_hdr_rlimit rlim[CKPT_RLIM_NLIMITS];
 } __attribute__((aligned(8)));
 
 struct ckpt_hdr_signal_task {
diff --git a/include/linux/resource.h b/include/linux/resource.h
index 40fc7e6..87e1bf3 100644
--- a/include/linux/resource.h
+++ b/include/linux/resource.h
@@ -72,4 +72,8 @@ struct rlimit {
 
 int getrusage(struct task_struct *p, int who, struct rusage __user *ru);
 
+#ifdef __KERNEL__
+extern int do_setrlimit(unsigned int resource, struct rlimit *rlim);
+#endif
+
 #endif
diff --git a/kernel/sys.c b/kernel/sys.c
index da4f9e0..80561c3 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -1144,40 +1144,34 @@ SYSCALL_DEFINE2(old_getrlimit, unsigned int, resource,
 
 #endif
 
-SYSCALL_DEFINE2(setrlimit, unsigned int, resource, struct rlimit __user *, rlim)
+int do_setrlimit(unsigned int resource, struct rlimit *new_rlim)
 {
-	struct rlimit new_rlim, *old_rlim;
+	struct rlimit *old_rlim;
 	int retval;
 
-	if (resource >= RLIM_NLIMITS)
-		return -EINVAL;
-	if (copy_from_user(&new_rlim, rlim, sizeof(*rlim)))
-		return -EFAULT;
-	if (new_rlim.rlim_cur > new_rlim.rlim_max)
-		return -EINVAL;
 	old_rlim = current->signal->rlim + resource;
-	if ((new_rlim.rlim_max > old_rlim->rlim_max) &&
+	if ((new_rlim->rlim_max > old_rlim->rlim_max) &&
 	    !capable(CAP_SYS_RESOURCE))
 		return -EPERM;
-	if (resource == RLIMIT_NOFILE && new_rlim.rlim_max > sysctl_nr_open)
+	if (resource == RLIMIT_NOFILE && new_rlim->rlim_max > sysctl_nr_open)
 		return -EPERM;
 
-	retval = security_task_setrlimit(resource, &new_rlim);
+	retval = security_task_setrlimit(resource, new_rlim);
 	if (retval)
 		return retval;
 
-	if (resource == RLIMIT_CPU && new_rlim.rlim_cur == 0) {
+	if (resource == RLIMIT_CPU && new_rlim->rlim_cur == 0) {
 		/*
 		 * The caller is asking for an immediate RLIMIT_CPU
 		 * expiry.  But we use the zero value to mean "it was
 		 * never set".  So let's cheat and make it one second
 		 * instead
 		 */
-		new_rlim.rlim_cur = 1;
+		new_rlim->rlim_cur = 1;
 	}
 
 	task_lock(current->group_leader);
-	*old_rlim = new_rlim;
+	*old_rlim = *new_rlim;
 	task_unlock(current->group_leader);
 
 	if (resource != RLIMIT_CPU)
@@ -1189,14 +1183,27 @@ SYSCALL_DEFINE2(setrlimit, unsigned int, resource, struct rlimit __user *, rlim)
 	 * very long-standing error, and fixing it now risks breakage of
 	 * applications, so we live with it
 	 */
-	if (new_rlim.rlim_cur == RLIM_INFINITY)
+	if (new_rlim->rlim_cur == RLIM_INFINITY)
 		goto out;
 
-	update_rlimit_cpu(new_rlim.rlim_cur);
+	update_rlimit_cpu(new_rlim->rlim_cur);
 out:
 	return 0;
 }
 
+SYSCALL_DEFINE2(setrlimit, unsigned int, resource, struct rlimit __user *, rlim)
+{
+	struct rlimit new_rlim;
+
+	if (resource >= RLIM_NLIMITS)
+		return -EINVAL;
+	if (copy_from_user(&new_rlim, rlim, sizeof(*rlim)))
+		return -EFAULT;
+	if (new_rlim.rlim_cur > new_rlim.rlim_max)
+		return -EINVAL;
+	return do_setrlimit(resource, &new_rlim);
+}
+
 /*
  * It would make sense to put struct rusage in the task_struct,
  * except that would make the task_struct be *really big*.  After
-- 
1.6.0.4

^ permalink raw reply related	[flat|nested] 11+ messages in thread

* [PATCH] c/r: [signal 3/3] pending signals (private, shared)
       [not found] ` <1248360514-20710-1-git-send-email-orenl-RdfvBDnrOixBDgjK7y7TUQ@public.gmane.org>
  2009-07-23 14:48   ` [PATCH] c/r: [signal 2/3] checkpoint/restart of rlimit Oren Laadan
@ 2009-07-23 14:48   ` Oren Laadan
       [not found]     ` <1248360514-20710-3-git-send-email-orenl-RdfvBDnrOixBDgjK7y7TUQ@public.gmane.org>
  1 sibling, 1 reply; 11+ messages in thread
From: Oren Laadan @ 2009-07-23 14:48 UTC (permalink / raw)
  To: containers-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA

This patch adds checkpoint and restart of pending signals queues:
struct sigpending, both per-task t->sigpending and shared (per-
thread-group) t->signal->shared_sigpending.

To checkpoint pending signals (private/shared) we first detach the
signal queue (and copy the mask) to a separate struct sigpending.
This separate structure can be iterated through without locking.

Once the state is saved, we re-attaches (prepends) the original signal
queue back to the original struct sigpending.

Signals that arrive(d) in the meantime will be suitably queued after
these (for real-time signals). Repeated non-realtime signals will not
be queued because they will already be marked in the pending mask,
that remains as is. This is the expected behavior of non-realtime
signals.

Signed-off-by: Oren Laadan <orenl-eQaUEPhvms7ENvBUuze7eA@public.gmane.org>
---
 checkpoint/signal.c            |  255 +++++++++++++++++++++++++++++++++++++++-
 include/linux/checkpoint_hdr.h |   23 ++++
 2 files changed, 277 insertions(+), 1 deletions(-)

diff --git a/checkpoint/signal.c b/checkpoint/signal.c
index b3f1d3e..940cc4a 100644
--- a/checkpoint/signal.c
+++ b/checkpoint/signal.c
@@ -166,12 +166,99 @@ int restore_obj_sighand(struct ckpt_ctx *ctx, int sighand_objref)
  * signal checkpoint/restart
  */
 
+static void fill_siginfo(struct ckpt_hdr_siginfo *si, siginfo_t *info)
+{
+	si->signo = info->si_signo;
+	si->_errno = info->si_errno;
+	si->code = info->si_code;
+
+	/* TODO: convert info->si_uid to uid_objref */
+
+	switch(info->si_code & __SI_MASK) {
+	case __SI_TIMER:
+		si->pid = info->si_tid;
+		si->uid = info->si_overrun;
+		si->sigval_int = info->si_int;
+	si->utime = info->si_sys_private;
+		break;
+	case __SI_POLL:
+		si->pid = info->si_band;
+		si->sigval_int = info->si_fd;
+		break;
+	case __SI_FAULT:
+		si->sigval_ptr = (unsigned long) info->si_addr;
+#ifdef __ARCH_SI_TRAPNO
+		si->sigval_int = info->si_trapno;
+#endif
+		break;
+	case __SI_CHLD:
+		si->pid = info->si_pid;
+		si->uid = info->si_uid;
+		si->sigval_int = info->si_status;
+		si->stime = info->si_stime;
+		si->utime = info->si_utime;
+		break;
+	/*
+	 * case __SI_KILL:
+	 * case __SI_RT:
+	 * case __SI_MESGQ:
+	 */
+	default:
+		si->pid = info->si_pid;
+		si->uid = info->si_uid;
+		si->sigval_ptr = (unsigned long) info->si_ptr;
+		break;
+	}
+}
+
+/*
+ * To checkpoint pending signals (private/shared) the caller moves the
+ * signal queue (and copies the mask) to a separate struct sigpending,
+ * therefore we can iterate through it without locking.
+ * After we return, the caller re-attaches (prepends) the original
+ * signal queue to the original struct sigpending. Thus, signals that
+ * arrive(d) in the meantime will be suitably queued after these.
+ * Finally, repeated non-realtime signals will not be queued because
+ * they will already be marked in the pending mask, that remains as is.
+ * This is the expected behavior of non-realtime signals.
+ */
+static int checkpoint_sigpending(struct ckpt_ctx *ctx,
+				 struct sigpending *pending)
+{
+	struct ckpt_hdr_sigpending *h;
+	struct ckpt_hdr_siginfo *si;
+	struct sigqueue *q;
+	int nr_pending = 0;
+	int ret;
+
+	list_for_each_entry(q, &pending->list, list)
+		nr_pending++;
+	h = ckpt_hdr_get_type(ctx, nr_pending * sizeof(*si) + sizeof(*h),
+			      CKPT_HDR_SIGPENDING);
+	if (!h)
+		return -ENOMEM;
+
+	h->nr_pending = nr_pending;
+	fill_sigset(&h->signal, &pending->signal);
+
+	si = h->siginfo;
+	list_for_each_entry(q, &pending->list, list)
+		fill_siginfo(si++, &q->info);
+
+	ret = ckpt_write_obj(ctx, &h->h);
+	ckpt_hdr_put(ctx, h);
+
+	return ret;
+}
+
 static int checkpoint_signal(struct ckpt_ctx *ctx, struct task_struct *t)
 {
 	struct ckpt_hdr_signal *h;
 	struct signal_struct *signal;
+	struct sigpending shared_pending;
 	struct rlimit *rlim;
-	int ret;
+	unsigned long flags;
+	int i, ret;
 
 	h = ckpt_hdr_get_type(ctx, sizeof(*h), CKPT_HDR_SIGNAL);
 	if (!h)
@@ -180,13 +267,35 @@ static int checkpoint_signal(struct ckpt_ctx *ctx, struct task_struct *t)
 	signal = t->signal;
 	rlim = signal->rlim;
 
+	INIT_LIST_HEAD(&shared_pending.list);
+
+	/* temporarily borrow signal queue - see chekcpoint_sigpending() */
+	if (!lock_task_sighand(t, &flags)) {
+		pr_warning("c/r: [%d] without sighand\n", task_pid_vnr(t));
+		goto out;
+	}
+	list_splice_init(&signal->shared_pending.list, &shared_pending.list);
+	shared_pending.signal = signal->shared_pending.signal;
+
 	/* rlimit */
 	for (i = 0; i < RLIM_NLIMITS; i++) {
 		h->rlim[i].rlim_cur = rlim[i].rlim_cur;
 		h->rlim[i].rlim_max = rlim[i].rlim_max;
 	}
+	unlock_task_sighand(t, &flags);
 
 	ret = ckpt_write_obj(ctx, &h->h);
+	if (!ret)
+		ret = checkpoint_sigpending(ctx, &shared_pending);
+
+	/* return the borrowed queue */
+	if (!lock_task_sighand(t, &flags)) {
+		pr_warning("c/r: [%d] sighand disappeared\n", task_pid_vnr(t));
+		goto out;
+	}
+	list_splice(&shared_pending.list, &signal->shared_pending.list);
+	unlock_task_sighand(t, &flags);
+ out:
 	ckpt_hdr_put(ctx, h);
 	return ret;
 }
@@ -197,9 +306,102 @@ int checkpoint_obj_signal(struct ckpt_ctx *ctx, struct task_struct *t)
 	return checkpoint_signal(ctx, t);
 }
 
+static int load_siginfo(siginfo_t *info, struct ckpt_hdr_siginfo *si)
+{
+	if (!valid_signal(si->signo))
+		return -EINVAL;
+
+	info->si_signo = si->signo;
+	info->si_errno = si->_errno;
+	info->si_code = si->code;
+
+	/* TODO: validate remaining signal fields */
+
+	switch(info->si_code & __SI_MASK) {
+	case __SI_TIMER:
+		info->si_tid = si->pid;
+		info->si_overrun = si->uid;
+		info->si_int = si->sigval_int;
+		info->si_sys_private = si->utime;
+		break;
+	case __SI_POLL:
+		info->si_band = si->pid;
+		info->si_fd = si->sigval_int;
+		break;
+	case __SI_FAULT:
+		info->si_addr = (void __user *) (unsigned long) si->sigval_ptr;
+#ifdef __ARCH_SI_TRAPNO
+		info->si_trapno = si->sigval_int;
+#endif
+		break;
+	case __SI_CHLD:
+		info->si_pid = si->pid;
+		info->si_uid = si->uid;
+		info->si_status = si->sigval_int;
+		info->si_stime = si->stime;
+		info->si_utime = si->utime;
+		break;
+	case __SI_KILL:
+	case __SI_RT:
+	case __SI_MESGQ:
+		info->si_pid = si->pid;
+		info->si_uid = si->uid;
+		info->si_ptr = (void __user *) (unsigned long) si->sigval_ptr;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int restore_sigpending(struct ckpt_ctx *ctx, struct sigpending *pending)
+{
+	struct ckpt_hdr_sigpending *h;
+	struct ckpt_hdr_siginfo *si;
+	struct sigqueue *q, *n;
+	int ret = 0;
+
+	h = ckpt_read_buf_type(ctx, 0, CKPT_HDR_SIGPENDING);
+	if (IS_ERR(h))
+		return PTR_ERR(h);
+
+	INIT_LIST_HEAD(&pending->list);
+	load_sigset(&pending->signal, &h->signal);
+
+	si = h->siginfo;
+	while (h->nr_pending--) {
+		q = sigqueue_alloc();
+		if (!q) {
+			ret = -ENOMEM;
+			break;
+		}
+
+		ret = load_siginfo(&q->info, si++);
+		if (ret < 0) {
+			sigqueue_free(q);
+			break;
+		}
+
+		list_add_tail(&pending->list, &q->list);
+	}
+
+	if (ret < 0) {
+		list_for_each_entry_safe(q, n, &pending->list, list) {
+			list_del_init(&q->list);
+			sigqueue_free(q);
+		}
+	}
+
+	ckpt_hdr_put(ctx, h);
+	return ret;
+}
+
 static int restore_signal(struct ckpt_ctx *ctx)
 {
 	struct ckpt_hdr_signal *h;
+	struct sigpending new_pending;
+	struct sigpending *pending;
 	struct rlimit rlim;
 	int i, ret;
 
@@ -215,6 +417,17 @@ static int restore_signal(struct ckpt_ctx *ctx)
 		if (ret < 0)
 			break;
 	}
+
+	ret = restore_sigpending(ctx, &new_pending);
+	if (ret < 0)
+		goto out;
+
+	spin_lock_irq(&current->sighand->siglock);
+	pending = &current->signal->shared_pending;
+	flush_sigqueue(pending);
+	pending->signal = new_pending.signal;
+	list_splice_init(&new_pending.list, &pending->list);
+	spin_unlock_irq(&current->sighand->siglock);
  out:
 	ckpt_hdr_put(ctx, h);
 	return ret;
@@ -250,8 +463,34 @@ int restore_obj_signal(struct ckpt_ctx *ctx, int signal_objref)
 int checkpoint_task_signal(struct ckpt_ctx *ctx, struct task_struct *t)
 {
 	struct ckpt_hdr_signal_task *h;
+	struct sigpending pending;
+	unsigned long flags;
 	int ret;
 
+	INIT_LIST_HEAD(&pending.list);
+
+	/* temporarily borrow signal queue - see chekcpoint_sigpending() */
+	if (!lock_task_sighand(t, &flags)) {
+		pr_warning("c/r: [%d] without sighand\n", task_pid_vnr(t));
+		return -EAGAIN;
+	}
+	list_splice_init(&t->pending.list, &pending.list);
+	pending.signal = t->pending.signal;
+	unlock_task_sighand(t, &flags);
+
+	ret = checkpoint_sigpending(ctx, &pending);
+
+	/* re-attach the borrowed queue */
+	if (!lock_task_sighand(t, &flags)) {
+		pr_warning("c/r: [%d] sighand disappeared\n", task_pid_vnr(t));
+		return -EAGAIN;
+	}
+	list_splice(&pending.list, &t->pending.list);
+	unlock_task_sighand(t, &flags);
+
+	if (ret < 0)
+		return ret;
+
 	h = ckpt_hdr_get_type(ctx, sizeof(*h), CKPT_HDR_SIGNAL_TASK);
 	if (!h)
 		return -ENOMEM;
@@ -266,7 +505,21 @@ int checkpoint_task_signal(struct ckpt_ctx *ctx, struct task_struct *t)
 int restore_task_signal(struct ckpt_ctx *ctx)
 {
 	struct ckpt_hdr_signal_task *h;
+	struct sigpending new_pending;
+	struct sigpending *pending;
 	sigset_t blocked;
+	int ret;
+
+	ret = restore_sigpending(ctx, &new_pending);
+	if (ret < 0)
+		return ret;
+
+	spin_lock_irq(&current->sighand->siglock);
+	pending = &current->pending;
+	flush_sigqueue(pending);
+	pending->signal = new_pending.signal;
+	list_splice_init(&new_pending.list, &pending->list);
+	spin_unlock_irq(&current->sighand->siglock);
 
 	h = ckpt_read_obj_type(ctx, sizeof(*h), CKPT_HDR_SIGNAL_TASK);
 	if (IS_ERR(h))
diff --git a/include/linux/checkpoint_hdr.h b/include/linux/checkpoint_hdr.h
index 0fd83b0..9dcb160 100644
--- a/include/linux/checkpoint_hdr.h
+++ b/include/linux/checkpoint_hdr.h
@@ -89,6 +89,7 @@ enum {
 	CKPT_HDR_SIGHAND = 601,
 	CKPT_HDR_SIGNAL,
 	CKPT_HDR_SIGNAL_TASK,
+	CKPT_HDR_SIGPENDING,
 
 	CKPT_HDR_TAIL = 9001,
 
@@ -432,6 +433,28 @@ struct ckpt_hdr_sighand {
 	struct ckpt_hdr_sigaction action[0];
 } __attribute__((aligned(8)));
 
+#ifndef HAVE_ARCH_SIGINFO_T
+struct ckpt_hdr_siginfo {
+	__u32 signo;
+	__u32 _errno;
+	__u32 code;
+
+	__u32 pid;
+	__s32 uid;
+	__u32 sigval_int;
+	__u64 sigval_ptr;
+	__u64 utime;
+	__u64 stime;
+} __attribute__((aligned(8)));
+#endif
+
+struct ckpt_hdr_sigpending {
+	struct ckpt_hdr h;
+	__u32 nr_pending;
+	struct ckpt_hdr_sigset signal;
+	struct ckpt_hdr_siginfo siginfo[0];
+} __attribute__((aligned(8)));
+
 struct ckpt_hdr_rlimit {
 	__u64 rlim_cur;
 	__u64 rlim_max;
-- 
1.6.0.4

^ permalink raw reply related	[flat|nested] 11+ messages in thread

* Re: [PATCH] c/r: [signal 2/3] checkpoint/restart of rlimit
       [not found]     ` <1248360514-20710-2-git-send-email-orenl-RdfvBDnrOixBDgjK7y7TUQ@public.gmane.org>
@ 2009-07-23 22:28       ` Serge E. Hallyn
       [not found]         ` <20090723222825.GA23596-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>
  0 siblings, 1 reply; 11+ messages in thread
From: Serge E. Hallyn @ 2009-07-23 22:28 UTC (permalink / raw)
  To: Oren Laadan; +Cc: containers-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA

Quoting Oren Laadan (orenl-RdfvBDnrOixBDgjK7y7TUQ@public.gmane.org):
> This patch adds checkpoint and restart of rlimit information
> that is part of shared signal_struct.

...

>  static int restore_signal(struct ckpt_ctx *ctx)
>  {
>  	struct ckpt_hdr_signal *h;
> +	struct rlimit rlim;
> +	int i, ret;
> 
>  	h = ckpt_read_obj_type(ctx, sizeof(*h), CKPT_HDR_SIGNAL);
>  	if (IS_ERR(h))
>  		return PTR_ERR(h);
> 
> -	/* fill in later */
> -
> +	/* rlimit */
> +	for (i = 0; i < RLIM_NLIMITS; i++) {
> +		rlim.rlim_cur = h->rlim[i].rlim_cur;
> +		rlim.rlim_max = h->rlim[i].rlim_max;
> +		ret = do_setrlimit(i, &rlim);

...
> +int do_setrlimit(unsigned int resource, struct rlimit *new_rlim)
>  {
> -	struct rlimit new_rlim, *old_rlim;
> +	struct rlimit *old_rlim;
>  	int retval;
> 
> -	if (resource >= RLIM_NLIMITS)
> -		return -EINVAL;
> -	if (copy_from_user(&new_rlim, rlim, sizeof(*rlim)))
> -		return -EFAULT;
> -	if (new_rlim.rlim_cur > new_rlim.rlim_max)
> -		return -EINVAL;
>  	old_rlim = current->signal->rlim + resource;
> -	if ((new_rlim.rlim_max > old_rlim->rlim_max) &&
> +	if ((new_rlim->rlim_max > old_rlim->rlim_max) &&
>  	    !capable(CAP_SYS_RESOURCE))
>  		return -EPERM;
> -	if (resource == RLIMIT_NOFILE && new_rlim.rlim_max > sysctl_nr_open)
> +	if (resource == RLIMIT_NOFILE && new_rlim->rlim_max > sysctl_nr_open)
>  		return -EPERM;
> 
> -	retval = security_task_setrlimit(resource, &new_rlim);
> +	retval = security_task_setrlimit(resource, new_rlim);
>  	if (retval)
>  		return retval;
> 
> -	if (resource == RLIMIT_CPU && new_rlim.rlim_cur == 0) {
> +	if (resource == RLIMIT_CPU && new_rlim->rlim_cur == 0) {
>  		/*
>  		 * The caller is asking for an immediate RLIMIT_CPU
>  		 * expiry.  But we use the zero value to mean "it was
>  		 * never set".  So let's cheat and make it one second
>  		 * instead
>  		 */
> -		new_rlim.rlim_cur = 1;
> +		new_rlim->rlim_cur = 1;
>  	}
> 
>  	task_lock(current->group_leader);
> -	*old_rlim = new_rlim;
> +	*old_rlim = *new_rlim;
>  	task_unlock(current->group_leader);
> 
>  	if (resource != RLIMIT_CPU)
> @@ -1189,14 +1183,27 @@ SYSCALL_DEFINE2(setrlimit, unsigned int, resource, struct rlimit __user *, rlim)
>  	 * very long-standing error, and fixing it now risks breakage of
>  	 * applications, so we live with it
>  	 */
> -	if (new_rlim.rlim_cur == RLIM_INFINITY)
> +	if (new_rlim->rlim_cur == RLIM_INFINITY)
>  		goto out;
> 
> -	update_rlimit_cpu(new_rlim.rlim_cur);
> +	update_rlimit_cpu(new_rlim->rlim_cur);
>  out:
>  	return 0;
>  }
> 
> +SYSCALL_DEFINE2(setrlimit, unsigned int, resource, struct rlimit __user *, rlim)
> +{
> +	struct rlimit new_rlim;
> +
> +	if (resource >= RLIM_NLIMITS)
> +		return -EINVAL;
> +	if (copy_from_user(&new_rlim, rlim, sizeof(*rlim)))
> +		return -EFAULT;
> +	if (new_rlim.rlim_cur > new_rlim.rlim_max)
> +		return -EINVAL;

Should the above check go into do_setrlimit()?  No sense trusting
the data sent to sys_checkpoint() any more than the data sent to
sys_setrlimit().

> +	return do_setrlimit(resource, &new_rlim);
> +}
> +
>  /*
>   * It would make sense to put struct rusage in the task_struct,
>   * except that would make the task_struct be *really big*.  After
> -- 
> 1.6.0.4

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH] c/r: [signal 2/3] checkpoint/restart of rlimit
       [not found]         ` <20090723222825.GA23596-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>
@ 2009-07-24  1:39           ` Oren Laadan
       [not found]             ` <4A6910E5.4010605-RdfvBDnrOixBDgjK7y7TUQ@public.gmane.org>
  0 siblings, 1 reply; 11+ messages in thread
From: Oren Laadan @ 2009-07-24  1:39 UTC (permalink / raw)
  To: Serge E. Hallyn; +Cc: containers-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA



Serge E. Hallyn wrote:
> Quoting Oren Laadan (orenl-RdfvBDnrOixBDgjK7y7TUQ@public.gmane.org):
>> This patch adds checkpoint and restart of rlimit information
>> that is part of shared signal_struct.
> 
> ...
> 
>>  static int restore_signal(struct ckpt_ctx *ctx)
>>  {
>>  	struct ckpt_hdr_signal *h;
>> +	struct rlimit rlim;
>> +	int i, ret;
>>
>>  	h = ckpt_read_obj_type(ctx, sizeof(*h), CKPT_HDR_SIGNAL);
>>  	if (IS_ERR(h))
>>  		return PTR_ERR(h);
>>
>> -	/* fill in later */
>> -
>> +	/* rlimit */
>> +	for (i = 0; i < RLIM_NLIMITS; i++) {
>> +		rlim.rlim_cur = h->rlim[i].rlim_cur;
>> +		rlim.rlim_max = h->rlim[i].rlim_max;
>> +		ret = do_setrlimit(i, &rlim);
> 
> ...
>> +int do_setrlimit(unsigned int resource, struct rlimit *new_rlim)
>>  {
>> -	struct rlimit new_rlim, *old_rlim;
>> +	struct rlimit *old_rlim;
>>  	int retval;
>>
>> -	if (resource >= RLIM_NLIMITS)
>> -		return -EINVAL;
>> -	if (copy_from_user(&new_rlim, rlim, sizeof(*rlim)))
>> -		return -EFAULT;
>> -	if (new_rlim.rlim_cur > new_rlim.rlim_max)
>> -		return -EINVAL;
>>  	old_rlim = current->signal->rlim + resource;
>> -	if ((new_rlim.rlim_max > old_rlim->rlim_max) &&
>> +	if ((new_rlim->rlim_max > old_rlim->rlim_max) &&
>>  	    !capable(CAP_SYS_RESOURCE))
>>  		return -EPERM;
>> -	if (resource == RLIMIT_NOFILE && new_rlim.rlim_max > sysctl_nr_open)
>> +	if (resource == RLIMIT_NOFILE && new_rlim->rlim_max > sysctl_nr_open)
>>  		return -EPERM;
>>
>> -	retval = security_task_setrlimit(resource, &new_rlim);
>> +	retval = security_task_setrlimit(resource, new_rlim);
>>  	if (retval)
>>  		return retval;
>>
>> -	if (resource == RLIMIT_CPU && new_rlim.rlim_cur == 0) {
>> +	if (resource == RLIMIT_CPU && new_rlim->rlim_cur == 0) {
>>  		/*
>>  		 * The caller is asking for an immediate RLIMIT_CPU
>>  		 * expiry.  But we use the zero value to mean "it was
>>  		 * never set".  So let's cheat and make it one second
>>  		 * instead
>>  		 */
>> -		new_rlim.rlim_cur = 1;
>> +		new_rlim->rlim_cur = 1;
>>  	}
>>
>>  	task_lock(current->group_leader);
>> -	*old_rlim = new_rlim;
>> +	*old_rlim = *new_rlim;
>>  	task_unlock(current->group_leader);
>>
>>  	if (resource != RLIMIT_CPU)
>> @@ -1189,14 +1183,27 @@ SYSCALL_DEFINE2(setrlimit, unsigned int, resource, struct rlimit __user *, rlim)
>>  	 * very long-standing error, and fixing it now risks breakage of
>>  	 * applications, so we live with it
>>  	 */
>> -	if (new_rlim.rlim_cur == RLIM_INFINITY)
>> +	if (new_rlim->rlim_cur == RLIM_INFINITY)
>>  		goto out;
>>
>> -	update_rlimit_cpu(new_rlim.rlim_cur);
>> +	update_rlimit_cpu(new_rlim->rlim_cur);
>>  out:
>>  	return 0;
>>  }
>>
>> +SYSCALL_DEFINE2(setrlimit, unsigned int, resource, struct rlimit __user *, rlim)
>> +{
>> +	struct rlimit new_rlim;
>> +
>> +	if (resource >= RLIM_NLIMITS)
>> +		return -EINVAL;
>> +	if (copy_from_user(&new_rlim, rlim, sizeof(*rlim)))
>> +		return -EFAULT;
>> +	if (new_rlim.rlim_cur > new_rlim.rlim_max)
>> +		return -EINVAL;
> 
> Should the above check go into do_setrlimit()?  No sense trusting
> the data sent to sys_checkpoint() any more than the data sent to
> sys_setrlimit().

You are very correct.

I wonder though: moving the first check will change the order of
input sanitizing, which will change the syscall behavior on bad
input. E.g, setrlimit(4096, NULL) used to return EINVAL but now
will return EFAULT.

Not that I really care that much, but I've seen a similar case
that confused LTP scripts into seeing the "wrong" error from a
syscall and failing a test.

Oren.

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH] c/r: [signal 3/3] pending signals (private, shared)
       [not found]     ` <1248360514-20710-3-git-send-email-orenl-RdfvBDnrOixBDgjK7y7TUQ@public.gmane.org>
@ 2009-07-24 12:36       ` Louis Rilling
       [not found]         ` <20090724123629.GH11101-Hu8+6S1rdjywhHL9vcZdMVaTQe2KTcn/@public.gmane.org>
  2009-07-24 13:13       ` Louis Rilling
  1 sibling, 1 reply; 11+ messages in thread
From: Louis Rilling @ 2009-07-24 12:36 UTC (permalink / raw)
  To: Oren Laadan; +Cc: containers-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA


[-- Attachment #1.1: Type: text/plain, Size: 2624 bytes --]

Hi Oren,

On 23/07/09 10:48 -0400, Oren Laadan wrote:
> This patch adds checkpoint and restart of pending signals queues:
> struct sigpending, both per-task t->sigpending and shared (per-
> thread-group) t->signal->shared_sigpending.
> 
> To checkpoint pending signals (private/shared) we first detach the
> signal queue (and copy the mask) to a separate struct sigpending.
> This separate structure can be iterated through without locking.
> 
> Once the state is saved, we re-attaches (prepends) the original signal
> queue back to the original struct sigpending.
> 
> Signals that arrive(d) in the meantime will be suitably queued after
> these (for real-time signals). Repeated non-realtime signals will not
> be queued because they will already be marked in the pending mask,
> that remains as is. This is the expected behavior of non-realtime
> signals.
> 
> Signed-off-by: Oren Laadan <orenl-eQaUEPhvms7ENvBUuze7eA@public.gmane.org>
> ---
>  checkpoint/signal.c            |  255 +++++++++++++++++++++++++++++++++++++++-
>  include/linux/checkpoint_hdr.h |   23 ++++
>  2 files changed, 277 insertions(+), 1 deletions(-)
> 
> diff --git a/checkpoint/signal.c b/checkpoint/signal.c
> index b3f1d3e..940cc4a 100644
> --- a/checkpoint/signal.c
> +++ b/checkpoint/signal.c

[...]

> +
> +static int restore_sigpending(struct ckpt_ctx *ctx, struct sigpending *pending)
> +{
> +	struct ckpt_hdr_sigpending *h;
> +	struct ckpt_hdr_siginfo *si;
> +	struct sigqueue *q, *n;
> +	int ret = 0;
> +
> +	h = ckpt_read_buf_type(ctx, 0, CKPT_HDR_SIGPENDING);
> +	if (IS_ERR(h))
> +		return PTR_ERR(h);
> +
> +	INIT_LIST_HEAD(&pending->list);
> +	load_sigset(&pending->signal, &h->signal);
> +
> +	si = h->siginfo;
> +	while (h->nr_pending--) {
> +		q = sigqueue_alloc();

This introduces a memory leak, since collect_signal() won't free q. Better to
use __sigqueue_alloc() here, or clear the SIGQUEUE_PREALLOC flag.

Thanks,

Louis

> +		if (!q) {
> +			ret = -ENOMEM;
> +			break;
> +		}
> +
> +		ret = load_siginfo(&q->info, si++);
> +		if (ret < 0) {
> +			sigqueue_free(q);
> +			break;
> +		}
> +
> +		list_add_tail(&pending->list, &q->list);
> +	}
> +
> +	if (ret < 0) {
> +		list_for_each_entry_safe(q, n, &pending->list, list) {
> +			list_del_init(&q->list);
> +			sigqueue_free(q);
> +		}
> +	}
> +
> +	ckpt_hdr_put(ctx, h);
> +	return ret;
> +}
> +

[...]

-- 
Dr Louis Rilling			Kerlabs
Skype: louis.rilling			Batiment Germanium
Phone: (+33|0) 6 80 89 08 23		80 avenue des Buttes de Coesmes
http://www.kerlabs.com/			35700 Rennes

[-- Attachment #1.2: Digital signature --]
[-- Type: application/pgp-signature, Size: 197 bytes --]

[-- Attachment #2: Type: text/plain, Size: 206 bytes --]

_______________________________________________
Containers mailing list
Containers-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA@public.gmane.org
https://lists.linux-foundation.org/mailman/listinfo/containers

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH] c/r: [signal 3/3] pending signals (private, shared)
       [not found]         ` <20090724123629.GH11101-Hu8+6S1rdjywhHL9vcZdMVaTQe2KTcn/@public.gmane.org>
@ 2009-07-24 12:59           ` Oren Laadan
  0 siblings, 0 replies; 11+ messages in thread
From: Oren Laadan @ 2009-07-24 12:59 UTC (permalink / raw)
  To: Oren Laadan,
	containers-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA



Louis Rilling wrote:
> Hi Oren,
> 
> On 23/07/09 10:48 -0400, Oren Laadan wrote:
>> This patch adds checkpoint and restart of pending signals queues:
>> struct sigpending, both per-task t->sigpending and shared (per-
>> thread-group) t->signal->shared_sigpending.
>>
>> To checkpoint pending signals (private/shared) we first detach the
>> signal queue (and copy the mask) to a separate struct sigpending.
>> This separate structure can be iterated through without locking.
>>
>> Once the state is saved, we re-attaches (prepends) the original signal
>> queue back to the original struct sigpending.
>>
>> Signals that arrive(d) in the meantime will be suitably queued after
>> these (for real-time signals). Repeated non-realtime signals will not
>> be queued because they will already be marked in the pending mask,
>> that remains as is. This is the expected behavior of non-realtime
>> signals.
>>
>> Signed-off-by: Oren Laadan <orenl-eQaUEPhvms7ENvBUuze7eA@public.gmane.org>
>> ---
>>  checkpoint/signal.c            |  255 +++++++++++++++++++++++++++++++++++++++-
>>  include/linux/checkpoint_hdr.h |   23 ++++
>>  2 files changed, 277 insertions(+), 1 deletions(-)
>>
>> diff --git a/checkpoint/signal.c b/checkpoint/signal.c
>> index b3f1d3e..940cc4a 100644
>> --- a/checkpoint/signal.c
>> +++ b/checkpoint/signal.c
> 
> [...]
> 
>> +
>> +static int restore_sigpending(struct ckpt_ctx *ctx, struct sigpending *pending)
>> +{
>> +	struct ckpt_hdr_sigpending *h;
>> +	struct ckpt_hdr_siginfo *si;
>> +	struct sigqueue *q, *n;
>> +	int ret = 0;
>> +
>> +	h = ckpt_read_buf_type(ctx, 0, CKPT_HDR_SIGPENDING);
>> +	if (IS_ERR(h))
>> +		return PTR_ERR(h);
>> +
>> +	INIT_LIST_HEAD(&pending->list);
>> +	load_sigset(&pending->signal, &h->signal);
>> +
>> +	si = h->siginfo;
>> +	while (h->nr_pending--) {
>> +		q = sigqueue_alloc();
> 
> This introduces a memory leak, since collect_signal() won't free q. Better to
> use __sigqueue_alloc() here, or clear the SIGQUEUE_PREALLOC flag.
> 

You're totally right.

I thought I needed it because of sigqueue_free() below, but in fact
it should be __sigqueue_alloc() and __sigqueue_free().

Thanks,

Oren.

> Thanks,
> 
> Louis
> 
>> +		if (!q) {
>> +			ret = -ENOMEM;
>> +			break;
>> +		}
>> +
>> +		ret = load_siginfo(&q->info, si++);
>> +		if (ret < 0) {
>> +			sigqueue_free(q);
>> +			break;
>> +		}
>> +
>> +		list_add_tail(&pending->list, &q->list);
>> +	}
>> +
>> +	if (ret < 0) {
>> +		list_for_each_entry_safe(q, n, &pending->list, list) {
>> +			list_del_init(&q->list);
>> +			sigqueue_free(q);
>> +		}
>> +	}
>> +
>> +	ckpt_hdr_put(ctx, h);
>> +	return ret;
>> +}
>> +
> 
> [...]
> 

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH] c/r: [signal 3/3] pending signals (private, shared)
       [not found]     ` <1248360514-20710-3-git-send-email-orenl-RdfvBDnrOixBDgjK7y7TUQ@public.gmane.org>
  2009-07-24 12:36       ` Louis Rilling
@ 2009-07-24 13:13       ` Louis Rilling
  1 sibling, 0 replies; 11+ messages in thread
From: Louis Rilling @ 2009-07-24 13:13 UTC (permalink / raw)
  To: Oren Laadan; +Cc: containers-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA


[-- Attachment #1.1: Type: text/plain, Size: 4348 bytes --]

On 23/07/09 10:48 -0400, Oren Laadan wrote:
> This patch adds checkpoint and restart of pending signals queues:
> struct sigpending, both per-task t->sigpending and shared (per-
> thread-group) t->signal->shared_sigpending.
> 
> To checkpoint pending signals (private/shared) we first detach the
> signal queue (and copy the mask) to a separate struct sigpending.
> This separate structure can be iterated through without locking.
> 
> Once the state is saved, we re-attaches (prepends) the original signal
> queue back to the original struct sigpending.
> 
> Signals that arrive(d) in the meantime will be suitably queued after
> these (for real-time signals). Repeated non-realtime signals will not
> be queued because they will already be marked in the pending mask,
> that remains as is. This is the expected behavior of non-realtime
> signals.
> 
> Signed-off-by: Oren Laadan <orenl-eQaUEPhvms7ENvBUuze7eA@public.gmane.org>
> ---
>  checkpoint/signal.c            |  255 +++++++++++++++++++++++++++++++++++++++-
>  include/linux/checkpoint_hdr.h |   23 ++++
>  2 files changed, 277 insertions(+), 1 deletions(-)
> 
> diff --git a/checkpoint/signal.c b/checkpoint/signal.c
> index b3f1d3e..940cc4a 100644
> --- a/checkpoint/signal.c
> +++ b/checkpoint/signal.c
> @@ -166,12 +166,99 @@ int restore_obj_sighand(struct ckpt_ctx *ctx, int sighand_objref)
>   * signal checkpoint/restart
>   */
>  
> +static void fill_siginfo(struct ckpt_hdr_siginfo *si, siginfo_t *info)
> +{
> +	si->signo = info->si_signo;
> +	si->_errno = info->si_errno;
> +	si->code = info->si_code;
> +
> +	/* TODO: convert info->si_uid to uid_objref */
> +
> +	switch(info->si_code & __SI_MASK) {
> +	case __SI_TIMER:
> +		si->pid = info->si_tid;
> +		si->uid = info->si_overrun;
> +		si->sigval_int = info->si_int;
> +	si->utime = info->si_sys_private;
> +		break;
> +	case __SI_POLL:
> +		si->pid = info->si_band;
> +		si->sigval_int = info->si_fd;
> +		break;
> +	case __SI_FAULT:
> +		si->sigval_ptr = (unsigned long) info->si_addr;
> +#ifdef __ARCH_SI_TRAPNO
> +		si->sigval_int = info->si_trapno;
> +#endif
> +		break;
> +	case __SI_CHLD:
> +		si->pid = info->si_pid;
> +		si->uid = info->si_uid;
> +		si->sigval_int = info->si_status;
> +		si->stime = info->si_stime;
> +		si->utime = info->si_utime;
> +		break;
> +	/*
> +	 * case __SI_KILL:
> +	 * case __SI_RT:
> +	 * case __SI_MESGQ:
> +	 */
> +	default:
> +		si->pid = info->si_pid;
> +		si->uid = info->si_uid;
> +		si->sigval_ptr = (unsigned long) info->si_ptr;
> +		break;
> +	}
> +}
> +
> +/*
> + * To checkpoint pending signals (private/shared) the caller moves the
> + * signal queue (and copies the mask) to a separate struct sigpending,
> + * therefore we can iterate through it without locking.
> + * After we return, the caller re-attaches (prepends) the original
> + * signal queue to the original struct sigpending. Thus, signals that
> + * arrive(d) in the meantime will be suitably queued after these.
> + * Finally, repeated non-realtime signals will not be queued because
> + * they will already be marked in the pending mask, that remains as is.
> + * This is the expected behavior of non-realtime signals.
> + */
> +static int checkpoint_sigpending(struct ckpt_ctx *ctx,
> +				 struct sigpending *pending)
> +{
> +	struct ckpt_hdr_sigpending *h;
> +	struct ckpt_hdr_siginfo *si;
> +	struct sigqueue *q;
> +	int nr_pending = 0;
> +	int ret;
> +
> +	list_for_each_entry(q, &pending->list, list)
> +		nr_pending++;
> +	h = ckpt_hdr_get_type(ctx, nr_pending * sizeof(*si) + sizeof(*h),
> +			      CKPT_HDR_SIGPENDING);
> +	if (!h)
> +		return -ENOMEM;
> +
> +	h->nr_pending = nr_pending;
> +	fill_sigset(&h->signal, &pending->signal);
> +
> +	si = h->siginfo;
> +	list_for_each_entry(q, &pending->list, list)
> +		fill_siginfo(si++, &q->info);

If q has SIGQUEUE_PREALLOC, we should probably abort the checkpoint, until POSIX
timers (only users AFAIK) are supported.

Thanks,

Louis

> +
> +	ret = ckpt_write_obj(ctx, &h->h);
> +	ckpt_hdr_put(ctx, h);
> +
> +	return ret;
> +}
> +

[...]

-- 
Dr Louis Rilling			Kerlabs
Skype: louis.rilling			Batiment Germanium
Phone: (+33|0) 6 80 89 08 23		80 avenue des Buttes de Coesmes
http://www.kerlabs.com/			35700 Rennes

[-- Attachment #1.2: Digital signature --]
[-- Type: application/pgp-signature, Size: 197 bytes --]

[-- Attachment #2: Type: text/plain, Size: 206 bytes --]

_______________________________________________
Containers mailing list
Containers-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA@public.gmane.org
https://lists.linux-foundation.org/mailman/listinfo/containers

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH] c/r: [signal 2/3] checkpoint/restart of rlimit
       [not found]             ` <4A6910E5.4010605-RdfvBDnrOixBDgjK7y7TUQ@public.gmane.org>
@ 2009-07-24 14:22               ` Serge E. Hallyn
       [not found]                 ` <20090724142235.GB6910-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>
  2009-07-24 20:04               ` Matt Helsley
  1 sibling, 1 reply; 11+ messages in thread
From: Serge E. Hallyn @ 2009-07-24 14:22 UTC (permalink / raw)
  To: Oren Laadan; +Cc: containers-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA

Quoting Oren Laadan (orenl-RdfvBDnrOixBDgjK7y7TUQ@public.gmane.org):
> 
> 
> Serge E. Hallyn wrote:
> > Quoting Oren Laadan (orenl-RdfvBDnrOixBDgjK7y7TUQ@public.gmane.org):
> >> This patch adds checkpoint and restart of rlimit information
> >> that is part of shared signal_struct.
> > 
> > ...
> > 
> >>  static int restore_signal(struct ckpt_ctx *ctx)
> >>  {
> >>  	struct ckpt_hdr_signal *h;
> >> +	struct rlimit rlim;
> >> +	int i, ret;
> >>
> >>  	h = ckpt_read_obj_type(ctx, sizeof(*h), CKPT_HDR_SIGNAL);
> >>  	if (IS_ERR(h))
> >>  		return PTR_ERR(h);
> >>
> >> -	/* fill in later */
> >> -
> >> +	/* rlimit */
> >> +	for (i = 0; i < RLIM_NLIMITS; i++) {
> >> +		rlim.rlim_cur = h->rlim[i].rlim_cur;
> >> +		rlim.rlim_max = h->rlim[i].rlim_max;
> >> +		ret = do_setrlimit(i, &rlim);
> > 
> > ...
> >> +int do_setrlimit(unsigned int resource, struct rlimit *new_rlim)
> >>  {
> >> -	struct rlimit new_rlim, *old_rlim;
> >> +	struct rlimit *old_rlim;
> >>  	int retval;
> >>
> >> -	if (resource >= RLIM_NLIMITS)
> >> -		return -EINVAL;
> >> -	if (copy_from_user(&new_rlim, rlim, sizeof(*rlim)))
> >> -		return -EFAULT;
> >> -	if (new_rlim.rlim_cur > new_rlim.rlim_max)
> >> -		return -EINVAL;
> >>  	old_rlim = current->signal->rlim + resource;
> >> -	if ((new_rlim.rlim_max > old_rlim->rlim_max) &&
> >> +	if ((new_rlim->rlim_max > old_rlim->rlim_max) &&
> >>  	    !capable(CAP_SYS_RESOURCE))
> >>  		return -EPERM;
> >> -	if (resource == RLIMIT_NOFILE && new_rlim.rlim_max > sysctl_nr_open)
> >> +	if (resource == RLIMIT_NOFILE && new_rlim->rlim_max > sysctl_nr_open)
> >>  		return -EPERM;
> >>
> >> -	retval = security_task_setrlimit(resource, &new_rlim);
> >> +	retval = security_task_setrlimit(resource, new_rlim);
> >>  	if (retval)
> >>  		return retval;
> >>
> >> -	if (resource == RLIMIT_CPU && new_rlim.rlim_cur == 0) {
> >> +	if (resource == RLIMIT_CPU && new_rlim->rlim_cur == 0) {
> >>  		/*
> >>  		 * The caller is asking for an immediate RLIMIT_CPU
> >>  		 * expiry.  But we use the zero value to mean "it was
> >>  		 * never set".  So let's cheat and make it one second
> >>  		 * instead
> >>  		 */
> >> -		new_rlim.rlim_cur = 1;
> >> +		new_rlim->rlim_cur = 1;
> >>  	}
> >>
> >>  	task_lock(current->group_leader);
> >> -	*old_rlim = new_rlim;
> >> +	*old_rlim = *new_rlim;
> >>  	task_unlock(current->group_leader);
> >>
> >>  	if (resource != RLIMIT_CPU)
> >> @@ -1189,14 +1183,27 @@ SYSCALL_DEFINE2(setrlimit, unsigned int, resource, struct rlimit __user *, rlim)
> >>  	 * very long-standing error, and fixing it now risks breakage of
> >>  	 * applications, so we live with it
> >>  	 */
> >> -	if (new_rlim.rlim_cur == RLIM_INFINITY)
> >> +	if (new_rlim->rlim_cur == RLIM_INFINITY)
> >>  		goto out;
> >>
> >> -	update_rlimit_cpu(new_rlim.rlim_cur);
> >> +	update_rlimit_cpu(new_rlim->rlim_cur);
> >>  out:
> >>  	return 0;
> >>  }
> >>
> >> +SYSCALL_DEFINE2(setrlimit, unsigned int, resource, struct rlimit __user *, rlim)
> >> +{
> >> +	struct rlimit new_rlim;
> >> +
> >> +	if (resource >= RLIM_NLIMITS)
> >> +		return -EINVAL;
> >> +	if (copy_from_user(&new_rlim, rlim, sizeof(*rlim)))
> >> +		return -EFAULT;
> >> +	if (new_rlim.rlim_cur > new_rlim.rlim_max)
> >> +		return -EINVAL;
> > 
> > Should the above check go into do_setrlimit()?  No sense trusting
> > the data sent to sys_checkpoint() any more than the data sent to
> > sys_setrlimit().
> 
> You are very correct.
> 
> I wonder though: moving the first check will change the order of
> input sanitizing, which will change the syscall behavior on bad
> input. E.g, setrlimit(4096, NULL) used to return EINVAL but now
> will return EFAULT.
> 
> Not that I really care that much, but I've seen a similar case
> that confused LTP scripts into seeing the "wrong" error from a
> syscall and failing a test.

Heh, I could be wrong, but when you mess up 2 ways, I don't think the kernel
needs to guarantee which one you'll be warned about :)  Of course there are
cases where that is well-defined (i.e. DAC-before-MAC).  Maybe we should ask at
linux-api?

Putting the same check before both callers of do_setrlimit() isn't *that*
bad, and I suppose we can put a comment above do_setrlimit() saying that
that any new callers need to do that check themselves...

-serge

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH] c/r: [signal 2/3] checkpoint/restart of rlimit
       [not found]                 ` <20090724142235.GB6910-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>
@ 2009-07-24 17:11                   ` Oren Laadan
  0 siblings, 0 replies; 11+ messages in thread
From: Oren Laadan @ 2009-07-24 17:11 UTC (permalink / raw)
  To: Serge E. Hallyn; +Cc: containers-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA



Serge E. Hallyn wrote:
> Quoting Oren Laadan (orenl-RdfvBDnrOixBDgjK7y7TUQ@public.gmane.org):
>>
>> Serge E. Hallyn wrote:
>>> Quoting Oren Laadan (orenl-RdfvBDnrOixBDgjK7y7TUQ@public.gmane.org):
>>>> This patch adds checkpoint and restart of rlimit information
>>>> that is part of shared signal_struct.
>>> ...
>>>
>>>>  static int restore_signal(struct ckpt_ctx *ctx)
>>>>  {
>>>>  	struct ckpt_hdr_signal *h;
>>>> +	struct rlimit rlim;
>>>> +	int i, ret;
>>>>
>>>>  	h = ckpt_read_obj_type(ctx, sizeof(*h), CKPT_HDR_SIGNAL);
>>>>  	if (IS_ERR(h))
>>>>  		return PTR_ERR(h);
>>>>
>>>> -	/* fill in later */
>>>> -
>>>> +	/* rlimit */
>>>> +	for (i = 0; i < RLIM_NLIMITS; i++) {
>>>> +		rlim.rlim_cur = h->rlim[i].rlim_cur;
>>>> +		rlim.rlim_max = h->rlim[i].rlim_max;
>>>> +		ret = do_setrlimit(i, &rlim);
>>> ...
>>>> +int do_setrlimit(unsigned int resource, struct rlimit *new_rlim)
>>>>  {
>>>> -	struct rlimit new_rlim, *old_rlim;
>>>> +	struct rlimit *old_rlim;
>>>>  	int retval;
>>>>
>>>> -	if (resource >= RLIM_NLIMITS)
>>>> -		return -EINVAL;
>>>> -	if (copy_from_user(&new_rlim, rlim, sizeof(*rlim)))
>>>> -		return -EFAULT;
>>>> -	if (new_rlim.rlim_cur > new_rlim.rlim_max)
>>>> -		return -EINVAL;
>>>>  	old_rlim = current->signal->rlim + resource;
>>>> -	if ((new_rlim.rlim_max > old_rlim->rlim_max) &&
>>>> +	if ((new_rlim->rlim_max > old_rlim->rlim_max) &&
>>>>  	    !capable(CAP_SYS_RESOURCE))
>>>>  		return -EPERM;
>>>> -	if (resource == RLIMIT_NOFILE && new_rlim.rlim_max > sysctl_nr_open)
>>>> +	if (resource == RLIMIT_NOFILE && new_rlim->rlim_max > sysctl_nr_open)
>>>>  		return -EPERM;
>>>>
>>>> -	retval = security_task_setrlimit(resource, &new_rlim);
>>>> +	retval = security_task_setrlimit(resource, new_rlim);
>>>>  	if (retval)
>>>>  		return retval;
>>>>
>>>> -	if (resource == RLIMIT_CPU && new_rlim.rlim_cur == 0) {
>>>> +	if (resource == RLIMIT_CPU && new_rlim->rlim_cur == 0) {
>>>>  		/*
>>>>  		 * The caller is asking for an immediate RLIMIT_CPU
>>>>  		 * expiry.  But we use the zero value to mean "it was
>>>>  		 * never set".  So let's cheat and make it one second
>>>>  		 * instead
>>>>  		 */
>>>> -		new_rlim.rlim_cur = 1;
>>>> +		new_rlim->rlim_cur = 1;
>>>>  	}
>>>>
>>>>  	task_lock(current->group_leader);
>>>> -	*old_rlim = new_rlim;
>>>> +	*old_rlim = *new_rlim;
>>>>  	task_unlock(current->group_leader);
>>>>
>>>>  	if (resource != RLIMIT_CPU)
>>>> @@ -1189,14 +1183,27 @@ SYSCALL_DEFINE2(setrlimit, unsigned int, resource, struct rlimit __user *, rlim)
>>>>  	 * very long-standing error, and fixing it now risks breakage of
>>>>  	 * applications, so we live with it
>>>>  	 */
>>>> -	if (new_rlim.rlim_cur == RLIM_INFINITY)
>>>> +	if (new_rlim->rlim_cur == RLIM_INFINITY)
>>>>  		goto out;
>>>>
>>>> -	update_rlimit_cpu(new_rlim.rlim_cur);
>>>> +	update_rlimit_cpu(new_rlim->rlim_cur);
>>>>  out:
>>>>  	return 0;
>>>>  }
>>>>
>>>> +SYSCALL_DEFINE2(setrlimit, unsigned int, resource, struct rlimit __user *, rlim)
>>>> +{
>>>> +	struct rlimit new_rlim;
>>>> +
>>>> +	if (resource >= RLIM_NLIMITS)
>>>> +		return -EINVAL;
>>>> +	if (copy_from_user(&new_rlim, rlim, sizeof(*rlim)))
>>>> +		return -EFAULT;
>>>> +	if (new_rlim.rlim_cur > new_rlim.rlim_max)
>>>> +		return -EINVAL;
>>> Should the above check go into do_setrlimit()?  No sense trusting
>>> the data sent to sys_checkpoint() any more than the data sent to
>>> sys_setrlimit().
>> You are very correct.
>>
>> I wonder though: moving the first check will change the order of
>> input sanitizing, which will change the syscall behavior on bad
>> input. E.g, setrlimit(4096, NULL) used to return EINVAL but now
>> will return EFAULT.
>>
>> Not that I really care that much, but I've seen a similar case
>> that confused LTP scripts into seeing the "wrong" error from a
>> syscall and failing a test.
> 
> Heh, I could be wrong, but when you mess up 2 ways, I don't think the kernel
> needs to guarantee which one you'll be warned about :)  Of course there are
> cases where that is well-defined (i.e. DAC-before-MAC).  Maybe we should ask at
> linux-api?

I totally agree with you - I don't think it's an API issue.

I only wonder whether this would cause an LTP test or a libc test
to fail (because it expected one error and got another). Of course,
it would be a false negative, but would still happen.

Oh, well .. I'll just assume it doesn't break anything unless it
is proved wrong :p

Oren.

> 
> Putting the same check before both callers of do_setrlimit() isn't *that*
> bad, and I suppose we can put a comment above do_setrlimit() saying that
> that any new callers need to do that check themselves...
> 
> -serge

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH] c/r: [signal 2/3] checkpoint/restart of rlimit
       [not found]             ` <4A6910E5.4010605-RdfvBDnrOixBDgjK7y7TUQ@public.gmane.org>
  2009-07-24 14:22               ` Serge E. Hallyn
@ 2009-07-24 20:04               ` Matt Helsley
  1 sibling, 0 replies; 11+ messages in thread
From: Matt Helsley @ 2009-07-24 20:04 UTC (permalink / raw)
  To: Oren Laadan; +Cc: containers-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA

On Thu, Jul 23, 2009 at 09:39:49PM -0400, Oren Laadan wrote:
> 
> 
> Serge E. Hallyn wrote:
> > Quoting Oren Laadan (orenl-RdfvBDnrOixBDgjK7y7TUQ@public.gmane.org):
> >> This patch adds checkpoint and restart of rlimit information
> >> that is part of shared signal_struct.
> > 
> > ...
> > 
> >>  static int restore_signal(struct ckpt_ctx *ctx)
> >>  {
> >>  	struct ckpt_hdr_signal *h;
> >> +	struct rlimit rlim;
> >> +	int i, ret;
> >>
> >>  	h = ckpt_read_obj_type(ctx, sizeof(*h), CKPT_HDR_SIGNAL);
> >>  	if (IS_ERR(h))
> >>  		return PTR_ERR(h);
> >>
> >> -	/* fill in later */
> >> -
> >> +	/* rlimit */
> >> +	for (i = 0; i < RLIM_NLIMITS; i++) {
> >> +		rlim.rlim_cur = h->rlim[i].rlim_cur;
> >> +		rlim.rlim_max = h->rlim[i].rlim_max;
> >> +		ret = do_setrlimit(i, &rlim);
> > 
> > ...
> >> +int do_setrlimit(unsigned int resource, struct rlimit *new_rlim)
> >>  {
> >> -	struct rlimit new_rlim, *old_rlim;
> >> +	struct rlimit *old_rlim;
> >>  	int retval;
> >>
> >> -	if (resource >= RLIM_NLIMITS)
> >> -		return -EINVAL;
> >> -	if (copy_from_user(&new_rlim, rlim, sizeof(*rlim)))
> >> -		return -EFAULT;
> >> -	if (new_rlim.rlim_cur > new_rlim.rlim_max)
> >> -		return -EINVAL;
> >>  	old_rlim = current->signal->rlim + resource;
> >> -	if ((new_rlim.rlim_max > old_rlim->rlim_max) &&
> >> +	if ((new_rlim->rlim_max > old_rlim->rlim_max) &&
> >>  	    !capable(CAP_SYS_RESOURCE))
> >>  		return -EPERM;
> >> -	if (resource == RLIMIT_NOFILE && new_rlim.rlim_max > sysctl_nr_open)
> >> +	if (resource == RLIMIT_NOFILE && new_rlim->rlim_max > sysctl_nr_open)
> >>  		return -EPERM;
> >>
> >> -	retval = security_task_setrlimit(resource, &new_rlim);
> >> +	retval = security_task_setrlimit(resource, new_rlim);
> >>  	if (retval)
> >>  		return retval;
> >>
> >> -	if (resource == RLIMIT_CPU && new_rlim.rlim_cur == 0) {
> >> +	if (resource == RLIMIT_CPU && new_rlim->rlim_cur == 0) {
> >>  		/*
> >>  		 * The caller is asking for an immediate RLIMIT_CPU
> >>  		 * expiry.  But we use the zero value to mean "it was
> >>  		 * never set".  So let's cheat and make it one second
> >>  		 * instead
> >>  		 */
> >> -		new_rlim.rlim_cur = 1;
> >> +		new_rlim->rlim_cur = 1;
> >>  	}
> >>
> >>  	task_lock(current->group_leader);
> >> -	*old_rlim = new_rlim;
> >> +	*old_rlim = *new_rlim;
> >>  	task_unlock(current->group_leader);
> >>
> >>  	if (resource != RLIMIT_CPU)
> >> @@ -1189,14 +1183,27 @@ SYSCALL_DEFINE2(setrlimit, unsigned int, resource, struct rlimit __user *, rlim)
> >>  	 * very long-standing error, and fixing it now risks breakage of
> >>  	 * applications, so we live with it
> >>  	 */
> >> -	if (new_rlim.rlim_cur == RLIM_INFINITY)
> >> +	if (new_rlim->rlim_cur == RLIM_INFINITY)
> >>  		goto out;
> >>
> >> -	update_rlimit_cpu(new_rlim.rlim_cur);
> >> +	update_rlimit_cpu(new_rlim->rlim_cur);
> >>  out:
> >>  	return 0;
> >>  }
> >>
> >> +SYSCALL_DEFINE2(setrlimit, unsigned int, resource, struct rlimit __user *, rlim)
> >> +{
> >> +	struct rlimit new_rlim;
> >> +
> >> +	if (resource >= RLIM_NLIMITS)
> >> +		return -EINVAL;
> >> +	if (copy_from_user(&new_rlim, rlim, sizeof(*rlim)))
> >> +		return -EFAULT;
> >> +	if (new_rlim.rlim_cur > new_rlim.rlim_max)
> >> +		return -EINVAL;
> > 
> > Should the above check go into do_setrlimit()?  No sense trusting
> > the data sent to sys_checkpoint() any more than the data sent to
> > sys_setrlimit().
> 
> You are very correct.
> 
> I wonder though: moving the first check will change the order of
> input sanitizing, which will change the syscall behavior on bad
> input. E.g, setrlimit(4096, NULL) used to return EINVAL but now
> will return EFAULT.
> 
> Not that I really care that much, but I've seen a similar case
> that confused LTP scripts into seeing the "wrong" error from a
> syscall and failing a test.

Unless the syscall documentation (if any) defines an order it checks
things I think such a test is broken. If multiple "things" are wrong with
a syscall then userspace should expect an error for any one of them.

Cheers,
	-Matt

^ permalink raw reply	[flat|nested] 11+ messages in thread

end of thread, other threads:[~2009-07-24 20:04 UTC | newest]

Thread overview: 11+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2009-07-23 14:48 [PATCH] c/r: [signal 1/3] blocked and template for shared signals Oren Laadan
     [not found] ` <1248360514-20710-1-git-send-email-orenl-RdfvBDnrOixBDgjK7y7TUQ@public.gmane.org>
2009-07-23 14:48   ` [PATCH] c/r: [signal 2/3] checkpoint/restart of rlimit Oren Laadan
     [not found]     ` <1248360514-20710-2-git-send-email-orenl-RdfvBDnrOixBDgjK7y7TUQ@public.gmane.org>
2009-07-23 22:28       ` Serge E. Hallyn
     [not found]         ` <20090723222825.GA23596-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>
2009-07-24  1:39           ` Oren Laadan
     [not found]             ` <4A6910E5.4010605-RdfvBDnrOixBDgjK7y7TUQ@public.gmane.org>
2009-07-24 14:22               ` Serge E. Hallyn
     [not found]                 ` <20090724142235.GB6910-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>
2009-07-24 17:11                   ` Oren Laadan
2009-07-24 20:04               ` Matt Helsley
2009-07-23 14:48   ` [PATCH] c/r: [signal 3/3] pending signals (private, shared) Oren Laadan
     [not found]     ` <1248360514-20710-3-git-send-email-orenl-RdfvBDnrOixBDgjK7y7TUQ@public.gmane.org>
2009-07-24 12:36       ` Louis Rilling
     [not found]         ` <20090724123629.GH11101-Hu8+6S1rdjywhHL9vcZdMVaTQe2KTcn/@public.gmane.org>
2009-07-24 12:59           ` Oren Laadan
2009-07-24 13:13       ` Louis Rilling

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.