* [RFC][PATCH][v2] Define/use siginfo_from_ancestor_ns()
@ 2008-11-15 21:21 Sukadev Bhattiprolu
2008-11-18 17:53 ` Oleg Nesterov
2008-11-19 2:28 ` Sukadev Bhattiprolu
0 siblings, 2 replies; 8+ messages in thread
From: Sukadev Bhattiprolu @ 2008-11-15 21:21 UTC (permalink / raw)
To: oleg, ebiederm; +Cc: daniel, xemul, containers, linux-kernel
Oleg,
I tried to address most of your comments from the earlier version.
Sukadev
---
From: Sukadev Bhattiprolu <sukadev@linux.vnet.ibm.com>
Subject: [PATCH] Define/use siginfo_from_ancestor_ns()
Container-init must appear like 'global-init' to processes within the
container and hence it must be immune to fatal signals from within the
container. But container-init should appear like a normal process to
processes in ancestor namespaces and so if same fatal signal is sent
by a process in parent namespace, the container-init must terminate.
There have been several attempts to meet these conflicting requirements
This patch (tries to) implement the approach suggested recently by Oleg
Nesterov.
Changelog[v2]:
- Have sig_ignored() ignore unblocked SIG_DFL signals to container-init
- Use the new SIG_FROM_USER flag and sender's namespace to clear
si_pid for signals crossing namespaces.
- Move setting of SIGNAL_UNKILLABLE from copy_process() to copy_signal()
- Clear si_pid in sys_rt_sigqueueinfo() when signalling processes in
descendant namespaces
Touch tested, but this is just a quick patch and has known issues.
TODO:
- Move this new code under #ifdef CONFIG_PID_NS
- Need additional checks for stopped/traced processes.
- With 'si_pid = 0' sys_rt_sigqueueinfo() can still terminate own init.
- Clear ->si_pid in other namespace-crossing cases including: F_SETSIG,
__do_notify(), etc
Signed-off-by Sukadev Bhattiprolu <sukadev@linux.vnet.ibm.com>
---
kernel/fork.c | 2 +
kernel/signal.c | 86 ++++++++++++++++++++++++++++++++++++++++++++++++------
2 files changed, 78 insertions(+), 10 deletions(-)
diff --git a/kernel/fork.c b/kernel/fork.c
index 28be39a..368f25c 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -814,6 +814,8 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk)
atomic_set(&sig->live, 1);
init_waitqueue_head(&sig->wait_chldexit);
sig->flags = 0;
+ if (clone_flags & CLONE_NEWPID)
+ sig->flags |= SIGNAL_UNKILLABLE;
sig->group_exit_code = 0;
sig->group_exit_task = NULL;
sig->group_stop_count = 0;
diff --git a/kernel/signal.c b/kernel/signal.c
index 4530fc6..cd4b2a5 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -53,7 +53,7 @@ static int sig_handler_ignored(void __user *handler, int sig)
(handler == SIG_DFL && sig_kernel_ignore(sig));
}
-static int sig_ignored(struct task_struct *t, int sig)
+static int sig_ignored(struct task_struct *t, int sig, int same_ns)
{
void __user *handler;
@@ -68,6 +68,14 @@ static int sig_ignored(struct task_struct *t, int sig)
handler = sig_handler(t, sig);
if (!sig_handler_ignored(handler, sig))
return 0;
+ /*
+ * ignores SIGSTOP/SIGKILL signals to init from same namespace.
+ *
+ * TODO: Ignore unblocked SIG_DFL signals also here or drop them
+ * in get_signal_to_deliver() ?
+ */
+ if (is_container_init(t) && same_ns && sig_kernel_only(sig))
+ return 1;
/*
* Tracers may want to know about even ignored signals.
@@ -609,11 +617,16 @@ static int check_kill_permission(int sig, struct siginfo *info,
* Returns true if the signal should be actually delivered, otherwise
* it should be dropped.
*/
-static int prepare_signal(int sig, struct task_struct *p)
+static int prepare_signal(int sig, struct task_struct *p, int same_ns)
{
struct signal_struct *signal = p->signal;
struct task_struct *t;
+ /*
+ * TODO: If cinit is stopped by a process from ancestor ns, it
+ * must not be continued by a SIGCONT from a descendant
+ * process. And vice-versa
+ */
if (unlikely(signal->flags & SIGNAL_GROUP_EXIT)) {
/*
* The process is in the middle of dying, nothing to do.
@@ -693,7 +706,7 @@ static int prepare_signal(int sig, struct task_struct *p)
}
}
- return !sig_ignored(p, sig);
+ return !sig_ignored(p, sig, same_ns);
}
/*
@@ -798,16 +811,38 @@ static inline int legacy_queue(struct sigpending *signals, int sig)
return (sig < SIGRTMIN) && sigismember(&signals->signal, sig);
}
+/*
+ * Return TRUE if this signal originated directly from the user (i.e via
+ * kill(), tkill(), sigqueue()).
+ *
+ * This differs from similarly named, SI_FROMUSER() in that the latter
+ * includes signals such as timer, async-io, or message-queue that
+ * originate _from kernel_.
+ */
+#define SIG_FROM_USER INT_MIN /* MSB */
+static inline int siginfo_from_user(siginfo_t *info)
+{
+ return !is_si_special(info) && (info->si_signo & SIG_FROM_USER);
+}
+
static int send_signal(int sig, struct siginfo *info, struct task_struct *t,
int group)
{
struct sigpending *pending;
struct sigqueue *q;
+ int from_ancestor_ns;
+
+ from_ancestor_ns = 0;
+ if (siginfo_from_user(info)) {
+ /* if t can't see us we are from parent ns */
+ if (task_pid_nr_ns(current, task_active_pid_ns(t)) == 0)
+ from_ancestor_ns = 1;
+ }
trace_sched_signal_send(sig, t);
assert_spin_locked(&t->sighand->siglock);
- if (!prepare_signal(sig, t))
+ if (!prepare_signal(sig, t, !from_ancestor_ns))
return 0;
pending = group ? &t->signal->shared_pending : &t->pending;
@@ -855,6 +890,9 @@ static int send_signal(int sig, struct siginfo *info, struct task_struct *t,
break;
default:
copy_siginfo(&q->info, info);
+ if (from_ancestor_ns)
+ q->info.si_pid = 0;
+ q->info.si_signo &= ~SIG_FROM_USER;
break;
}
} else if (!is_si_special(info)) {
@@ -864,6 +902,9 @@ static int send_signal(int sig, struct siginfo *info, struct task_struct *t,
* and sent by user using something other than kill().
*/
return -EAGAIN;
+
+ if (from_ancestor_ns)
+ return -ENOMEM;
}
out_set:
@@ -1295,7 +1336,7 @@ int send_sigqueue(struct sigqueue *q, struct task_struct *t, int group)
goto ret;
ret = 1; /* the signal is ignored */
- if (!prepare_signal(sig, t))
+ if (!prepare_signal(sig, t, 1))
goto out;
ret = 0;
@@ -1722,6 +1763,11 @@ static int ptrace_signal(int signr, siginfo_t *info,
return signr;
}
+static inline int siginfo_from_ancestor_ns(siginfo_t *info)
+{
+ return SI_FROMUSER(info) && (info->si_pid == 0);
+}
+
int get_signal_to_deliver(siginfo_t *info, struct k_sigaction *return_ka,
struct pt_regs *regs, void *cookie)
{
@@ -1813,9 +1859,12 @@ relock:
/*
* Global init gets no signals it doesn't want.
+ * Container-init gets no signals from its descendants, it
+ * doesn't want.
*/
if (unlikely(signal->flags & SIGNAL_UNKILLABLE) &&
- !signal_group_exit(signal))
+ !siginfo_from_ancestor_ns(info) &&
+ !signal_group_exit(signal))
continue;
if (sig_kernel_stop(signr)) {
@@ -2207,7 +2256,7 @@ sys_kill(pid_t pid, int sig)
{
struct siginfo info;
- info.si_signo = sig;
+ info.si_signo = sig | SIG_FROM_USER;
info.si_errno = 0;
info.si_code = SI_USER;
info.si_pid = task_tgid_vnr(current);
@@ -2224,7 +2273,7 @@ static int do_tkill(pid_t tgid, pid_t pid, int sig)
unsigned long flags;
error = -ESRCH;
- info.si_signo = sig;
+ info.si_signo = sig | SIG_FROM_USER;
info.si_errno = 0;
info.si_code = SI_TKILL;
info.si_pid = task_tgid_vnr(current);
@@ -2288,6 +2337,8 @@ asmlinkage long
sys_rt_sigqueueinfo(pid_t pid, int sig, siginfo_t __user *uinfo)
{
siginfo_t info;
+ struct pid *spid;
+ int error;
if (copy_from_user(&info, uinfo, sizeof(siginfo_t)))
return -EFAULT;
@@ -2296,10 +2347,25 @@ sys_rt_sigqueueinfo(pid_t pid, int sig, siginfo_t __user *uinfo)
Nor can they impersonate a kill(), which adds source info. */
if (info.si_code >= 0)
return -EPERM;
- info.si_signo = sig;
+ info.si_signo = sig | SIG_FROM_USER;
/* POSIX.1b doesn't mention process groups. */
- return kill_proc_info(sig, &info, pid);
+ rcu_read_lock();
+ spid = find_vpid(pid);
+ /*
+ * A container-init (cinit) ignores/drops fatal signals unless sender
+ * is in an ancestor namespace. Cinit uses 'si_pid == 0' to check if
+ * sender is an ancestor. See siginfo_from_ancestor_ns().
+ *
+ * If signalling a descendant cinit, set si_pid to 0 so it does not
+ * get ignored/dropped.
+ */
+ if (!pid_nr_ns(spid, task_active_pid_ns(current)))
+ info.si_pid = 0;
+ error = kill_pid_info(sig, &info, spid);
+ rcu_read_unlock();
+
+ return error;
}
int do_sigaction(int sig, struct k_sigaction *act, struct k_sigaction *oact)
--
1.5.2.5
^ permalink raw reply related [flat|nested] 8+ messages in thread* Re: [RFC][PATCH][v2] Define/use siginfo_from_ancestor_ns()
2008-11-15 21:21 [RFC][PATCH][v2] Define/use siginfo_from_ancestor_ns() Sukadev Bhattiprolu
@ 2008-11-18 17:53 ` Oleg Nesterov
2008-11-18 18:37 ` Sukadev Bhattiprolu
2008-11-19 1:22 ` Sukadev Bhattiprolu
2008-11-19 2:28 ` Sukadev Bhattiprolu
1 sibling, 2 replies; 8+ messages in thread
From: Oleg Nesterov @ 2008-11-18 17:53 UTC (permalink / raw)
To: Sukadev Bhattiprolu; +Cc: ebiederm, daniel, xemul, containers, linux-kernel
On 11/15, Sukadev Bhattiprolu wrote:
>
> Subject: [PATCH] Define/use siginfo_from_ancestor_ns()
Imho, the main problem with this patch is that it tries to do many
different things at once, and each part is suboptimal/incomplete.
This needs several patches. Not only because this is easier to review,
but also because each part needs the good changelog.
Also. I don't think we should try do solve the "whole" problem right
now. For example, if we add/use siginfo_from_ancestor_ns(), we should
also change sys_sigaction(SIG_IGN). As I said, imho we should start
with:
- cinit can't be killed from its namespace
- the parent ns can always SIGKILL cinit
This solves most of problems, and this is very simple.
As for .si_pid mangling, this again needs a separate patch.
Sukadev, I don't have a time today, I'll return tomorrow with more
comments on this...
> +static int sig_ignored(struct task_struct *t, int sig, int same_ns)
> {
> void __user *handler;
>
> @@ -68,6 +68,14 @@ static int sig_ignored(struct task_struct *t, int sig)
> handler = sig_handler(t, sig);
> if (!sig_handler_ignored(handler, sig))
> return 0;
> + /*
> + * ignores SIGSTOP/SIGKILL signals to init from same namespace.
> + *
> + * TODO: Ignore unblocked SIG_DFL signals also here or drop them
> + * in get_signal_to_deliver() ?
> + */
> + if (is_container_init(t) && same_ns && sig_kernel_only(sig))
> + return 1;
No, no. is_container_init() is slow and unneeded, same_ns is bogus,
the usage of sig_kernel_only() is suboptimal. The comment is not
right too...
As I already said, this problem is not namespace-specific, we need
some changes for the global init too.
Actually, I already did the patch, I'll send it soon.
> static int send_signal(int sig, struct siginfo *info, struct task_struct *t,
> int group)
> {
> struct sigpending *pending;
> struct sigqueue *q;
> + int from_ancestor_ns;
> +
> + from_ancestor_ns = 0;
> + if (siginfo_from_user(info)) {
> + /* if t can't see us we are from parent ns */
> + if (task_pid_nr_ns(current, task_active_pid_ns(t)) == 0)
^^^^^^^^^^^^^^^^^^
->nsproxy may be NULL, but we can use task_pid(t)->numbers[-1].ns
> @@ -864,6 +902,9 @@ static int send_signal(int sig, struct siginfo *info, struct task_struct *t,
> * and sent by user using something other than kill().
> */
> return -EAGAIN;
> +
> + if (from_ancestor_ns)
> + return -ENOMEM;
This change alone needs a fat comment in changelog. But I don't think
we need it now. Until we change the dequeue path to check "from_ancestor_ns".
> +static inline int siginfo_from_ancestor_ns(siginfo_t *info)
> +{
> + return SI_FROMUSER(info) && (info->si_pid == 0);
> +}
Yes, this is problem... I doubt we can rely on !si_pid here.
More on this later.
> @@ -2296,10 +2347,25 @@ sys_rt_sigqueueinfo(pid_t pid, int sig, siginfo_t __user *uinfo)
> Nor can they impersonate a kill(), which adds source info. */
> if (info.si_code >= 0)
> return -EPERM;
> - info.si_signo = sig;
> + info.si_signo = sig | SIG_FROM_USER;
>
> /* POSIX.1b doesn't mention process groups. */
> - return kill_proc_info(sig, &info, pid);
> + rcu_read_lock();
> + spid = find_vpid(pid);
> + /*
> + * A container-init (cinit) ignores/drops fatal signals unless sender
> + * is in an ancestor namespace. Cinit uses 'si_pid == 0' to check if
> + * sender is an ancestor. See siginfo_from_ancestor_ns().
> + *
> + * If signalling a descendant cinit, set si_pid to 0 so it does not
> + * get ignored/dropped.
> + */
> + if (!pid_nr_ns(spid, task_active_pid_ns(current)))
> + info.si_pid = 0;
> + error = kill_pid_info(sig, &info, spid);
Can't understand. We set SIG_FROM_USER, If signalling a descendant task
(not only cinit), send_signal() will clear .si_pid anyway?
Oleg.
^ permalink raw reply [flat|nested] 8+ messages in thread* Re: [RFC][PATCH][v2] Define/use siginfo_from_ancestor_ns()
2008-11-18 17:53 ` Oleg Nesterov
@ 2008-11-18 18:37 ` Sukadev Bhattiprolu
2008-11-19 1:22 ` Sukadev Bhattiprolu
1 sibling, 0 replies; 8+ messages in thread
From: Sukadev Bhattiprolu @ 2008-11-18 18:37 UTC (permalink / raw)
To: Oleg Nesterov; +Cc: ebiederm, daniel, xemul, containers, linux-kernel
Oleg Nesterov [oleg@redhat.com] wrote:
| On 11/15, Sukadev Bhattiprolu wrote:
| >
| > Subject: [PATCH] Define/use siginfo_from_ancestor_ns()
|
| Imho, the main problem with this patch is that it tries to do many
| different things at once, and each part is suboptimal/incomplete.
|
| This needs several patches. Not only because this is easier to review,
| but also because each part needs the good changelog.
I agree I sent this as an RFC to show the overall changes.
I do plan to include the following two patches, which should address
the issue of ->nsproxy being NULL.
https://lists.linux-foundation.org/pipermail/containers/2008-November/014187.html
https://lists.linux-foundation.org/pipermail/containers/2008-November/014188.html
|
| Also. I don't think we should try do solve the "whole" problem right
| now. For example, if we add/use siginfo_from_ancestor_ns(), we should
| also change sys_sigaction(SIG_IGN). As I said, imho we should start
| with:
|
| - cinit can't be killed from its namespace
|
| - the parent ns can always SIGKILL cinit
|
| This solves most of problems, and this is very simple.
Yes, I agree and am trying to solve only those two :-) I moved out
changes to __do_notify() and others to separate patches, but maybe
we can simplify this patch further.
|
| As for .si_pid mangling, this again needs a separate patch.
I thought we were going to use SIG_FROM_USER to decide if the siginfo
does in fact have a ->si_pid (so we don't need the switch statement
we had in an earlier patch).
|
| Sukadev, I don't have a time today, I'll return tomorrow with more
| comments on this...
No problem. Thanks for the comments so far.
|
| > +static int sig_ignored(struct task_struct *t, int sig, int same_ns)
| > {
| > void __user *handler;
| >
| > @@ -68,6 +68,14 @@ static int sig_ignored(struct task_struct *t, int sig)
| > handler = sig_handler(t, sig);
| > if (!sig_handler_ignored(handler, sig))
| > return 0;
| > + /*
| > + * ignores SIGSTOP/SIGKILL signals to init from same namespace.
| > + *
| > + * TODO: Ignore unblocked SIG_DFL signals also here or drop them
| > + * in get_signal_to_deliver() ?
| > + */
| > + if (is_container_init(t) && same_ns && sig_kernel_only(sig))
| > + return 1;
|
| No, no. is_container_init() is slow and unneeded, same_ns is bogus,
| the usage of sig_kernel_only() is suboptimal. The comment is not
| right too...
Maybe in a separate patch, but same_ns is needed to ensure container-init
does not ignore signals from ancestor namespace - no ?
I was undecided between the above sig_kernel_only() check and
'handler == SIG_DFL' (hence the TODO).
|
| As I already said, this problem is not namespace-specific, we need
| some changes for the global init too.
Right I used is_container_init() since it includes global init().
Again, maybe it could have been separate patches for just global_init
first.
But I see from your patch that we could use SIGNAL_UNKILLABLE instead
of is_container_init(). That is more efficient.
|
| Actually, I already did the patch, I'll send it soon.
Ok. I will review that.
|
| > static int send_signal(int sig, struct siginfo *info, struct task_struct *t,
| > int group)
| > {
| > struct sigpending *pending;
| > struct sigqueue *q;
| > + int from_ancestor_ns;
| > +
| > + from_ancestor_ns = 0;
| > + if (siginfo_from_user(info)) {
| > + /* if t can't see us we are from parent ns */
| > + if (task_pid_nr_ns(current, task_active_pid_ns(t)) == 0)
| ^^^^^^^^^^^^^^^^^^
|
| ->nsproxy may be NULL, but we can use task_pid(t)->numbers[-1].ns
Eric's patch of generalizing task_active_pid_ns() should fix this. It
was reviewed several times, so I did not send it, but yes, I should
have mentioned it.
|
| > @@ -864,6 +902,9 @@ static int send_signal(int sig, struct siginfo *info, struct task_struct *t,
| > * and sent by user using something other than kill().
| > */
| > return -EAGAIN;
| > +
| > + if (from_ancestor_ns)
| > + return -ENOMEM;
|
| This change alone needs a fat comment in changelog. But I don't think
| we need it now. Until we change the dequeue path to check "from_ancestor_ns".
Ok.
|
| > +static inline int siginfo_from_ancestor_ns(siginfo_t *info)
| > +{
| > + return SI_FROMUSER(info) && (info->si_pid == 0);
| > +}
|
| Yes, this is problem... I doubt we can rely on !si_pid here.
| More on this later.
|
| > @@ -2296,10 +2347,25 @@ sys_rt_sigqueueinfo(pid_t pid, int sig, siginfo_t __user *uinfo)
| > Nor can they impersonate a kill(), which adds source info. */
| > if (info.si_code >= 0)
| > return -EPERM;
| > - info.si_signo = sig;
| > + info.si_signo = sig | SIG_FROM_USER;
| >
| > /* POSIX.1b doesn't mention process groups. */
| > - return kill_proc_info(sig, &info, pid);
| > + rcu_read_lock();
| > + spid = find_vpid(pid);
| > + /*
| > + * A container-init (cinit) ignores/drops fatal signals unless sender
| > + * is in an ancestor namespace. Cinit uses 'si_pid == 0' to check if
| > + * sender is an ancestor. See siginfo_from_ancestor_ns().
| > + *
| > + * If signalling a descendant cinit, set si_pid to 0 so it does not
| > + * get ignored/dropped.
| > + */
| > + if (!pid_nr_ns(spid, task_active_pid_ns(current)))
| > + info.si_pid = 0;
| > + error = kill_pid_info(sig, &info, spid);
|
| Can't understand. We set SIG_FROM_USER, If signalling a descendant task
| (not only cinit), send_signal() will clear .si_pid anyway?
Good point. We had gone back and forth on this and I thought one of the
emails mentioned this check. Maybe I misread that.
But yes, its not needed since send_signal() does it.
^ permalink raw reply [flat|nested] 8+ messages in thread* Re: [RFC][PATCH][v2] Define/use siginfo_from_ancestor_ns()
2008-11-18 17:53 ` Oleg Nesterov
2008-11-18 18:37 ` Sukadev Bhattiprolu
@ 2008-11-19 1:22 ` Sukadev Bhattiprolu
2008-11-23 23:10 ` Oleg Nesterov
1 sibling, 1 reply; 8+ messages in thread
From: Sukadev Bhattiprolu @ 2008-11-19 1:22 UTC (permalink / raw)
To: Oleg Nesterov; +Cc: ebiederm, daniel, xemul, containers, linux-kernel
|
| > +static inline int siginfo_from_ancestor_ns(siginfo_t *info)
| > +{
| > + return SI_FROMUSER(info) && (info->si_pid == 0);
| > +}
|
| Yes, this is problem... I doubt we can rely on !si_pid here.
| More on this later.
BTW, rather than clearing SIG_FROM_USER in send_signal(), can we
keep it till we dequeue the signal ? Yes, collect_signal() would
need to consider this flag. But when we dequeue, we can note that
it was from user and use that in the siginfo_from_ancestor() ?
Sukadev
^ permalink raw reply [flat|nested] 8+ messages in thread* Re: [RFC][PATCH][v2] Define/use siginfo_from_ancestor_ns()
2008-11-19 1:22 ` Sukadev Bhattiprolu
@ 2008-11-23 23:10 ` Oleg Nesterov
2008-11-26 3:16 ` Sukadev Bhattiprolu
0 siblings, 1 reply; 8+ messages in thread
From: Oleg Nesterov @ 2008-11-23 23:10 UTC (permalink / raw)
To: Sukadev Bhattiprolu; +Cc: ebiederm, daniel, xemul, containers, linux-kernel
On 11/18, Sukadev Bhattiprolu wrote:
>
> |
> | > +static inline int siginfo_from_ancestor_ns(siginfo_t *info)
> | > +{
> | > + return SI_FROMUSER(info) && (info->si_pid == 0);
> | > +}
> |
> | Yes, this is problem... I doubt we can rely on !si_pid here.
> | More on this later.
>
> BTW, rather than clearing SIG_FROM_USER in send_signal(), can we
> keep it till we dequeue the signal ? Yes, collect_signal() would
> need to consider this flag. But when we dequeue, we can note that
> it was from user and use that in the siginfo_from_ancestor() ?
Yes! I thought about this too. As a last resort this should work
afaics. But we should be carefull, we have to fix rm_from_queue_full()
for example as well.
Another note. We can split SIG_FROM_USER (if we are going to use this
hack) into 2 flags: SIG_KILL_SUB_NS and SIG_MANGLE_SI_PID. We can
even put "struct pid *pid" into si_signo along with these bits if
we find some strange user which sends the signal on behalve of
the different task.
But personally, I'd prefer to make 3 simple patches for the start.
Then we can continue with these complications if needed. Sukadev,
please feel free to disagree with me. I am just trying to make
the first step reviewable and simple. No changes on dequeue path,
no -ENOMEM in send_signal().
1. Introduce SIG_FROM_USER (or whatever). Basically, the patch I
sent. Except I'd relly like to see this code under CONFIG_
just for documentation, but please feel free to ignore.
So, with this patch send_signal() has "bool from_ancestor", which
is not used so far. And we the fixup code after copy_siginfo()
which clears the flags, or better yet just sets .si_signo = sig.
2. Now we change send_signal()
+ if (from_ancestor && sig == SIGKILL)
+ t->signal->flags &= ~SIGNAL_UNKILLABLE;
if (!prepare_signal(...))
return;
and change copy_signal() to set SIGNAL_UNKILLABLE for
cinit.
From now cinit is protected from unwanted signals from
its namespace, and the parent can always kill it with
SIGKILL.
Actually, I think this is enough to solve most problems,
the further changes can be discussed later. OK, the only
"real" problem is SIGSTOP, afaics. This looks solveable.
3. mangle .si_pid in send_signal(). Again, it is not clear
what should we do with sys_rt_sigqueueinfo(), but there
is no "obviously right" solution.
And I am really sorry for delay.
Oleg.
^ permalink raw reply [flat|nested] 8+ messages in thread* Re: [RFC][PATCH][v2] Define/use siginfo_from_ancestor_ns()
2008-11-23 23:10 ` Oleg Nesterov
@ 2008-11-26 3:16 ` Sukadev Bhattiprolu
2008-11-26 17:44 ` Sukadev Bhattiprolu
0 siblings, 1 reply; 8+ messages in thread
From: Sukadev Bhattiprolu @ 2008-11-26 3:16 UTC (permalink / raw)
To: Oleg Nesterov; +Cc: ebiederm, daniel, xemul, containers, linux-kernel
Oleg Nesterov [oleg@redhat.com] wrote:
| On 11/18, Sukadev Bhattiprolu wrote:
| >
| > |
| > | > +static inline int siginfo_from_ancestor_ns(siginfo_t *info)
| > | > +{
| > | > + return SI_FROMUSER(info) && (info->si_pid == 0);
| > | > +}
| > |
| > | Yes, this is problem... I doubt we can rely on !si_pid here.
| > | More on this later.
| >
| > BTW, rather than clearing SIG_FROM_USER in send_signal(), can we
| > keep it till we dequeue the signal ? Yes, collect_signal() would
| > need to consider this flag. But when we dequeue, we can note that
| > it was from user and use that in the siginfo_from_ancestor() ?
|
| Yes! I thought about this too. As a last resort this should work
| afaics. But we should be carefull, we have to fix rm_from_queue_full()
| for example as well.
|
| Another note. We can split SIG_FROM_USER (if we are going to use this
| hack) into 2 flags: SIG_KILL_SUB_NS and SIG_MANGLE_SI_PID. We can
| even put "struct pid *pid" into si_signo along with these bits if
| we find some strange user which sends the signal on behalve of
| the different task.
|
| But personally, I'd prefer to make 3 simple patches for the start.
| Then we can continue with these complications if needed. Sukadev,
| please feel free to disagree with me. I am just trying to make
| the first step reviewable and simple. No changes on dequeue path,
| no -ENOMEM in send_signal().
Yes, I think this is simple and addresses most common problems
and specially bypasses the blocked signals issues. And since
we don't have to pass any state to get_signal_deliver(), we
don't have worry about the -ENOMEM.
|
| 1. Introduce SIG_FROM_USER (or whatever). Basically, the patch I
| sent. Except I'd relly like to see this code under CONFIG_
| just for documentation, but please feel free to ignore.
|
| So, with this patch send_signal() has "bool from_ancestor", which
| is not used so far. And we the fixup code after copy_siginfo()
| which clears the flags, or better yet just sets .si_signo = sig.
I am just clearing the SIG_FROM_USER flag for now - just in case
we need to overload it for something else later ?
|
| 2. Now we change send_signal()
|
| + if (from_ancestor && sig == SIGKILL)
| + t->signal->flags &= ~SIGNAL_UNKILLABLE;
| if (!prepare_signal(...))
| return;
|
| and change copy_signal() to set SIGNAL_UNKILLABLE for
| cinit.
|
| From now cinit is protected from unwanted signals from
| its namespace, and the parent can always kill it with
| SIGKILL.
|
| Actually, I think this is enough to solve most problems,
| the further changes can be discussed later. OK, the only
| "real" problem is SIGSTOP, afaics. This looks solveable.
Yes, SIGSTOP even from parent is ignored due to the SIGNAL_UNKILLABLE
check in get_signal_to_deliver.
|
| 3. mangle .si_pid in send_signal(). Again, it is not clear
| what should we do with sys_rt_sigqueueinfo(), but there
| is no "obviously right" solution.
Agree.
|
| And I am really sorry for delay.
No problem. This issue has been around for over a year :-)
I will send the patches out in a bit.
Thanks,
Sukadev
^ permalink raw reply [flat|nested] 8+ messages in thread* Re: [RFC][PATCH][v2] Define/use siginfo_from_ancestor_ns()
2008-11-26 3:16 ` Sukadev Bhattiprolu
@ 2008-11-26 17:44 ` Sukadev Bhattiprolu
0 siblings, 0 replies; 8+ messages in thread
From: Sukadev Bhattiprolu @ 2008-11-26 17:44 UTC (permalink / raw)
To: Oleg Nesterov; +Cc: ebiederm, daniel, xemul, containers, linux-kernel
| | Actually, I think this is enough to solve most problems,
| | the further changes can be discussed later. OK, the only
| | "real" problem is SIGSTOP, afaics. This looks solveable.
|
| Yes, SIGSTOP even from parent is ignored due to the SIGNAL_UNKILLABLE
| check in get_signal_to_deliver.
Actually with sig_task_ignored() patch, http://lkml.org/lkml/2008/11/18/251
SIGSTOP gets dropped early. To eventually allow SIGSTOP from ancestor ns,
we need to pass 'from_ancestor_ns' into sig_ignored() ?
^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [RFC][PATCH][v2] Define/use siginfo_from_ancestor_ns()
2008-11-15 21:21 [RFC][PATCH][v2] Define/use siginfo_from_ancestor_ns() Sukadev Bhattiprolu
2008-11-18 17:53 ` Oleg Nesterov
@ 2008-11-19 2:28 ` Sukadev Bhattiprolu
1 sibling, 0 replies; 8+ messages in thread
From: Sukadev Bhattiprolu @ 2008-11-19 2:28 UTC (permalink / raw)
To: oleg, ebiederm; +Cc: daniel, xemul, containers, linux-kernel
| @@ -864,6 +902,9 @@ static int send_signal(int sig, struct siginfo *info, struct task_struct *t,
| * and sent by user using something other than kill().
| */
| return -EAGAIN;
| +
| + if (from_ancestor_ns)
| + return -ENOMEM;
| }
|
| out_set:
We had wanted to start with a check like above and improve later.
But if sender is from ancestor namespace, we must post the signal even if
we don't have the siginfo right ? Otherwise, a SIGKILL from ancestor may
get the -ENOMEM ?
Conversely, if a signal from same namespace is being posted to cinit, and
we don't have siginfo, ->si_pid would be 0 and get_signal_to_deliver()
would mistake that the sender is an ancestor ns and process the signal
(which should have been ignored).
So, maybe we should start with the reverse check ?
if (same_ns && (t->signal->flags & SIGNAL_UNKILLABLE))
return -ENOMEM;
Sukadev
^ permalink raw reply [flat|nested] 8+ messages in thread
end of thread, other threads:[~2008-11-26 17:45 UTC | newest]
Thread overview: 8+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2008-11-15 21:21 [RFC][PATCH][v2] Define/use siginfo_from_ancestor_ns() Sukadev Bhattiprolu
2008-11-18 17:53 ` Oleg Nesterov
2008-11-18 18:37 ` Sukadev Bhattiprolu
2008-11-19 1:22 ` Sukadev Bhattiprolu
2008-11-23 23:10 ` Oleg Nesterov
2008-11-26 3:16 ` Sukadev Bhattiprolu
2008-11-26 17:44 ` Sukadev Bhattiprolu
2008-11-19 2:28 ` Sukadev Bhattiprolu
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox