* [PATCH 1/2] signals: kill(-1) should only signal processes in the same namespace @ 2008-07-17 14:55 ` Daniel Hokka Zakrisson 0 siblings, 0 replies; 28+ messages in thread From: Daniel Hokka Zakrisson @ 2008-07-17 14:55 UTC (permalink / raw) To: linux-kernel-u79uwXL29TY76Z2rM5mHXA Cc: containers-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA, xemul-GEFAQzZX7r8dnm+yROfE0A, oleg-6lXkIZvqkOAvJsYlp49lxw, akpm-de/tnXTf+JLsfHDXvbKv3WD2FQJk+8+b, ebiederm-aS9lmoZGLiVWk0Htik3J/w While moving Linux-VServer to using pid namespaces, I noticed that kill(-1) from inside a pid namespace is currently signalling every process in the entire system, including processes that are otherwise unreachable from the current process. This patch fixes it by making sure that only processes which are in the same pid namespace as current get signalled. Signed-off-by: Daniel Hokka Zakrisson <daniel-nym3zxDgnZcAvxtiuMwx3w@public.gmane.org> diff --git a/include/linux/pid_namespace.h b/include/linux/pid_namespace.h index caff528..4cf41bd 100644 --- a/include/linux/pid_namespace.h +++ b/include/linux/pid_namespace.h @@ -40,6 +40,8 @@ static inline struct pid_namespace *get_pid_ns(struct pid_namespace *ns) extern struct pid_namespace *copy_pid_ns(unsigned long flags, struct pid_namespace *ns); extern void free_pid_ns(struct kref *kref); extern void zap_pid_ns_processes(struct pid_namespace *pid_ns); +extern int task_in_pid_ns(struct task_struct *tsk, + struct pid_namespace *pid_ns); static inline void put_pid_ns(struct pid_namespace *ns) { @@ -72,6 +74,12 @@ static inline void zap_pid_ns_processes(struct pid_namespace *ns) { BUG(); } + +static inline int task_in_pid_ns(struct task_struct *tsk, + struct pid_namespace *ns) +{ + return 1; +} #endif /* CONFIG_PID_NS */ static inline struct pid_namespace *task_active_pid_ns(struct task_struct *tsk) diff --git a/kernel/pid_namespace.c b/kernel/pid_namespace.c index 98702b4..3e71011 100644 --- a/kernel/pid_namespace.c +++ b/kernel/pid_namespace.c @@ -188,6 
+188,26 @@ void zap_pid_ns_processes(struct pid_namespace *pid_ns) return; } +/* + * Checks whether tsk has a pid in the pid namespace ns. + * Must be called with tasklist_lock read-locked or under rcu_read_lock() + */ +int task_in_pid_ns(struct task_struct *tsk, struct pid_namespace *ns) +{ + struct pid *pid = task_pid(tsk); + + if (!pid) + return 0; + + if (pid->level < ns->level) + return 0; + + if (pid->numbers[ns->level].ns != ns) + return 0; + + return 1; +} + static __init int pid_namespaces_init(void) { pid_ns_cachep = KMEM_CACHE(pid_namespace, SLAB_PANIC); diff --git a/kernel/signal.c b/kernel/signal.c index 6c0958e..93713a5 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -1145,7 +1145,8 @@ static int kill_something_info(int sig, struct siginfo *info, int pid) struct task_struct * p; for_each_process(p) { - if (p->pid > 1 && !same_thread_group(p, current)) { + if (p->pid > 1 && !same_thread_group(p, current) && + task_in_pid_ns(p, current->nsproxy->pid_ns)) { int err = group_send_sig_info(sig, info, p); ++count; if (err != -EPERM) -- 1.5.5.1 ^ permalink raw reply related [flat|nested] 28+ messages in thread
* [PATCH 1/2] signals: kill(-1) should only signal processes in the same namespace @ 2008-07-17 14:55 ` Daniel Hokka Zakrisson 0 siblings, 0 replies; 28+ messages in thread From: Daniel Hokka Zakrisson @ 2008-07-17 14:55 UTC (permalink / raw) To: linux-kernel; +Cc: containers, oleg, ebiederm, xemul, akpm While moving Linux-VServer to using pid namespaces, I noticed that kill(-1) from inside a pid namespace is currently signalling every process in the entire system, including processes that are otherwise unreachable from the current process. This patch fixes it by making sure that only processes which are in the same pid namespace as current get signalled. Signed-off-by: Daniel Hokka Zakrisson <daniel@hozac.com> diff --git a/include/linux/pid_namespace.h b/include/linux/pid_namespace.h index caff528..4cf41bd 100644 --- a/include/linux/pid_namespace.h +++ b/include/linux/pid_namespace.h @@ -40,6 +40,8 @@ static inline struct pid_namespace *get_pid_ns(struct pid_namespace *ns) extern struct pid_namespace *copy_pid_ns(unsigned long flags, struct pid_namespace *ns); extern void free_pid_ns(struct kref *kref); extern void zap_pid_ns_processes(struct pid_namespace *pid_ns); +extern int task_in_pid_ns(struct task_struct *tsk, + struct pid_namespace *pid_ns); static inline void put_pid_ns(struct pid_namespace *ns) { @@ -72,6 +74,12 @@ static inline void zap_pid_ns_processes(struct pid_namespace *ns) { BUG(); } + +static inline int task_in_pid_ns(struct task_struct *tsk, + struct pid_namespace *ns) +{ + return 1; +} #endif /* CONFIG_PID_NS */ static inline struct pid_namespace *task_active_pid_ns(struct task_struct *tsk) diff --git a/kernel/pid_namespace.c b/kernel/pid_namespace.c index 98702b4..3e71011 100644 --- a/kernel/pid_namespace.c +++ b/kernel/pid_namespace.c @@ -188,6 +188,26 @@ void zap_pid_ns_processes(struct pid_namespace *pid_ns) return; } +/* + * Checks whether tsk has a pid in the pid namespace ns. 
+ * Must be called with tasklist_lock read-locked or under rcu_read_lock() + */ +int task_in_pid_ns(struct task_struct *tsk, struct pid_namespace *ns) +{ + struct pid *pid = task_pid(tsk); + + if (!pid) + return 0; + + if (pid->level < ns->level) + return 0; + + if (pid->numbers[ns->level].ns != ns) + return 0; + + return 1; +} + static __init int pid_namespaces_init(void) { pid_ns_cachep = KMEM_CACHE(pid_namespace, SLAB_PANIC); diff --git a/kernel/signal.c b/kernel/signal.c index 6c0958e..93713a5 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -1145,7 +1145,8 @@ static int kill_something_info(int sig, struct siginfo *info, int pid) struct task_struct * p; for_each_process(p) { - if (p->pid > 1 && !same_thread_group(p, current)) { + if (p->pid > 1 && !same_thread_group(p, current) && + task_in_pid_ns(p, current->nsproxy->pid_ns)) { int err = group_send_sig_info(sig, info, p); ++count; if (err != -EPERM) -- 1.5.5.1 ^ permalink raw reply related [flat|nested] 28+ messages in thread
[parent not found: <487F5D6B.1090007-nym3zxDgnZcAvxtiuMwx3w@public.gmane.org>]
* Re: [PATCH 1/2] signals: kill(-1) should only signal processes in the same namespace [not found] ` <487F5D6B.1090007-nym3zxDgnZcAvxtiuMwx3w@public.gmane.org> @ 2008-07-17 15:01 ` Pavel Emelyanov 2008-07-17 18:13 ` sukadev-r/Jw6+rmf7HQT0dZR+AlfA 2008-07-23 14:34 ` Oleg Nesterov 2 siblings, 0 replies; 28+ messages in thread From: Pavel Emelyanov @ 2008-07-17 15:01 UTC (permalink / raw) To: Daniel Hokka Zakrisson Cc: containers-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA, ebiederm-aS9lmoZGLiVWk0Htik3J/w, linux-kernel-u79uwXL29TY76Z2rM5mHXA, akpm-de/tnXTf+JLsfHDXvbKv3WD2FQJk+8+b, oleg-6lXkIZvqkOAvJsYlp49lxw Daniel Hokka Zakrisson wrote: > While moving Linux-VServer to using pid namespaces, I noticed that > kill(-1) from inside a pid namespace is currently signalling every > process in the entire system, including processes that are otherwise > unreachable from the current process. This is not "news" actually, but anyway - thanks :) > This patch fixes it by making sure that only processes which are in > the same pid namespace as current get signalled. This is to be done, indeed, but I do not like the proposed implementation, since you have to walk all the tasks in the system (under tasklist_lock, by the way) to search for a couple of interesting ones. Better look at how zap_pid_ns_processes works (by the way - I saw some patch doing so some time ago). 
> Signed-off-by: Daniel Hokka Zakrisson <daniel-nym3zxDgnZcAvxtiuMwx3w@public.gmane.org> > > diff --git a/include/linux/pid_namespace.h b/include/linux/pid_namespace.h > index caff528..4cf41bd 100644 > --- a/include/linux/pid_namespace.h > +++ b/include/linux/pid_namespace.h > @@ -40,6 +40,8 @@ static inline struct pid_namespace *get_pid_ns(struct > pid_namespace *ns) > extern struct pid_namespace *copy_pid_ns(unsigned long flags, struct > pid_namespace *ns); > extern void free_pid_ns(struct kref *kref); > extern void zap_pid_ns_processes(struct pid_namespace *pid_ns); > +extern int task_in_pid_ns(struct task_struct *tsk, > + struct pid_namespace *pid_ns); > > static inline void put_pid_ns(struct pid_namespace *ns) > { > @@ -72,6 +74,12 @@ static inline void zap_pid_ns_processes(struct > pid_namespace *ns) > { > BUG(); > } > + > +static inline int task_in_pid_ns(struct task_struct *tsk, > + struct pid_namespace *ns) > +{ > + return 1; > +} > #endif /* CONFIG_PID_NS */ > > static inline struct pid_namespace *task_active_pid_ns(struct > task_struct *tsk) > diff --git a/kernel/pid_namespace.c b/kernel/pid_namespace.c > index 98702b4..3e71011 100644 > --- a/kernel/pid_namespace.c > +++ b/kernel/pid_namespace.c > @@ -188,6 +188,26 @@ void zap_pid_ns_processes(struct pid_namespace *pid_ns) > return; > } > > +/* > + * Checks whether tsk has a pid in the pid namespace ns. 
> + * Must be called with tasklist_lock read-locked or under rcu_read_lock() > + */ > +int task_in_pid_ns(struct task_struct *tsk, struct pid_namespace *ns) > +{ > + struct pid *pid = task_pid(tsk); > + > + if (!pid) > + return 0; > + > + if (pid->level < ns->level) > + return 0; > + > + if (pid->numbers[ns->level].ns != ns) > + return 0; > + > + return 1; > +} > + > static __init int pid_namespaces_init(void) > { > pid_ns_cachep = KMEM_CACHE(pid_namespace, SLAB_PANIC); > diff --git a/kernel/signal.c b/kernel/signal.c > index 6c0958e..93713a5 100644 > --- a/kernel/signal.c > +++ b/kernel/signal.c > @@ -1145,7 +1145,8 @@ static int kill_something_info(int sig, struct > siginfo *info, int pid) > struct task_struct * p; > > for_each_process(p) { > - if (p->pid > 1 && !same_thread_group(p, current)) { > + if (p->pid > 1 && !same_thread_group(p, current) && > + task_in_pid_ns(p, current->nsproxy->pid_ns)) { > int err = group_send_sig_info(sig, info, p); > ++count; > if (err != -EPERM) ^ permalink raw reply [flat|nested] 28+ messages in thread
* Re: [PATCH 1/2] signals: kill(-1) should only signal processes in the same namespace [not found] ` <487F5D6B.1090007-nym3zxDgnZcAvxtiuMwx3w@public.gmane.org> 2008-07-17 15:01 ` Pavel Emelyanov @ 2008-07-17 18:13 ` sukadev-r/Jw6+rmf7HQT0dZR+AlfA 2008-07-23 14:34 ` Oleg Nesterov 2 siblings, 0 replies; 28+ messages in thread From: sukadev-r/Jw6+rmf7HQT0dZR+AlfA @ 2008-07-17 18:13 UTC (permalink / raw) To: Daniel Hokka Zakrisson Cc: containers-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA, linux-kernel-u79uwXL29TY76Z2rM5mHXA, ebiederm-aS9lmoZGLiVWk0Htik3J/w, akpm-de/tnXTf+JLsfHDXvbKv3WD2FQJk+8+b, oleg-6lXkIZvqkOAvJsYlp49lxw, xemul-GEFAQzZX7r8dnm+yROfE0A Daniel Hokka Zakrisson [daniel-nym3zxDgnZcAvxtiuMwx3w@public.gmane.org] wrote: | While moving Linux-VServer to using pid namespaces, I noticed that | kill(-1) from inside a pid namespace is currently signalling every | process in the entire system, including processes that are otherwise | unreachable from the current process. | | This patch fixes it by making sure that only processes which are in | the same pid namespace as current get signalled. 
| | Signed-off-by: Daniel Hokka Zakrisson <daniel-nym3zxDgnZcAvxtiuMwx3w@public.gmane.org> | | diff --git a/include/linux/pid_namespace.h b/include/linux/pid_namespace.h | index caff528..4cf41bd 100644 | --- a/include/linux/pid_namespace.h | +++ b/include/linux/pid_namespace.h | @@ -40,6 +40,8 @@ static inline struct pid_namespace *get_pid_ns(struct | pid_namespace *ns) | extern struct pid_namespace *copy_pid_ns(unsigned long flags, struct | pid_namespace *ns); | extern void free_pid_ns(struct kref *kref); | extern void zap_pid_ns_processes(struct pid_namespace *pid_ns); | +extern int task_in_pid_ns(struct task_struct *tsk, | + struct pid_namespace *pid_ns); | | static inline void put_pid_ns(struct pid_namespace *ns) | { | @@ -72,6 +74,12 @@ static inline void zap_pid_ns_processes(struct | pid_namespace *ns) | { | BUG(); | } | + | +static inline int task_in_pid_ns(struct task_struct *tsk, | + struct pid_namespace *ns) | +{ | + return 1; | +} | #endif /* CONFIG_PID_NS */ | | static inline struct pid_namespace *task_active_pid_ns(struct | task_struct *tsk) | diff --git a/kernel/pid_namespace.c b/kernel/pid_namespace.c | index 98702b4..3e71011 100644 | --- a/kernel/pid_namespace.c | +++ b/kernel/pid_namespace.c | @@ -188,6 +188,26 @@ void zap_pid_ns_processes(struct pid_namespace *pid_ns) | return; | } | | +/* | + * Checks whether tsk has a pid in the pid namespace ns. | + * Must be called with tasklist_lock read-locked or under rcu_read_lock() | + */ | +int task_in_pid_ns(struct task_struct *tsk, struct pid_namespace *ns) | +{ | + struct pid *pid = task_pid(tsk); | + | + if (!pid) | + return 0; | + | + if (pid->level < ns->level) | + return 0; ns can be NULL if tsk is exiting. Like Pavel said, we had couple of attempts to fix the larger problem of signal semantics in containers but did not have a consensus on handling blocked/unhandled signals to container-init. It would still be good to fix this "kill -1" problem. 
Eric had a slightly optimized interface, 'pid_in_pid_ns()' in following patchset. Maybe we could use that ? https://lists.linux-foundation.org/pipermail/containers/2007-December/009174.html | + | + if (pid->numbers[ns->level].ns != ns) | + return 0; | + | + return 1; | +} | + | static __init int pid_namespaces_init(void) | { | pid_ns_cachep = KMEM_CACHE(pid_namespace, SLAB_PANIC); | diff --git a/kernel/signal.c b/kernel/signal.c | index 6c0958e..93713a5 100644 | --- a/kernel/signal.c | +++ b/kernel/signal.c | @@ -1145,7 +1145,8 @@ static int kill_something_info(int sig, struct | siginfo *info, int pid) | struct task_struct * p; | | for_each_process(p) { | - if (p->pid > 1 && !same_thread_group(p, current)) { | + if (p->pid > 1 && !same_thread_group(p, current) && | + task_in_pid_ns(p, current->nsproxy->pid_ns)) { | int err = group_send_sig_info(sig, info, p); | ++count; | if (err != -EPERM) | -- | 1.5.5.1 | _______________________________________________ | Containers mailing list | Containers-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA@public.gmane.org | https://lists.linux-foundation.org/mailman/listinfo/containers ^ permalink raw reply [flat|nested] 28+ messages in thread
* Re: [PATCH 1/2] signals: kill(-1) should only signal processes in the same namespace [not found] ` <487F5D6B.1090007-nym3zxDgnZcAvxtiuMwx3w@public.gmane.org> 2008-07-17 15:01 ` Pavel Emelyanov 2008-07-17 18:13 ` sukadev-r/Jw6+rmf7HQT0dZR+AlfA @ 2008-07-23 14:34 ` Oleg Nesterov 2 siblings, 0 replies; 28+ messages in thread From: Oleg Nesterov @ 2008-07-23 14:34 UTC (permalink / raw) To: Daniel Hokka Zakrisson Cc: containers-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA, xemul-GEFAQzZX7r8dnm+yROfE0A, linux-kernel-u79uwXL29TY76Z2rM5mHXA, akpm-de/tnXTf+JLsfHDXvbKv3WD2FQJk+8+b, ebiederm-aS9lmoZGLiVWk0Htik3J/w On 07/17, Daniel Hokka Zakrisson wrote: > > +int task_in_pid_ns(struct task_struct *tsk, struct pid_namespace *ns) > +{ > + struct pid *pid = task_pid(tsk); > + > + if (!pid) > + return 0; > + > + if (pid->level < ns->level) > + return 0; > + > + if (pid->numbers[ns->level].ns != ns) > + return 0; > + > + return 1; > +} > + > static __init int pid_namespaces_init(void) > { > pid_ns_cachep = KMEM_CACHE(pid_namespace, SLAB_PANIC); > diff --git a/kernel/signal.c b/kernel/signal.c > index 6c0958e..93713a5 100644 > --- a/kernel/signal.c > +++ b/kernel/signal.c > @@ -1145,7 +1145,8 @@ static int kill_something_info(int sig, struct > siginfo *info, int pid) > struct task_struct * p; > > for_each_process(p) { > - if (p->pid > 1 && !same_thread_group(p, current)) { > + if (p->pid > 1 && !same_thread_group(p, current) && > + task_in_pid_ns(p, current->nsproxy->pid_ns)) { > int err = group_send_sig_info(sig, info, p); > ++count; > if (err != -EPERM) Do we really need all these complications? 
Afaics, we can make a simpler patch, --- kernel/signal.c +++ kernel/signal.c @@ -1136,7 +1136,7 @@ static int kill_something_info(int sig, struct task_struct * p; for_each_process(p) { - if (p->pid > 1 && !same_thread_group(p, current)) { + if (task_pid_vnr(p) > 1 && !same_thread_group(p, current)) { int err = group_send_sig_info(sig, info, p); ++count; if (err != -EPERM) task_pid_vnr(p) returns 0 if "p" is not visible from the current's namespace. "> 1" ensures we don't kill the child reaper as well. No? Oleg. ^ permalink raw reply [flat|nested] 28+ messages in thread
* Re: [PATCH 1/2] signals: kill(-1) should only signal processes in the same namespace 2008-07-17 14:55 ` Daniel Hokka Zakrisson (?) (?) @ 2008-07-17 15:01 ` Pavel Emelyanov [not found] ` <487F5EDB.1000008-GEFAQzZX7r8dnm+yROfE0A@public.gmane.org> 2008-07-17 15:24 ` Daniel Hokka Zakrisson -1 siblings, 2 replies; 28+ messages in thread From: Pavel Emelyanov @ 2008-07-17 15:01 UTC (permalink / raw) To: Daniel Hokka Zakrisson; +Cc: linux-kernel, containers, oleg, ebiederm, akpm Daniel Hokka Zakrisson wrote: > While moving Linux-VServer to using pid namespaces, I noticed that > kill(-1) from inside a pid namespace is currently signalling every > process in the entire system, including processes that are otherwise > unreachable from the current process. This is not "news" actually, but anyway - thanks :) > This patch fixes it by making sure that only processes which are in > the same pid namespace as current get signalled. This is to be done, indeed, but I do not like the proposed implementation, since you have to walk all the tasks in the system (under tasklist_lock, by the way) to search for a couple of interesting ones. Better look at how zap_pid_ns_processes works (by the way - I saw some patch doing so some time ago). 
> Signed-off-by: Daniel Hokka Zakrisson <daniel@hozac.com> > > diff --git a/include/linux/pid_namespace.h b/include/linux/pid_namespace.h > index caff528..4cf41bd 100644 > --- a/include/linux/pid_namespace.h > +++ b/include/linux/pid_namespace.h > @@ -40,6 +40,8 @@ static inline struct pid_namespace *get_pid_ns(struct > pid_namespace *ns) > extern struct pid_namespace *copy_pid_ns(unsigned long flags, struct > pid_namespace *ns); > extern void free_pid_ns(struct kref *kref); > extern void zap_pid_ns_processes(struct pid_namespace *pid_ns); > +extern int task_in_pid_ns(struct task_struct *tsk, > + struct pid_namespace *pid_ns); > > static inline void put_pid_ns(struct pid_namespace *ns) > { > @@ -72,6 +74,12 @@ static inline void zap_pid_ns_processes(struct > pid_namespace *ns) > { > BUG(); > } > + > +static inline int task_in_pid_ns(struct task_struct *tsk, > + struct pid_namespace *ns) > +{ > + return 1; > +} > #endif /* CONFIG_PID_NS */ > > static inline struct pid_namespace *task_active_pid_ns(struct > task_struct *tsk) > diff --git a/kernel/pid_namespace.c b/kernel/pid_namespace.c > index 98702b4..3e71011 100644 > --- a/kernel/pid_namespace.c > +++ b/kernel/pid_namespace.c > @@ -188,6 +188,26 @@ void zap_pid_ns_processes(struct pid_namespace *pid_ns) > return; > } > > +/* > + * Checks whether tsk has a pid in the pid namespace ns. 
> + * Must be called with tasklist_lock read-locked or under rcu_read_lock() > + */ > +int task_in_pid_ns(struct task_struct *tsk, struct pid_namespace *ns) > +{ > + struct pid *pid = task_pid(tsk); > + > + if (!pid) > + return 0; > + > + if (pid->level < ns->level) > + return 0; > + > + if (pid->numbers[ns->level].ns != ns) > + return 0; > + > + return 1; > +} > + > static __init int pid_namespaces_init(void) > { > pid_ns_cachep = KMEM_CACHE(pid_namespace, SLAB_PANIC); > diff --git a/kernel/signal.c b/kernel/signal.c > index 6c0958e..93713a5 100644 > --- a/kernel/signal.c > +++ b/kernel/signal.c > @@ -1145,7 +1145,8 @@ static int kill_something_info(int sig, struct > siginfo *info, int pid) > struct task_struct * p; > > for_each_process(p) { > - if (p->pid > 1 && !same_thread_group(p, current)) { > + if (p->pid > 1 && !same_thread_group(p, current) && > + task_in_pid_ns(p, current->nsproxy->pid_ns)) { > int err = group_send_sig_info(sig, info, p); > ++count; > if (err != -EPERM) ^ permalink raw reply [flat|nested] 28+ messages in thread
[parent not found: <487F5EDB.1000008-GEFAQzZX7r8dnm+yROfE0A@public.gmane.org>]
* Re: [PATCH 1/2] signals: kill(-1) should only signal processes in the same namespace [not found] ` <487F5EDB.1000008-GEFAQzZX7r8dnm+yROfE0A@public.gmane.org> @ 2008-07-17 15:24 ` Daniel Hokka Zakrisson 0 siblings, 0 replies; 28+ messages in thread From: Daniel Hokka Zakrisson @ 2008-07-17 15:24 UTC (permalink / raw) To: Pavel Emelyanov Cc: containers-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA, ebiederm-aS9lmoZGLiVWk0Htik3J/w, linux-kernel-u79uwXL29TY76Z2rM5mHXA, akpm-de/tnXTf+JLsfHDXvbKv3WD2FQJk+8+b, oleg-6lXkIZvqkOAvJsYlp49lxw Pavel Emelyanov wrote: > Daniel Hokka Zakrisson wrote: >> While moving Linux-VServer to using pid namespaces, I noticed that >> kill(-1) from inside a pid namespace is currently signalling every >> process in the entire system, including processes that are otherwise >> unreachable from the current process. > > This is not a "news" actually, buy anyway - thanks :) And yet nobody's fixed it... Kind of a critical thing, if you actually want to use them, since most distribution's rc-scripts do a kill(-1, SIGTERM), followed by kill(-1, SIGKILL) when halting (which, needless to say, would be very bad). >> This patch fixes it by making sure that only processes which are in >> the same pid namespace as current get signalled. > > This is to be done, indeed, but I do not like the proposed implementation, > since you have to walk all the tasks in the system (under tasklist_lock, > by the way) to search for a couple of interesting ones. Better look at how > zap_pid_ns_processes works (by the way - I saw some patch doing so some > time ago). The way zap_pid_ns_processes does it is worse, since it signals every thread in the namespace rather than every thread group. So either we walk the global tasklist, or we create a per-namespace one. Is that what we want? 
>> Signed-off-by: Daniel Hokka Zakrisson <daniel-nym3zxDgnZcAvxtiuMwx3w@public.gmane.org> >> >> diff --git a/include/linux/pid_namespace.h >> b/include/linux/pid_namespace.h >> index caff528..4cf41bd 100644 >> --- a/include/linux/pid_namespace.h >> +++ b/include/linux/pid_namespace.h >> @@ -40,6 +40,8 @@ static inline struct pid_namespace *get_pid_ns(struct >> pid_namespace *ns) >> extern struct pid_namespace *copy_pid_ns(unsigned long flags, struct >> pid_namespace *ns); >> extern void free_pid_ns(struct kref *kref); >> extern void zap_pid_ns_processes(struct pid_namespace *pid_ns); >> +extern int task_in_pid_ns(struct task_struct *tsk, >> + struct pid_namespace *pid_ns); >> >> static inline void put_pid_ns(struct pid_namespace *ns) >> { >> @@ -72,6 +74,12 @@ static inline void zap_pid_ns_processes(struct >> pid_namespace *ns) >> { >> BUG(); >> } >> + >> +static inline int task_in_pid_ns(struct task_struct *tsk, >> + struct pid_namespace *ns) >> +{ >> + return 1; >> +} >> #endif /* CONFIG_PID_NS */ >> >> static inline struct pid_namespace *task_active_pid_ns(struct >> task_struct *tsk) >> diff --git a/kernel/pid_namespace.c b/kernel/pid_namespace.c >> index 98702b4..3e71011 100644 >> --- a/kernel/pid_namespace.c >> +++ b/kernel/pid_namespace.c >> @@ -188,6 +188,26 @@ void zap_pid_ns_processes(struct pid_namespace >> *pid_ns) >> return; >> } >> >> +/* >> + * Checks whether tsk has a pid in the pid namespace ns. 
>> + * Must be called with tasklist_lock read-locked or under >> rcu_read_lock() >> + */ >> +int task_in_pid_ns(struct task_struct *tsk, struct pid_namespace *ns) >> +{ >> + struct pid *pid = task_pid(tsk); >> + >> + if (!pid) >> + return 0; >> + >> + if (pid->level < ns->level) >> + return 0; >> + >> + if (pid->numbers[ns->level].ns != ns) >> + return 0; >> + >> + return 1; >> +} >> + >> static __init int pid_namespaces_init(void) >> { >> pid_ns_cachep = KMEM_CACHE(pid_namespace, SLAB_PANIC); >> diff --git a/kernel/signal.c b/kernel/signal.c >> index 6c0958e..93713a5 100644 >> --- a/kernel/signal.c >> +++ b/kernel/signal.c >> @@ -1145,7 +1145,8 @@ static int kill_something_info(int sig, struct >> siginfo *info, int pid) >> struct task_struct * p; >> >> for_each_process(p) { >> - if (p->pid > 1 && !same_thread_group(p, current)) { >> + if (p->pid > 1 && !same_thread_group(p, current) && >> + task_in_pid_ns(p, current->nsproxy->pid_ns)) { >> int err = group_send_sig_info(sig, info, p); >> ++count; >> if (err != -EPERM) -- Daniel Hokka Zakrisson ^ permalink raw reply [flat|nested] 28+ messages in thread
* Re: [PATCH 1/2] signals: kill(-1) should only signal processes in the same namespace 2008-07-17 15:01 ` Pavel Emelyanov [not found] ` <487F5EDB.1000008-GEFAQzZX7r8dnm+yROfE0A@public.gmane.org> @ 2008-07-17 15:24 ` Daniel Hokka Zakrisson 2008-07-17 15:54 ` Pavel Emelyanov ` (3 more replies) 1 sibling, 4 replies; 28+ messages in thread From: Daniel Hokka Zakrisson @ 2008-07-17 15:24 UTC (permalink / raw) To: Pavel Emelyanov; +Cc: linux-kernel, containers, oleg, ebiederm, akpm Pavel Emelyanov wrote: > Daniel Hokka Zakrisson wrote: >> While moving Linux-VServer to using pid namespaces, I noticed that >> kill(-1) from inside a pid namespace is currently signalling every >> process in the entire system, including processes that are otherwise >> unreachable from the current process. > > This is not a "news" actually, buy anyway - thanks :) And yet nobody's fixed it... Kind of a critical thing, if you actually want to use them, since most distribution's rc-scripts do a kill(-1, SIGTERM), followed by kill(-1, SIGKILL) when halting (which, needless to say, would be very bad). >> This patch fixes it by making sure that only processes which are in >> the same pid namespace as current get signalled. > > This is to be done, indeed, but I do not like the proposed implementation, > since you have to walk all the tasks in the system (under tasklist_lock, > by the way) to search for a couple of interesting ones. Better look at how > zap_pid_ns_processes works (by the way - I saw some patch doing so some > time ago). The way zap_pid_ns_processes does it is worse, since it signals every thread in the namespace rather than every thread group. So either we walk the global tasklist, or we create a per-namespace one. Is that what we want? 
>> Signed-off-by: Daniel Hokka Zakrisson <daniel@hozac.com> >> >> diff --git a/include/linux/pid_namespace.h >> b/include/linux/pid_namespace.h >> index caff528..4cf41bd 100644 >> --- a/include/linux/pid_namespace.h >> +++ b/include/linux/pid_namespace.h >> @@ -40,6 +40,8 @@ static inline struct pid_namespace *get_pid_ns(struct >> pid_namespace *ns) >> extern struct pid_namespace *copy_pid_ns(unsigned long flags, struct >> pid_namespace *ns); >> extern void free_pid_ns(struct kref *kref); >> extern void zap_pid_ns_processes(struct pid_namespace *pid_ns); >> +extern int task_in_pid_ns(struct task_struct *tsk, >> + struct pid_namespace *pid_ns); >> >> static inline void put_pid_ns(struct pid_namespace *ns) >> { >> @@ -72,6 +74,12 @@ static inline void zap_pid_ns_processes(struct >> pid_namespace *ns) >> { >> BUG(); >> } >> + >> +static inline int task_in_pid_ns(struct task_struct *tsk, >> + struct pid_namespace *ns) >> +{ >> + return 1; >> +} >> #endif /* CONFIG_PID_NS */ >> >> static inline struct pid_namespace *task_active_pid_ns(struct >> task_struct *tsk) >> diff --git a/kernel/pid_namespace.c b/kernel/pid_namespace.c >> index 98702b4..3e71011 100644 >> --- a/kernel/pid_namespace.c >> +++ b/kernel/pid_namespace.c >> @@ -188,6 +188,26 @@ void zap_pid_ns_processes(struct pid_namespace >> *pid_ns) >> return; >> } >> >> +/* >> + * Checks whether tsk has a pid in the pid namespace ns. 
>> + * Must be called with tasklist_lock read-locked or under >> rcu_read_lock() >> + */ >> +int task_in_pid_ns(struct task_struct *tsk, struct pid_namespace *ns) >> +{ >> + struct pid *pid = task_pid(tsk); >> + >> + if (!pid) >> + return 0; >> + >> + if (pid->level < ns->level) >> + return 0; >> + >> + if (pid->numbers[ns->level].ns != ns) >> + return 0; >> + >> + return 1; >> +} >> + >> static __init int pid_namespaces_init(void) >> { >> pid_ns_cachep = KMEM_CACHE(pid_namespace, SLAB_PANIC); >> diff --git a/kernel/signal.c b/kernel/signal.c >> index 6c0958e..93713a5 100644 >> --- a/kernel/signal.c >> +++ b/kernel/signal.c >> @@ -1145,7 +1145,8 @@ static int kill_something_info(int sig, struct >> siginfo *info, int pid) >> struct task_struct * p; >> >> for_each_process(p) { >> - if (p->pid > 1 && !same_thread_group(p, current)) { >> + if (p->pid > 1 && !same_thread_group(p, current) && >> + task_in_pid_ns(p, current->nsproxy->pid_ns)) { >> int err = group_send_sig_info(sig, info, p); >> ++count; >> if (err != -EPERM) -- Daniel Hokka Zakrisson ^ permalink raw reply [flat|nested] 28+ messages in thread
* Re: [PATCH 1/2] signals: kill(-1) should only signal processes in the same namespace 2008-07-17 15:24 ` Daniel Hokka Zakrisson @ 2008-07-17 15:54 ` Pavel Emelyanov [not found] ` <487F6B2E.40101-GEFAQzZX7r8dnm+yROfE0A@public.gmane.org> 2008-07-17 17:32 ` Oleg Nesterov 2008-07-17 15:54 ` Pavel Emelyanov ` (2 subsequent siblings) 3 siblings, 2 replies; 28+ messages in thread From: Pavel Emelyanov @ 2008-07-17 15:54 UTC (permalink / raw) To: Daniel Hokka Zakrisson; +Cc: linux-kernel, containers, oleg, ebiederm, akpm Daniel Hokka Zakrisson wrote: > Pavel Emelyanov wrote: >> Daniel Hokka Zakrisson wrote: >>> While moving Linux-VServer to using pid namespaces, I noticed that >>> kill(-1) from inside a pid namespace is currently signalling every >>> process in the entire system, including processes that are otherwise >>> unreachable from the current process. >> This is not a "news" actually, buy anyway - thanks :) > > And yet nobody's fixed it... Kind of a critical thing, if you actually > want to use them, since most distribution's rc-scripts do a kill(-1, > SIGTERM), followed by kill(-1, SIGKILL) when halting (which, needless to > say, would be very bad). > >>> This patch fixes it by making sure that only processes which are in >>> the same pid namespace as current get signalled. >> This is to be done, indeed, but I do not like the proposed implementation, >> since you have to walk all the tasks in the system (under tasklist_lock, >> by the way) to search for a couple of interesting ones. Better look at how >> zap_pid_ns_processes works (by the way - I saw some patch doing so some >> time ago). > > The way zap_pid_ns_processes does it is worse, since it signals every > thread in the namespace rather than every thread group. So either we walk It's questionable whether there are more "threads in a pid namespace" than "processes in a system". E.g. on my notebook there are ~110 processes and ~150 threads. 
So having this setup launched in 10 containers you'll have to walk 1100 tasks, while zap_pid_ns_processes only 150 ;) Some real-life example with containers: on one of our servers with 10 containers serving as git repo, building system and some other stuff there are ~200 processes in total and ~20 threads in each container. See? I tend to believe that walking threads in a container is cheaper than walking processes in a system... > the global tasklist, or we create a per-namespace one. Is that what we > want? We want to kill all tasks in current pid namespace. There are variants of how to do this. Your particular implementation of handling this case seems poor to me for the reasons described above. >>> Signed-off-by: Daniel Hokka Zakrisson <daniel@hozac.com> >>> >>> diff --git a/include/linux/pid_namespace.h >>> b/include/linux/pid_namespace.h >>> index caff528..4cf41bd 100644 >>> --- a/include/linux/pid_namespace.h >>> +++ b/include/linux/pid_namespace.h >>> @@ -40,6 +40,8 @@ static inline struct pid_namespace *get_pid_ns(struct >>> pid_namespace *ns) >>> extern struct pid_namespace *copy_pid_ns(unsigned long flags, struct >>> pid_namespace *ns); >>> extern void free_pid_ns(struct kref *kref); >>> extern void zap_pid_ns_processes(struct pid_namespace *pid_ns); >>> +extern int task_in_pid_ns(struct task_struct *tsk, >>> + struct pid_namespace *pid_ns); >>> >>> static inline void put_pid_ns(struct pid_namespace *ns) >>> { >>> @@ -72,6 +74,12 @@ static inline void zap_pid_ns_processes(struct >>> pid_namespace *ns) >>> { >>> BUG(); >>> } >>> + >>> +static inline int task_in_pid_ns(struct task_struct *tsk, >>> + struct pid_namespace *ns) >>> +{ >>> + return 1; >>> +} >>> #endif /* CONFIG_PID_NS */ >>> >>> static inline struct pid_namespace *task_active_pid_ns(struct >>> task_struct *tsk) >>> diff --git a/kernel/pid_namespace.c b/kernel/pid_namespace.c >>> index 98702b4..3e71011 100644 >>> --- a/kernel/pid_namespace.c >>> +++ b/kernel/pid_namespace.c >>> @@ -188,6 +188,26 @@ 
void zap_pid_ns_processes(struct pid_namespace >>> *pid_ns) >>> return; >>> } >>> >>> +/* >>> + * Checks whether tsk has a pid in the pid namespace ns. >>> + * Must be called with tasklist_lock read-locked or under >>> rcu_read_lock() >>> + */ >>> +int task_in_pid_ns(struct task_struct *tsk, struct pid_namespace *ns) >>> +{ >>> + struct pid *pid = task_pid(tsk); >>> + >>> + if (!pid) >>> + return 0; >>> + >>> + if (pid->level < ns->level) >>> + return 0; >>> + >>> + if (pid->numbers[ns->level].ns != ns) >>> + return 0; >>> + >>> + return 1; >>> +} >>> + >>> static __init int pid_namespaces_init(void) >>> { >>> pid_ns_cachep = KMEM_CACHE(pid_namespace, SLAB_PANIC); >>> diff --git a/kernel/signal.c b/kernel/signal.c >>> index 6c0958e..93713a5 100644 >>> --- a/kernel/signal.c >>> +++ b/kernel/signal.c >>> @@ -1145,7 +1145,8 @@ static int kill_something_info(int sig, struct >>> siginfo *info, int pid) >>> struct task_struct * p; >>> >>> for_each_process(p) { >>> - if (p->pid > 1 && !same_thread_group(p, current)) { >>> + if (p->pid > 1 && !same_thread_group(p, current) && >>> + task_in_pid_ns(p, current->nsproxy->pid_ns)) { >>> int err = group_send_sig_info(sig, info, p); >>> ++count; >>> if (err != -EPERM) > ^ permalink raw reply [flat|nested] 28+ messages in thread
[parent not found: <487F6B2E.40101-GEFAQzZX7r8dnm+yROfE0A@public.gmane.org>]
* Re: [PATCH 1/2] signals: kill(-1) should only signal processes in the same namespace [not found] ` <487F6B2E.40101-GEFAQzZX7r8dnm+yROfE0A@public.gmane.org> @ 2008-07-17 17:32 ` Oleg Nesterov 0 siblings, 0 replies; 28+ messages in thread From: Oleg Nesterov @ 2008-07-17 17:32 UTC (permalink / raw) To: Pavel Emelyanov Cc: containers-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA, linux-kernel-u79uwXL29TY76Z2rM5mHXA, akpm-de/tnXTf+JLsfHDXvbKv3WD2FQJk+8+b, ebiederm-aS9lmoZGLiVWk0Htik3J/w On 07/17, Pavel Emelyanov wrote: > > Daniel Hokka Zakrisson wrote: > > > > The way zap_pid_ns_processes does it is worse, since it signals every > > thread in the namespace rather than every thread group. So either we walk > > It's questionable whether there are more "threads in a pid namespace" than > "processes in a system". > > E.g. on my notebook there are ~110 processes and ~150 threads. So having > this setup launched in 10 containers you'll have to walk 1100 tasks, while > zap_pid_ns_processes only 150 ;) > > Some real-life example with containers: on one of our servers with 10 > containers serving as git repo, bulding system and some other stuff there > are ~200 process totally and ~20 threads in each container. See? > > I tend to believe that walking threads in a container is cheaper then > walking processes in a system... kill_something_info() can't walk threads, think about the realtime signals. Anyway, I think we should change kill_something_info(-1) to use rcu_read_lock() instead of tasklist. Oleg. ^ permalink raw reply [flat|nested] 28+ messages in thread
* Re: [PATCH 1/2] signals: kill(-1) should only signal processes in the same namespace 2008-07-17 15:54 ` Pavel Emelyanov [not found] ` <487F6B2E.40101-GEFAQzZX7r8dnm+yROfE0A@public.gmane.org> @ 2008-07-17 17:32 ` Oleg Nesterov [not found] ` <20080717173254.GA20948-6lXkIZvqkOAvJsYlp49lxw@public.gmane.org> 2008-07-17 17:50 ` Eric W. Biederman 1 sibling, 2 replies; 28+ messages in thread From: Oleg Nesterov @ 2008-07-17 17:32 UTC (permalink / raw) To: Pavel Emelyanov Cc: Daniel Hokka Zakrisson, linux-kernel, containers, ebiederm, akpm On 07/17, Pavel Emelyanov wrote: > > Daniel Hokka Zakrisson wrote: > > > > The way zap_pid_ns_processes does it is worse, since it signals every > > thread in the namespace rather than every thread group. So either we walk > > It's questionable whether there are more "threads in a pid namespace" than > "processes in a system". > > E.g. on my notebook there are ~110 processes and ~150 threads. So having > this setup launched in 10 containers you'll have to walk 1100 tasks, while > zap_pid_ns_processes only 150 ;) > > Some real-life example with containers: on one of our servers with 10 > containers serving as git repo, bulding system and some other stuff there > are ~200 process totally and ~20 threads in each container. See? > > I tend to believe that walking threads in a container is cheaper then > walking processes in a system... kill_something_info() can't walk threads, think about the realtime signals. Anyway, I think we should change kill_something_info(-1) to use rcu_read_lock() instead of tasklist. Oleg. ^ permalink raw reply [flat|nested] 28+ messages in thread
[parent not found: <20080717173254.GA20948-6lXkIZvqkOAvJsYlp49lxw@public.gmane.org>]
* Re: [PATCH 1/2] signals: kill(-1) should only signal processes in the same namespace [not found] ` <20080717173254.GA20948-6lXkIZvqkOAvJsYlp49lxw@public.gmane.org> @ 2008-07-17 17:50 ` Eric W. Biederman 0 siblings, 0 replies; 28+ messages in thread From: Eric W. Biederman @ 2008-07-17 17:50 UTC (permalink / raw) To: Oleg Nesterov Cc: containers-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA, linux-kernel-u79uwXL29TY76Z2rM5mHXA, akpm-de/tnXTf+JLsfHDXvbKv3WD2FQJk+8+b, Pavel Emelyanov Oleg Nesterov <oleg-6lXkIZvqkOAvJsYlp49lxw@public.gmane.org> writes: > kill_something_info() can't walk threads, think about the realtime signals. Walking threads is fine; delivering signals to non-thread-group leaders is a problem. > Anyway, I think we should change kill_something_info(-1) to use rcu_read_lock() > instead of tasklist. Being dense, I think the locking implications of a correct implementation are more than we are ready to deal with to fix this bug. Although I remember discussing it and seeing something reasonable. Eric ^ permalink raw reply [flat|nested] 28+ messages in thread
* Re: [PATCH 1/2] signals: kill(-1) should only signal processes in the same namespace 2008-07-17 17:32 ` Oleg Nesterov [not found] ` <20080717173254.GA20948-6lXkIZvqkOAvJsYlp49lxw@public.gmane.org> @ 2008-07-17 17:50 ` Eric W. Biederman 1 sibling, 0 replies; 28+ messages in thread From: Eric W. Biederman @ 2008-07-17 17:50 UTC (permalink / raw) To: Oleg Nesterov Cc: Pavel Emelyanov, Daniel Hokka Zakrisson, linux-kernel, containers, akpm Oleg Nesterov <oleg@tv-sign.ru> writes: > kill_something_info() can't walk threads, think about the realtime signals. Walking threads is fine; delivering signals to non-thread-group leaders is a problem. > Anyway, I think we should change kill_something_info(-1) to use rcu_read_lock() > instead of tasklist. Being dense, I think the locking implications of a correct implementation are more than we are ready to deal with to fix this bug. Although I remember discussing it and seeing something reasonable. Eric ^ permalink raw reply [flat|nested] 28+ messages in thread
* Re: [PATCH 1/2] signals: kill(-1) should only signal processes in the same namespace 2008-07-17 15:24 ` Daniel Hokka Zakrisson 2008-07-17 15:54 ` Pavel Emelyanov @ 2008-07-17 15:54 ` Pavel Emelyanov 2008-07-17 17:45 ` Eric W. Biederman 2008-07-17 17:45 ` Eric W. Biederman 3 siblings, 0 replies; 28+ messages in thread From: Pavel Emelyanov @ 2008-07-17 15:54 UTC (permalink / raw) To: Daniel Hokka Zakrisson Cc: containers-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA, ebiederm-aS9lmoZGLiVWk0Htik3J/w, linux-kernel-u79uwXL29TY76Z2rM5mHXA, akpm-de/tnXTf+JLsfHDXvbKv3WD2FQJk+8+b, oleg-6lXkIZvqkOAvJsYlp49lxw Daniel Hokka Zakrisson wrote: > Pavel Emelyanov wrote: >> Daniel Hokka Zakrisson wrote: >>> While moving Linux-VServer to using pid namespaces, I noticed that >>> kill(-1) from inside a pid namespace is currently signalling every >>> process in the entire system, including processes that are otherwise >>> unreachable from the current process. >> This is not a "news" actually, buy anyway - thanks :) > > And yet nobody's fixed it... Kind of a critical thing, if you actually > want to use them, since most distribution's rc-scripts do a kill(-1, > SIGTERM), followed by kill(-1, SIGKILL) when halting (which, needless to > say, would be very bad). > >>> This patch fixes it by making sure that only processes which are in >>> the same pid namespace as current get signalled. >> This is to be done, indeed, but I do not like the proposed implementation, >> since you have to walk all the tasks in the system (under tasklist_lock, >> by the way) to search for a couple of interesting ones. Better look at how >> zap_pid_ns_processes works (by the way - I saw some patch doing so some >> time ago). > > The way zap_pid_ns_processes does it is worse, since it signals every > thread in the namespace rather than every thread group. So either we walk It's questionable whether there are more "threads in a pid namespace" than "processes in a system". E.g. 
on my notebook there are ~110 processes and ~150 threads. So having this setup launched in 10 containers you'll have to walk 1100 tasks, while zap_pid_ns_processes only 150 ;) Some real-life example with containers: on one of our servers with 10 containers serving as git repo, build system and some other stuff there are ~200 processes in total and ~20 threads in each container. See? I tend to believe that walking threads in a container is cheaper than walking processes in a system... > the global tasklist, or we create a per-namespace one. Is that what we > want? We want to kill all tasks in current pid namespace. There are variants of how to do this. Your particular implementation of handling this case seems poor to me for the reasons described above. >>> Signed-off-by: Daniel Hokka Zakrisson <daniel-nym3zxDgnZcAvxtiuMwx3w@public.gmane.org> >>> >>> diff --git a/include/linux/pid_namespace.h >>> b/include/linux/pid_namespace.h >>> index caff528..4cf41bd 100644 >>> --- a/include/linux/pid_namespace.h >>> +++ b/include/linux/pid_namespace.h >>> @@ -40,6 +40,8 @@ static inline struct pid_namespace *get_pid_ns(struct >>> pid_namespace *ns) >>> extern struct pid_namespace *copy_pid_ns(unsigned long flags, struct >>> pid_namespace *ns); >>> extern void free_pid_ns(struct kref *kref); >>> extern void zap_pid_ns_processes(struct pid_namespace *pid_ns); >>> +extern int task_in_pid_ns(struct task_struct *tsk, >>> + struct pid_namespace *pid_ns); >>> >>> static inline void put_pid_ns(struct pid_namespace *ns) >>> { >>> @@ -72,6 +74,12 @@ static inline void zap_pid_ns_processes(struct >>> pid_namespace *ns) >>> { >>> BUG(); >>> } >>> + >>> +static inline int task_in_pid_ns(struct task_struct *tsk, >>> + struct pid_namespace *ns) >>> +{ >>> + return 1; >>> +} >>> #endif /* CONFIG_PID_NS */ >>> >>> static inline struct pid_namespace *task_active_pid_ns(struct >>> task_struct *tsk) >>> diff --git a/kernel/pid_namespace.c b/kernel/pid_namespace.c >>> index 98702b4..3e71011 100644 
>>> --- a/kernel/pid_namespace.c >>> +++ b/kernel/pid_namespace.c >>> @@ -188,6 +188,26 @@ void zap_pid_ns_processes(struct pid_namespace >>> *pid_ns) >>> return; >>> } >>> >>> +/* >>> + * Checks whether tsk has a pid in the pid namespace ns. >>> + * Must be called with tasklist_lock read-locked or under >>> rcu_read_lock() >>> + */ >>> +int task_in_pid_ns(struct task_struct *tsk, struct pid_namespace *ns) >>> +{ >>> + struct pid *pid = task_pid(tsk); >>> + >>> + if (!pid) >>> + return 0; >>> + >>> + if (pid->level < ns->level) >>> + return 0; >>> + >>> + if (pid->numbers[ns->level].ns != ns) >>> + return 0; >>> + >>> + return 1; >>> +} >>> + >>> static __init int pid_namespaces_init(void) >>> { >>> pid_ns_cachep = KMEM_CACHE(pid_namespace, SLAB_PANIC); >>> diff --git a/kernel/signal.c b/kernel/signal.c >>> index 6c0958e..93713a5 100644 >>> --- a/kernel/signal.c >>> +++ b/kernel/signal.c >>> @@ -1145,7 +1145,8 @@ static int kill_something_info(int sig, struct >>> siginfo *info, int pid) >>> struct task_struct * p; >>> >>> for_each_process(p) { >>> - if (p->pid > 1 && !same_thread_group(p, current)) { >>> + if (p->pid > 1 && !same_thread_group(p, current) && >>> + task_in_pid_ns(p, current->nsproxy->pid_ns)) { >>> int err = group_send_sig_info(sig, info, p); >>> ++count; >>> if (err != -EPERM) > ^ permalink raw reply [flat|nested] 28+ messages in thread
* Re: [PATCH 1/2] signals: kill(-1) should only signal processes in the same namespace 2008-07-17 15:24 ` Daniel Hokka Zakrisson 2008-07-17 15:54 ` Pavel Emelyanov 2008-07-17 15:54 ` Pavel Emelyanov @ 2008-07-17 17:45 ` Eric W. Biederman 2008-07-17 18:39 ` Daniel Hokka Zakrisson [not found] ` <m13am8wgk6.fsf-B27657KtZYmhTnVgQlOflh2eb7JE58TQ@public.gmane.org> 2008-07-17 17:45 ` Eric W. Biederman 3 siblings, 2 replies; 28+ messages in thread From: Eric W. Biederman @ 2008-07-17 17:45 UTC (permalink / raw) To: Daniel Hokka Zakrisson Cc: Pavel Emelyanov, linux-kernel, containers, oleg, akpm "Daniel Hokka Zakrisson" <daniel@hozac.com> writes: > Pavel Emelyanov wrote: >> Daniel Hokka Zakrisson wrote: >>> While moving Linux-VServer to using pid namespaces, I noticed that >>> kill(-1) from inside a pid namespace is currently signalling every >>> process in the entire system, including processes that are otherwise >>> unreachable from the current process. >> >> This is not a "news" actually, buy anyway - thanks :) > > And yet nobody's fixed it... Kind of a critical thing, if you actually > want to use them, since most distribution's rc-scripts do a kill(-1, > SIGTERM), followed by kill(-1, SIGKILL) when halting (which, needless to > say, would be very bad). > >>> This patch fixes it by making sure that only processes which are in >>> the same pid namespace as current get signalled. >> >> This is to be done, indeed, but I do not like the proposed implementation, >> since you have to walk all the tasks in the system (under tasklist_lock, >> by the way) to search for a couple of interesting ones. Better look at how >> zap_pid_ns_processes works (by the way - I saw some patch doing so some >> time ago). > > The way zap_pid_ns_processes does it is worse, since it signals every > thread in the namespace rather than every thread group. So either we walk > the global tasklist, or we create a per-namespace one. Is that what we > want? 
Can you please introduce kill_pidns_info and have both kill_something_info and zap_pid_ns_processes call this common function? We want to walk the set of all pids in a pid namespace. /proc does this and it is the recommended idiom. If walking all of the pids in a pid namespace is not fast enough, we can accelerate that. You are correct that signalling every thread in a namespace is worse; in fact it is semantically incorrect. zap_pid_ns_processes gets away with it because it is sending SIGKILL. Therefore kill_pidns_info should skip sending a signal to every task that is not the thread_group_leader. We need to hold the tasklist_lock to prevent new processes from joining the list of all processes. Otherwise we could run the code under the rcu_read_lock. Eric ^ permalink raw reply [flat|nested] 28+ messages in thread
* Re: [PATCH 1/2] signals: kill(-1) should only signal processes in the same namespace 2008-07-17 17:45 ` Eric W. Biederman @ 2008-07-17 18:39 ` Daniel Hokka Zakrisson 2008-07-17 18:45 ` Eric W. Biederman 2008-07-17 18:45 ` Eric W. Biederman [not found] ` <m13am8wgk6.fsf-B27657KtZYmhTnVgQlOflh2eb7JE58TQ@public.gmane.org> 1 sibling, 2 replies; 28+ messages in thread From: Daniel Hokka Zakrisson @ 2008-07-17 18:39 UTC (permalink / raw) To: Eric W. Biederman; +Cc: Pavel Emelyanov, linux-kernel, containers, oleg, akpm Eric W. Biederman wrote: > "Daniel Hokka Zakrisson" <daniel@hozac.com> writes: > >> Pavel Emelyanov wrote: >>> Daniel Hokka Zakrisson wrote: >>>> While moving Linux-VServer to using pid namespaces, I noticed that >>>> kill(-1) from inside a pid namespace is currently signalling every >>>> process in the entire system, including processes that are otherwise >>>> unreachable from the current process. >>> >>> This is not a "news" actually, buy anyway - thanks :) >> >> And yet nobody's fixed it... Kind of a critical thing, if you actually >> want to use them, since most distribution's rc-scripts do a kill(-1, >> SIGTERM), followed by kill(-1, SIGKILL) when halting (which, needless to >> say, would be very bad). >> >>>> This patch fixes it by making sure that only processes which are in >>>> the same pid namespace as current get signalled. >>> >>> This is to be done, indeed, but I do not like the proposed >>> implementation, >>> since you have to walk all the tasks in the system (under >>> tasklist_lock, >>> by the way) to search for a couple of interesting ones. Better look at >>> how >>> zap_pid_ns_processes works (by the way - I saw some patch doing so some >>> time ago). >> >> The way zap_pid_ns_processes does it is worse, since it signals every >> thread in the namespace rather than every thread group. So either we >> walk >> the global tasklist, or we create a per-namespace one. Is that what we >> want? 
> > Can you please introduce kill_pidns_info and have both > kill_something_info and zap_pid_ns_processes call this common > function? Looks like you've already done that. :-) (Referring to Sukadev's email.) Is there any reason we don't just merge that patch? > We want to walk the set of all pids in a pid namespace. /proc does > this and it is the recommended idiom. If walking all of the pids in a > pid namespace is not fast enough we can accelerate that. > > You are correct signalling every thread in a namespace is worse, in > fact it is semantically incorrect. zap_pid_ns_processes gets away > with it because it is sending SIGKILL. Therefore kill_pidns_info > should skip sending a signal to every task that is not the > thread_group_leader. > > We need to hold the tasklist_lock to prevent new processes from > joining the list of all processes. Otherwise we could run the code > under the rcu_read_lock. > > Eric -- Daniel Hokka Zakrisson ^ permalink raw reply [flat|nested] 28+ messages in thread
* Re: [PATCH 1/2] signals: kill(-1) should only signal processes in the same namespace 2008-07-17 18:39 ` Daniel Hokka Zakrisson @ 2008-07-17 18:45 ` Eric W. Biederman 2008-07-17 18:45 ` Eric W. Biederman 1 sibling, 0 replies; 28+ messages in thread From: Eric W. Biederman @ 2008-07-17 18:45 UTC (permalink / raw) To: Daniel Hokka Zakrisson Cc: Pavel Emelyanov, linux-kernel, containers, oleg, akpm "Daniel Hokka Zakrisson" <daniel@hozac.com> writes: > Looks like you've already done that. :-) (Referring to Sukadev's email.) > Is there any reason we don't just merge that patch? I knew I had done something like that. Sure let's revive the patch and send it. I don't know why it got lost the first time. Eric ^ permalink raw reply [flat|nested] 28+ messages in thread
* Re: [PATCH 1/2] signals: kill(-1) should only signal processes in the same namespace 2008-07-17 18:39 ` Daniel Hokka Zakrisson 2008-07-17 18:45 ` Eric W. Biederman @ 2008-07-17 18:45 ` Eric W. Biederman 1 sibling, 0 replies; 28+ messages in thread From: Eric W. Biederman @ 2008-07-17 18:45 UTC (permalink / raw) To: Daniel Hokka Zakrisson Cc: containers-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA, oleg-6lXkIZvqkOAvJsYlp49lxw, linux-kernel-u79uwXL29TY76Z2rM5mHXA, akpm-de/tnXTf+JLsfHDXvbKv3WD2FQJk+8+b, Pavel Emelyanov "Daniel Hokka Zakrisson" <daniel-nym3zxDgnZcAvxtiuMwx3w@public.gmane.org> writes: > Looks like you've already done that. :-) (Referring to Sukadev's email.) > Is there any reason we don't just merge that patch? I knew I had done something like that. Sure let's revive the patch and send it. I don't know why it got lost the first time. Eric ^ permalink raw reply [flat|nested] 28+ messages in thread
[parent not found: <m13am8wgk6.fsf-B27657KtZYmhTnVgQlOflh2eb7JE58TQ@public.gmane.org>]
* Re: [PATCH 1/2] signals: kill(-1) should only signal processes in the same namespace [not found] ` <m13am8wgk6.fsf-B27657KtZYmhTnVgQlOflh2eb7JE58TQ@public.gmane.org> @ 2008-07-17 18:39 ` Daniel Hokka Zakrisson 0 siblings, 0 replies; 28+ messages in thread From: Daniel Hokka Zakrisson @ 2008-07-17 18:39 UTC (permalink / raw) To: Eric W. Biederman Cc: containers-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA, oleg-6lXkIZvqkOAvJsYlp49lxw, linux-kernel-u79uwXL29TY76Z2rM5mHXA, akpm-de/tnXTf+JLsfHDXvbKv3WD2FQJk+8+b, Pavel Emelyanov Eric W. Biederman wrote: > "Daniel Hokka Zakrisson" <daniel-nym3zxDgnZcAvxtiuMwx3w@public.gmane.org> writes: > >> Pavel Emelyanov wrote: >>> Daniel Hokka Zakrisson wrote: >>>> While moving Linux-VServer to using pid namespaces, I noticed that >>>> kill(-1) from inside a pid namespace is currently signalling every >>>> process in the entire system, including processes that are otherwise >>>> unreachable from the current process. >>> >>> This is not a "news" actually, buy anyway - thanks :) >> >> And yet nobody's fixed it... Kind of a critical thing, if you actually >> want to use them, since most distribution's rc-scripts do a kill(-1, >> SIGTERM), followed by kill(-1, SIGKILL) when halting (which, needless to >> say, would be very bad). >> >>>> This patch fixes it by making sure that only processes which are in >>>> the same pid namespace as current get signalled. >>> >>> This is to be done, indeed, but I do not like the proposed >>> implementation, >>> since you have to walk all the tasks in the system (under >>> tasklist_lock, >>> by the way) to search for a couple of interesting ones. Better look at >>> how >>> zap_pid_ns_processes works (by the way - I saw some patch doing so some >>> time ago). >> >> The way zap_pid_ns_processes does it is worse, since it signals every >> thread in the namespace rather than every thread group. So either we >> walk >> the global tasklist, or we create a per-namespace one. Is that what we >> want? 
> > Can you please introduce kill_pidns_info and have both > kill_something_info and zap_pid_ns_processes call this common > function? Looks like you've already done that. :-) (Referring to Sukadev's email.) Is there any reason we don't just merge that patch? > We want to walk the set of all pids in a pid namespace. /proc does > this and it is the recommended idiom. If walking all of the pids in a > pid namespace is not fast enough we can accelerate that. > > You are correct signalling every thread in a namespace is worse, in > fact it is semantically incorrect. zap_pid_ns_processes gets away > with it because it is sending SIGKILL. Therefore kill_pidns_info > should skip sending a signal to every task that is not the > thread_group_leader. > > We need to hold the tasklist_lock to prevent new processes from > joining the list of all processes. Otherwise we could run the code > under the rcu_read_lock. > > Eric -- Daniel Hokka Zakrisson ^ permalink raw reply [flat|nested] 28+ messages in thread
* Re: [PATCH 1/2] signals: kill(-1) should only signal processes in the same namespace 2008-07-17 15:24 ` Daniel Hokka Zakrisson ` (2 preceding siblings ...) 2008-07-17 17:45 ` Eric W. Biederman @ 2008-07-17 17:45 ` Eric W. Biederman 3 siblings, 0 replies; 28+ messages in thread From: Eric W. Biederman @ 2008-07-17 17:45 UTC (permalink / raw) To: Daniel Hokka Zakrisson Cc: containers-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA, oleg-6lXkIZvqkOAvJsYlp49lxw, linux-kernel-u79uwXL29TY76Z2rM5mHXA, akpm-de/tnXTf+JLsfHDXvbKv3WD2FQJk+8+b, Pavel Emelyanov "Daniel Hokka Zakrisson" <daniel-nym3zxDgnZcAvxtiuMwx3w@public.gmane.org> writes: > Pavel Emelyanov wrote: >> Daniel Hokka Zakrisson wrote: >>> While moving Linux-VServer to using pid namespaces, I noticed that >>> kill(-1) from inside a pid namespace is currently signalling every >>> process in the entire system, including processes that are otherwise >>> unreachable from the current process. >> >> This is not a "news" actually, buy anyway - thanks :) > > And yet nobody's fixed it... Kind of a critical thing, if you actually > want to use them, since most distribution's rc-scripts do a kill(-1, > SIGTERM), followed by kill(-1, SIGKILL) when halting (which, needless to > say, would be very bad). > >>> This patch fixes it by making sure that only processes which are in >>> the same pid namespace as current get signalled. >> >> This is to be done, indeed, but I do not like the proposed implementation, >> since you have to walk all the tasks in the system (under tasklist_lock, >> by the way) to search for a couple of interesting ones. Better look at how >> zap_pid_ns_processes works (by the way - I saw some patch doing so some >> time ago). > > The way zap_pid_ns_processes does it is worse, since it signals every > thread in the namespace rather than every thread group. So either we walk > the global tasklist, or we create a per-namespace one. Is that what we > want? 
Can you please introduce kill_pidns_info and have both kill_something_info and zap_pid_ns_processes call this common function? We want to walk the set of all pids in a pid namespace. /proc does this and it is the recommended idiom. If walking all of the pids in a pid namespace is not fast enough, we can accelerate that. You are correct that signalling every thread in a namespace is worse; in fact it is semantically incorrect. zap_pid_ns_processes gets away with it because it is sending SIGKILL. Therefore kill_pidns_info should skip sending a signal to every task that is not the thread_group_leader. We need to hold the tasklist_lock to prevent new processes from joining the list of all processes. Otherwise we could run the code under the rcu_read_lock. Eric ^ permalink raw reply [flat|nested] 28+ messages in thread
* Re: [PATCH 1/2] signals: kill(-1) should only signal processes in the same namespace 2008-07-17 14:55 ` Daniel Hokka Zakrisson ` (2 preceding siblings ...) (?) @ 2008-07-17 18:13 ` sukadev 2008-07-17 18:44 ` Daniel Hokka Zakrisson ` (2 more replies) -1 siblings, 3 replies; 28+ messages in thread From: sukadev @ 2008-07-17 18:13 UTC (permalink / raw) To: Daniel Hokka Zakrisson Cc: linux-kernel, containers, xemul, oleg, akpm, ebiederm Daniel Hokka Zakrisson [daniel@hozac.com] wrote: | While moving Linux-VServer to using pid namespaces, I noticed that | kill(-1) from inside a pid namespace is currently signalling every | process in the entire system, including processes that are otherwise | unreachable from the current process. | | This patch fixes it by making sure that only processes which are in | the same pid namespace as current get signalled. | | Signed-off-by: Daniel Hokka Zakrisson <daniel@hozac.com> | | diff --git a/include/linux/pid_namespace.h b/include/linux/pid_namespace.h | index caff528..4cf41bd 100644 | --- a/include/linux/pid_namespace.h | +++ b/include/linux/pid_namespace.h | @@ -40,6 +40,8 @@ static inline struct pid_namespace *get_pid_ns(struct | pid_namespace *ns) | extern struct pid_namespace *copy_pid_ns(unsigned long flags, struct | pid_namespace *ns); | extern void free_pid_ns(struct kref *kref); | extern void zap_pid_ns_processes(struct pid_namespace *pid_ns); | +extern int task_in_pid_ns(struct task_struct *tsk, | + struct pid_namespace *pid_ns); | | static inline void put_pid_ns(struct pid_namespace *ns) | { | @@ -72,6 +74,12 @@ static inline void zap_pid_ns_processes(struct | pid_namespace *ns) | { | BUG(); | } | + | +static inline int task_in_pid_ns(struct task_struct *tsk, | + struct pid_namespace *ns) | +{ | + return 1; | +} | #endif /* CONFIG_PID_NS */ | | static inline struct pid_namespace *task_active_pid_ns(struct | task_struct *tsk) | diff --git a/kernel/pid_namespace.c b/kernel/pid_namespace.c | index 98702b4..3e71011 100644 | 
--- a/kernel/pid_namespace.c | +++ b/kernel/pid_namespace.c | @@ -188,6 +188,26 @@ void zap_pid_ns_processes(struct pid_namespace *pid_ns) | return; | } | | +/* | + * Checks whether tsk has a pid in the pid namespace ns. | + * Must be called with tasklist_lock read-locked or under rcu_read_lock() | + */ | +int task_in_pid_ns(struct task_struct *tsk, struct pid_namespace *ns) | +{ | + struct pid *pid = task_pid(tsk); | + | + if (!pid) | + return 0; | + | + if (pid->level < ns->level) | + return 0; ns can be NULL if tsk is exiting. Like Pavel said, we had couple of attempts to fix the larger problem of signal semantics in containers but did not have a consensus on handling blocked/unhandled signals to container-init. It would still be good to fix this "kill -1" problem. Eric had a slightly optimized interface, 'pid_in_pid_ns()' in following patchset. Maybe we could use that ? https://lists.linux-foundation.org/pipermail/containers/2007-December/009174.html | + | + if (pid->numbers[ns->level].ns != ns) | + return 0; | + | + return 1; | +} | + | static __init int pid_namespaces_init(void) | { | pid_ns_cachep = KMEM_CACHE(pid_namespace, SLAB_PANIC); | diff --git a/kernel/signal.c b/kernel/signal.c | index 6c0958e..93713a5 100644 | --- a/kernel/signal.c | +++ b/kernel/signal.c | @@ -1145,7 +1145,8 @@ static int kill_something_info(int sig, struct | siginfo *info, int pid) | struct task_struct * p; | | for_each_process(p) { | - if (p->pid > 1 && !same_thread_group(p, current)) { | + if (p->pid > 1 && !same_thread_group(p, current) && | + task_in_pid_ns(p, current->nsproxy->pid_ns)) { | int err = group_send_sig_info(sig, info, p); | ++count; | if (err != -EPERM) | -- | 1.5.5.1 | _______________________________________________ | Containers mailing list | Containers@lists.linux-foundation.org | https://lists.linux-foundation.org/mailman/listinfo/containers ^ permalink raw reply [flat|nested] 28+ messages in thread
* Re: [PATCH 1/2] signals: kill(-1) should only signal processes in the same namespace 2008-07-17 18:13 ` sukadev @ 2008-07-17 18:44 ` Daniel Hokka Zakrisson 2008-07-17 18:46 ` Eric W. Biederman [not found] ` <20080717181313.GA2289-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org> 2 siblings, 0 replies; 28+ messages in thread From: Daniel Hokka Zakrisson @ 2008-07-17 18:44 UTC (permalink / raw) To: sukadev; +Cc: linux-kernel, containers, xemul, oleg, akpm, ebiederm sukadev@us.ibm.com wrote: > Daniel Hokka Zakrisson [daniel@hozac.com] wrote: > | While moving Linux-VServer to using pid namespaces, I noticed that > | kill(-1) from inside a pid namespace is currently signalling every > | process in the entire system, including processes that are otherwise > | unreachable from the current process. > | > | This patch fixes it by making sure that only processes which are in > | the same pid namespace as current get signalled. > | > | Signed-off-by: Daniel Hokka Zakrisson <daniel@hozac.com> > | > | diff --git a/include/linux/pid_namespace.h > b/include/linux/pid_namespace.h > | index caff528..4cf41bd 100644 > | --- a/include/linux/pid_namespace.h > | +++ b/include/linux/pid_namespace.h > | @@ -40,6 +40,8 @@ static inline struct pid_namespace *get_pid_ns(struct > | pid_namespace *ns) > | extern struct pid_namespace *copy_pid_ns(unsigned long flags, struct > | pid_namespace *ns); > | extern void free_pid_ns(struct kref *kref); > | extern void zap_pid_ns_processes(struct pid_namespace *pid_ns); > | +extern int task_in_pid_ns(struct task_struct *tsk, > | + struct pid_namespace *pid_ns); > | > | static inline void put_pid_ns(struct pid_namespace *ns) > | { > | @@ -72,6 +74,12 @@ static inline void zap_pid_ns_processes(struct > | pid_namespace *ns) > | { > | BUG(); > | } > | + > | +static inline int task_in_pid_ns(struct task_struct *tsk, > | + struct pid_namespace *ns) > | +{ > | + return 1; > | +} > | #endif /* CONFIG_PID_NS */ > | > | static inline struct pid_namespace 
*task_active_pid_ns(struct > | task_struct *tsk) > | diff --git a/kernel/pid_namespace.c b/kernel/pid_namespace.c > | index 98702b4..3e71011 100644 > | --- a/kernel/pid_namespace.c > | +++ b/kernel/pid_namespace.c > | @@ -188,6 +188,26 @@ void zap_pid_ns_processes(struct pid_namespace > *pid_ns) > | return; > | } > | > | +/* > | + * Checks whether tsk has a pid in the pid namespace ns. > | + * Must be called with tasklist_lock read-locked or under > rcu_read_lock() > | + */ > | +int task_in_pid_ns(struct task_struct *tsk, struct pid_namespace *ns) > | +{ > | + struct pid *pid = task_pid(tsk); > | + > | + if (!pid) > | + return 0; > | + > | + if (pid->level < ns->level) > | + return 0; > > ns can be NULL if tsk is exiting. ns is from current, and this is currently only called from kill_something_info, so it should not be exiting in this path. > Like Pavel said, we had couple of attempts to fix the larger problem of > signal semantics in containers but did not have a consensus on handling > blocked/unhandled signals to container-init. > > It would still be good to fix this "kill -1" problem. It is a separate issue, so, yeah. > Eric had a slightly optimized interface, 'pid_in_pid_ns()' in following > patchset. Maybe we could use that ? > > https://lists.linux-foundation.org/pipermail/containers/2007-December/009174.html See my response to Eric. I think that patch looks good... (Well, nr could be set to 2 initially, to avoid the nr <= 1 check.) 
> | + > | + if (pid->numbers[ns->level].ns != ns) > | + return 0; > | + > | + return 1; > | +} > | + > | static __init int pid_namespaces_init(void) > | { > | pid_ns_cachep = KMEM_CACHE(pid_namespace, SLAB_PANIC); > | diff --git a/kernel/signal.c b/kernel/signal.c > | index 6c0958e..93713a5 100644 > | --- a/kernel/signal.c > | +++ b/kernel/signal.c > | @@ -1145,7 +1145,8 @@ static int kill_something_info(int sig, struct > | siginfo *info, int pid) > | struct task_struct * p; > | > | for_each_process(p) { > | - if (p->pid > 1 && !same_thread_group(p, current)) { > | + if (p->pid > 1 && !same_thread_group(p, current) && > | + task_in_pid_ns(p, current->nsproxy->pid_ns)) { > | int err = group_send_sig_info(sig, info, p); > | ++count; > | if (err != -EPERM) > | -- > | 1.5.5.1 -- Daniel Hokka Zakrisson ^ permalink raw reply [flat|nested] 28+ messages in thread
* Re: [PATCH 1/2] signals: kill(-1) should only signal processes in the same namespace 2008-07-17 18:13 ` sukadev 2008-07-17 18:44 ` Daniel Hokka Zakrisson @ 2008-07-17 18:46 ` Eric W. Biederman [not found] ` <20080717181313.GA2289-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org> 2 siblings, 0 replies; 28+ messages in thread From: Eric W. Biederman @ 2008-07-17 18:46 UTC (permalink / raw) To: sukadev Cc: Daniel Hokka Zakrisson, linux-kernel, containers, xemul, oleg, akpm, ebiederm sukadev@us.ibm.com writes: > > Like Pavel said, we had couple of attempts to fix the larger problem of > signal semantics in containers but did not have a consensus on handling > blocked/unhandled signals to container-init. Oh. I thought we were pretty close then I or somebody ran out of steam. > It would still be good to fix this "kill -1" problem. > > Eric had a slightly optimized interface, 'pid_in_pid_ns()' in following > patchset. Maybe we could use that ? > > https://lists.linux-foundation.org/pipermail/containers/2007-December/009174.html Eric ^ permalink raw reply [flat|nested] 28+ messages in thread
[parent not found: <20080717181313.GA2289-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>]
* Re: [PATCH 1/2] signals: kill(-1) should only signal processes in the same namespace [not found] ` <20080717181313.GA2289-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org> @ 2008-07-17 18:44 ` Daniel Hokka Zakrisson 2008-07-17 18:46 ` Eric W. Biederman 1 sibling, 0 replies; 28+ messages in thread From: Daniel Hokka Zakrisson @ 2008-07-17 18:44 UTC (permalink / raw) To: sukadev-r/Jw6+rmf7HQT0dZR+AlfA Cc: containers-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA, linux-kernel-u79uwXL29TY76Z2rM5mHXA, ebiederm-aS9lmoZGLiVWk0Htik3J/w, akpm-de/tnXTf+JLsfHDXvbKv3WD2FQJk+8+b, oleg-6lXkIZvqkOAvJsYlp49lxw, xemul-GEFAQzZX7r8dnm+yROfE0A sukadev-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org wrote: > Daniel Hokka Zakrisson [daniel-nym3zxDgnZcAvxtiuMwx3w@public.gmane.org] wrote: > | While moving Linux-VServer to using pid namespaces, I noticed that > | kill(-1) from inside a pid namespace is currently signalling every > | process in the entire system, including processes that are otherwise > | unreachable from the current process. > | > | This patch fixes it by making sure that only processes which are in > | the same pid namespace as current get signalled. 
> | > | Signed-off-by: Daniel Hokka Zakrisson <daniel-nym3zxDgnZcAvxtiuMwx3w@public.gmane.org> > | > | diff --git a/include/linux/pid_namespace.h > b/include/linux/pid_namespace.h > | index caff528..4cf41bd 100644 > | --- a/include/linux/pid_namespace.h > | +++ b/include/linux/pid_namespace.h > | @@ -40,6 +40,8 @@ static inline struct pid_namespace *get_pid_ns(struct > | pid_namespace *ns) > | extern struct pid_namespace *copy_pid_ns(unsigned long flags, struct > | pid_namespace *ns); > | extern void free_pid_ns(struct kref *kref); > | extern void zap_pid_ns_processes(struct pid_namespace *pid_ns); > | +extern int task_in_pid_ns(struct task_struct *tsk, > | + struct pid_namespace *pid_ns); > | > | static inline void put_pid_ns(struct pid_namespace *ns) > | { > | @@ -72,6 +74,12 @@ static inline void zap_pid_ns_processes(struct > | pid_namespace *ns) > | { > | BUG(); > | } > | + > | +static inline int task_in_pid_ns(struct task_struct *tsk, > | + struct pid_namespace *ns) > | +{ > | + return 1; > | +} > | #endif /* CONFIG_PID_NS */ > | > | static inline struct pid_namespace *task_active_pid_ns(struct > | task_struct *tsk) > | diff --git a/kernel/pid_namespace.c b/kernel/pid_namespace.c > | index 98702b4..3e71011 100644 > | --- a/kernel/pid_namespace.c > | +++ b/kernel/pid_namespace.c > | @@ -188,6 +188,26 @@ void zap_pid_ns_processes(struct pid_namespace > *pid_ns) > | return; > | } > | > | +/* > | + * Checks whether tsk has a pid in the pid namespace ns. > | + * Must be called with tasklist_lock read-locked or under > rcu_read_lock() > | + */ > | +int task_in_pid_ns(struct task_struct *tsk, struct pid_namespace *ns) > | +{ > | + struct pid *pid = task_pid(tsk); > | + > | + if (!pid) > | + return 0; > | + > | + if (pid->level < ns->level) > | + return 0; > > ns can be NULL if tsk is exiting. ns is from current, and this is currently only called from kill_something_info, so it should not be exiting in this path. 
> Like Pavel said, we had couple of attempts to fix the larger problem of > signal semantics in containers but did not have a consensus on handling > blocked/unhandled signals to container-init. > > It would still be good to fix this "kill -1" problem. It is a separate issue, so, yeah. > Eric had a slightly optimized interface, 'pid_in_pid_ns()' in following > patchset. Maybe we could use that ? > > https://lists.linux-foundation.org/pipermail/containers/2007-December/009174.html See my response to Eric. I think that patch looks good... (Well, nr could be set to 2 initially, to avoid the nr <= 1 check.) > | + > | + if (pid->numbers[ns->level].ns != ns) > | + return 0; > | + > | + return 1; > | +} > | + > | static __init int pid_namespaces_init(void) > | { > | pid_ns_cachep = KMEM_CACHE(pid_namespace, SLAB_PANIC); > | diff --git a/kernel/signal.c b/kernel/signal.c > | index 6c0958e..93713a5 100644 > | --- a/kernel/signal.c > | +++ b/kernel/signal.c > | @@ -1145,7 +1145,8 @@ static int kill_something_info(int sig, struct > | siginfo *info, int pid) > | struct task_struct * p; > | > | for_each_process(p) { > | - if (p->pid > 1 && !same_thread_group(p, current)) { > | + if (p->pid > 1 && !same_thread_group(p, current) && > | + task_in_pid_ns(p, current->nsproxy->pid_ns)) { > | int err = group_send_sig_info(sig, info, p); > | ++count; > | if (err != -EPERM) > | -- > | 1.5.5.1 -- Daniel Hokka Zakrisson ^ permalink raw reply [flat|nested] 28+ messages in thread
* Re: [PATCH 1/2] signals: kill(-1) should only signal processes in the same namespace [not found] ` <20080717181313.GA2289-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org> 2008-07-17 18:44 ` Daniel Hokka Zakrisson @ 2008-07-17 18:46 ` Eric W. Biederman 1 sibling, 0 replies; 28+ messages in thread From: Eric W. Biederman @ 2008-07-17 18:46 UTC (permalink / raw) To: sukadev-r/Jw6+rmf7HQT0dZR+AlfA Cc: containers-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA, linux-kernel-u79uwXL29TY76Z2rM5mHXA, ebiederm-aS9lmoZGLiVWk0Htik3J/w, akpm-de/tnXTf+JLsfHDXvbKv3WD2FQJk+8+b, oleg-6lXkIZvqkOAvJsYlp49lxw, xemul-GEFAQzZX7r8dnm+yROfE0A sukadev-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org writes: > > Like Pavel said, we had couple of attempts to fix the larger problem of > signal semantics in containers but did not have a consensus on handling > blocked/unhandled signals to container-init. Oh. I thought we were pretty close then I or somebody ran out of steam. > It would still be good to fix this "kill -1" problem. > > Eric had a slightly optimized interface, 'pid_in_pid_ns()' in following > patchset. Maybe we could use that ? > > https://lists.linux-foundation.org/pipermail/containers/2007-December/009174.html Eric ^ permalink raw reply [flat|nested] 28+ messages in thread
* Re: [PATCH 1/2] signals: kill(-1) should only signal processes in the same namespace 2008-07-17 14:55 ` Daniel Hokka Zakrisson ` (3 preceding siblings ...) (?) @ 2008-07-23 14:34 ` Oleg Nesterov [not found] ` <20080723143411.GA2905-6lXkIZvqkOAvJsYlp49lxw@public.gmane.org> -1 siblings, 1 reply; 28+ messages in thread From: Oleg Nesterov @ 2008-07-23 14:34 UTC (permalink / raw) To: Daniel Hokka Zakrisson; +Cc: linux-kernel, containers, ebiederm, xemul, akpm On 07/17, Daniel Hokka Zakrisson wrote: > > +int task_in_pid_ns(struct task_struct *tsk, struct pid_namespace *ns) > +{ > + struct pid *pid = task_pid(tsk); > + > + if (!pid) > + return 0; > + > + if (pid->level < ns->level) > + return 0; > + > + if (pid->numbers[ns->level].ns != ns) > + return 0; > + > + return 1; > +} > + > static __init int pid_namespaces_init(void) > { > pid_ns_cachep = KMEM_CACHE(pid_namespace, SLAB_PANIC); > diff --git a/kernel/signal.c b/kernel/signal.c > index 6c0958e..93713a5 100644 > --- a/kernel/signal.c > +++ b/kernel/signal.c > @@ -1145,7 +1145,8 @@ static int kill_something_info(int sig, struct > siginfo *info, int pid) > struct task_struct * p; > > for_each_process(p) { > - if (p->pid > 1 && !same_thread_group(p, current)) { > + if (p->pid > 1 && !same_thread_group(p, current) && > + task_in_pid_ns(p, current->nsproxy->pid_ns)) { > int err = group_send_sig_info(sig, info, p); > ++count; > if (err != -EPERM) Do we really need all these complications? Afaics, we can make a simpler patch, --- kernel/signal.c +++ kernel/signal.c @@ -1136,7 +1136,7 @@ static int kill_something_info(int sig, struct task_struct * p; for_each_process(p) { - if (p->pid > 1 && !same_thread_group(p, current)) { + if (task_pid_vnr(p) > 1 && !same_thread_group(p, current)) { int err = group_send_sig_info(sig, info, p); ++count; if (err != -EPERM) task_pid_vnr(p) returns 0 if "p" is not visible from the current's namespace. "> 1" ensures we don't kill the child reaper as well. No? Oleg. 
^ permalink raw reply [flat|nested] 28+ messages in thread
[parent not found: <20080723143411.GA2905-6lXkIZvqkOAvJsYlp49lxw@public.gmane.org>]
* Re: [PATCH 1/2] signals: kill(-1) should only signal processes in the same namespace 2008-07-23 14:34 ` Oleg Nesterov @ 2008-07-23 16:09 ` Daniel Hokka Zakrisson 0 siblings, 0 replies; 28+ messages in thread From: Daniel Hokka Zakrisson @ 2008-07-23 16:09 UTC (permalink / raw) To: Oleg Nesterov Cc: containers-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA, xemul-GEFAQzZX7r8dnm+yROfE0A, linux-kernel-u79uwXL29TY76Z2rM5mHXA, akpm-de/tnXTf+JLsfHDXvbKv3WD2FQJk+8+b, ebiederm-aS9lmoZGLiVWk0Htik3J/w Oleg Nesterov wrote: > On 07/17, Daniel Hokka Zakrisson wrote: >> >> +int task_in_pid_ns(struct task_struct *tsk, struct pid_namespace *ns) >> +{ >> + struct pid *pid = task_pid(tsk); >> + >> + if (!pid) >> + return 0; >> + >> + if (pid->level < ns->level) >> + return 0; >> + >> + if (pid->numbers[ns->level].ns != ns) >> + return 0; >> + >> + return 1; >> +} >> + >> static __init int pid_namespaces_init(void) >> { >> pid_ns_cachep = KMEM_CACHE(pid_namespace, SLAB_PANIC); >> diff --git a/kernel/signal.c b/kernel/signal.c >> index 6c0958e..93713a5 100644 >> --- a/kernel/signal.c >> +++ b/kernel/signal.c >> @@ -1145,7 +1145,8 @@ static int kill_something_info(int sig, struct >> siginfo *info, int pid) >> struct task_struct * p; >> >> for_each_process(p) { >> - if (p->pid > 1 && !same_thread_group(p, current)) { >> + if (p->pid > 1 && !same_thread_group(p, current) && >> + task_in_pid_ns(p, current->nsproxy->pid_ns)) { >> int err = group_send_sig_info(sig, info, p); >> ++count; >> if (err != -EPERM) > > Do we really need all these complications? 
Afaics, we can make > a simpler patch, > > --- kernel/signal.c > +++ kernel/signal.c > @@ -1136,7 +1136,7 @@ static int kill_something_info(int sig, > struct task_struct * p; > > for_each_process(p) { > - if (p->pid > 1 && !same_thread_group(p, current)) { > + if (task_pid_vnr(p) > 1 && !same_thread_group(p, current)) { > int err = group_send_sig_info(sig, info, p); > ++count; > if (err != -EPERM) > > > task_pid_vnr(p) returns 0 if "p" is not visible from the current's > namespace. "> 1" ensures we don't kill the child reaper as well. > > No? > > Oleg. You are absolutely right, that is sufficient and much cleaner. -- Daniel Hokka Zakrisson ^ permalink raw reply [flat|nested] 28+ messages in thread
* Re: [PATCH 1/2] signals: kill(-1) should only signal processes in the same namespace @ 2008-07-23 16:09 ` Daniel Hokka Zakrisson 0 siblings, 0 replies; 28+ messages in thread From: Daniel Hokka Zakrisson @ 2008-07-23 16:09 UTC (permalink / raw) To: Oleg Nesterov; +Cc: linux-kernel, containers, ebiederm, xemul, akpm Oleg Nesterov wrote: > On 07/17, Daniel Hokka Zakrisson wrote: >> >> +int task_in_pid_ns(struct task_struct *tsk, struct pid_namespace *ns) >> +{ >> + struct pid *pid = task_pid(tsk); >> + >> + if (!pid) >> + return 0; >> + >> + if (pid->level < ns->level) >> + return 0; >> + >> + if (pid->numbers[ns->level].ns != ns) >> + return 0; >> + >> + return 1; >> +} >> + >> static __init int pid_namespaces_init(void) >> { >> pid_ns_cachep = KMEM_CACHE(pid_namespace, SLAB_PANIC); >> diff --git a/kernel/signal.c b/kernel/signal.c >> index 6c0958e..93713a5 100644 >> --- a/kernel/signal.c >> +++ b/kernel/signal.c >> @@ -1145,7 +1145,8 @@ static int kill_something_info(int sig, struct >> siginfo *info, int pid) >> struct task_struct * p; >> >> for_each_process(p) { >> - if (p->pid > 1 && !same_thread_group(p, current)) { >> + if (p->pid > 1 && !same_thread_group(p, current) && >> + task_in_pid_ns(p, current->nsproxy->pid_ns)) { >> int err = group_send_sig_info(sig, info, p); >> ++count; >> if (err != -EPERM) > > Do we really need all these complications? Afaics, we can make > a simpler patch, > > --- kernel/signal.c > +++ kernel/signal.c > @@ -1136,7 +1136,7 @@ static int kill_something_info(int sig, > struct task_struct * p; > > for_each_process(p) { > - if (p->pid > 1 && !same_thread_group(p, current)) { > + if (task_pid_vnr(p) > 1 && !same_thread_group(p, current)) { > int err = group_send_sig_info(sig, info, p); > ++count; > if (err != -EPERM) > > > task_pid_vnr(p) returns 0 if "p" is not visible from the current's > namespace. "> 1" ensures we don't kill the child reaper as well. > > No? > > Oleg. You are absolutely right, that is sufficient and much cleaner. 
-- Daniel Hokka Zakrisson ^ permalink raw reply [flat|nested] 28+ messages in thread
end of thread, other threads:[~2008-07-23 16:10 UTC | newest]
Thread overview: 28+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2008-07-17 14:55 [PATCH 1/2] signals: kill(-1) should only signal processes in the same namespace Daniel Hokka Zakrisson
2008-07-17 14:55 ` Daniel Hokka Zakrisson
[not found] ` <487F5D6B.1090007-nym3zxDgnZcAvxtiuMwx3w@public.gmane.org>
2008-07-17 15:01 ` Pavel Emelyanov
2008-07-17 18:13 ` sukadev-r/Jw6+rmf7HQT0dZR+AlfA
2008-07-23 14:34 ` Oleg Nesterov
2008-07-17 15:01 ` Pavel Emelyanov
[not found] ` <487F5EDB.1000008-GEFAQzZX7r8dnm+yROfE0A@public.gmane.org>
2008-07-17 15:24 ` Daniel Hokka Zakrisson
2008-07-17 15:24 ` Daniel Hokka Zakrisson
2008-07-17 15:54 ` Pavel Emelyanov
[not found] ` <487F6B2E.40101-GEFAQzZX7r8dnm+yROfE0A@public.gmane.org>
2008-07-17 17:32 ` Oleg Nesterov
2008-07-17 17:32 ` Oleg Nesterov
[not found] ` <20080717173254.GA20948-6lXkIZvqkOAvJsYlp49lxw@public.gmane.org>
2008-07-17 17:50 ` Eric W. Biederman
2008-07-17 17:50 ` Eric W. Biederman
2008-07-17 15:54 ` Pavel Emelyanov
2008-07-17 17:45 ` Eric W. Biederman
2008-07-17 18:39 ` Daniel Hokka Zakrisson
2008-07-17 18:45 ` Eric W. Biederman
2008-07-17 18:45 ` Eric W. Biederman
[not found] ` <m13am8wgk6.fsf-B27657KtZYmhTnVgQlOflh2eb7JE58TQ@public.gmane.org>
2008-07-17 18:39 ` Daniel Hokka Zakrisson
2008-07-17 17:45 ` Eric W. Biederman
2008-07-17 18:13 ` sukadev
2008-07-17 18:44 ` Daniel Hokka Zakrisson
2008-07-17 18:46 ` Eric W. Biederman
[not found] ` <20080717181313.GA2289-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>
2008-07-17 18:44 ` Daniel Hokka Zakrisson
2008-07-17 18:46 ` Eric W. Biederman
2008-07-23 14:34 ` Oleg Nesterov
[not found] ` <20080723143411.GA2905-6lXkIZvqkOAvJsYlp49lxw@public.gmane.org>
2008-07-23 16:09 ` Daniel Hokka Zakrisson
2008-07-23 16:09 ` Daniel Hokka Zakrisson
This is an external index of several public inboxes; see the mirroring instructions for how to clone and mirror all data and code used by this external index.