All of lore.kernel.org
 help / color / mirror / Atom feed
From: Serge Hallyn <serge.hallyn-Z7WLFzj8eWMS+FvcfC7Uqw@public.gmane.org>
To: Daniel Lezcano <daniel.lezcano-GANU6spQydw@public.gmane.org>
Cc: containers-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA@public.gmane.org,
	oleg-H+wXaHxf7aLQT0dZR+AlfA@public.gmane.org,
	linux-kernel-u79uwXL29TY76Z2rM5mHXA@public.gmane.org,
	mtk.manpages-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org,
	akpm-de/tnXTf+JLsfHDXvbKv3WD2FQJk+8+b@public.gmane.org
Subject: Re: [PATCH 1/1][V5] Add reboot_pid_ns to handle the reboot syscall
Date: Thu, 5 Jan 2012 13:29:54 -0600	[thread overview]
Message-ID: <20120105192954.GA16083@sergelap> (raw)
In-Reply-To: <1325754410-32600-2-git-send-email-daniel.lezcano-GANU6spQydw@public.gmane.org>

Quoting Daniel Lezcano (daniel.lezcano-GANU6spQydw@public.gmane.org):
> In the case of a child pid namespace, rebooting the system does not
> really make sense. When the pid namespace is used in conjunction
> with the other namespaces in order to create a linux container, the
> reboot syscall leads to some problems.
> 
> A container can reboot the host. That can be fixed by dropping
> the sys_reboot capability but we are unable to correctly poweroff/
> halt/reboot a container and the container stays stuck at the shutdown
> time with the container's init process waiting indefinitely.
> 
> After several attempts, no solution from userspace was found to reliably
> handle the shutdown from a container.
> 
> This patch proposes to make the init process of the child pid namespace
> exit with a signal status set to: SIGINT if the child pid namespace called
> "halt/poweroff" and SIGHUP if the child pid namespace called "reboot".
> When the reboot syscall is called and we are not in the initial
> pid namespace, we kill the pid namespace for "HALT", "POWEROFF", "RESTART",
> and "RESTART2". Otherwise we return EINVAL.
> 
> Returning EINVAL is also an easy way to check if this feature is supported
> by the kernel when invoking another 'reboot' option like CAD.
> 
> This way the parent process of the child pid namespace knows if
> it rebooted or not and can take the right decision.
> 
> Signed-off-by: Daniel Lezcano <daniel.lezcano-GANU6spQydw@public.gmane.org>
> Acked-by: Serge Hallyn <serge.hallyn-Z7WLFzj8eWMS+FvcfC7Uqw@public.gmane.org>
> Reviewed-by: Oleg Nesterov <oleg-H+wXaHxf7aLQT0dZR+AlfA@public.gmane.org>

The testcase in [PATCH 0/1] passed for me, and reboot from init_pid_ns
works as usual.

Tested-by: Serge Hallyn <serge.hallyn-Z7WLFzj8eWMS+FvcfC7Uqw@public.gmane.org>

thanks,
-serge

> ---
>  include/linux/pid_namespace.h |    8 +++++++-
>  kernel/pid_namespace.c        |   33 +++++++++++++++++++++++++++++++++
>  kernel/sys.c                  |    8 ++++++++
>  3 files changed, 48 insertions(+), 1 deletions(-)
> 
> diff --git a/include/linux/pid_namespace.h b/include/linux/pid_namespace.h
> index e7cf666..b90c798 100644
> --- a/include/linux/pid_namespace.h
> +++ b/include/linux/pid_namespace.h
> @@ -32,6 +32,7 @@ struct pid_namespace {
>  #endif
>  	gid_t pid_gid;
>  	int hide_pid;
> +	int reboot;
>  };
>  
>  extern struct pid_namespace init_pid_ns;
> @@ -47,6 +48,7 @@ static inline struct pid_namespace *get_pid_ns(struct pid_namespace *ns)
>  extern struct pid_namespace *copy_pid_ns(unsigned long flags, struct pid_namespace *ns);
>  extern void free_pid_ns(struct kref *kref);
>  extern void zap_pid_ns_processes(struct pid_namespace *pid_ns);
> +extern int reboot_pid_ns(struct pid_namespace *pid_ns, int cmd);
>  
>  static inline void put_pid_ns(struct pid_namespace *ns)
>  {
> @@ -74,11 +76,15 @@ static inline void put_pid_ns(struct pid_namespace *ns)
>  {
>  }
>  
> -
>  static inline void zap_pid_ns_processes(struct pid_namespace *ns)
>  {
>  	BUG();
>  }
> +
> +static inline int reboot_pid_ns(struct pid_namespace *pid_ns, int cmd)
> +{
> +	return 0;
> +}
>  #endif /* CONFIG_PID_NS */
>  
>  extern struct pid_namespace *task_active_pid_ns(struct task_struct *tsk);
> diff --git a/kernel/pid_namespace.c b/kernel/pid_namespace.c
> index a896839..0d355e8 100644
> --- a/kernel/pid_namespace.c
> +++ b/kernel/pid_namespace.c
> @@ -15,6 +15,7 @@
>  #include <linux/acct.h>
>  #include <linux/slab.h>
>  #include <linux/proc_fs.h>
> +#include <linux/reboot.h>
>  
>  #define BITS_PER_PAGE		(PAGE_SIZE*8)
>  
> @@ -187,6 +188,9 @@ void zap_pid_ns_processes(struct pid_namespace *pid_ns)
>  		rc = sys_wait4(-1, NULL, __WALL, NULL);
>  	} while (rc != -ECHILD);
>  
> +	if (pid_ns->reboot)
> +		current->signal->group_exit_code = pid_ns->reboot;
> +
>  	acct_exit_ns(pid_ns);
>  	return;
>  }
> @@ -221,6 +225,35 @@ static struct ctl_table pid_ns_ctl_table[] = {
>  
>  static struct ctl_path kern_path[] = { { .procname = "kernel", }, { } };
>  
> +int reboot_pid_ns(struct pid_namespace *pid_ns, int cmd)
> +{
> +	if (pid_ns == &init_pid_ns)
> +		return 0;
> +
> +	switch(cmd) {
> +	case LINUX_REBOOT_CMD_RESTART2:
> +	case LINUX_REBOOT_CMD_RESTART:
> +		pid_ns->reboot = SIGHUP;
> +		break;
> +
> +	case LINUX_REBOOT_CMD_POWER_OFF:
> +	case LINUX_REBOOT_CMD_HALT:
> +		pid_ns->reboot = SIGINT;
> +		break;
> +	default:
> +		return -EINVAL;
> +	}
> +
> +	read_lock(&tasklist_lock);
> +	force_sig(SIGKILL, pid_ns->child_reaper);
> +	read_unlock(&tasklist_lock);
> +
> +	do_exit(0);
> +
> +	/* Not reached */
> +	return 0;
> +}
> +
>  static __init int pid_namespaces_init(void)
>  {
>  	pid_ns_cachep = KMEM_CACHE(pid_namespace, SLAB_PANIC);
> diff --git a/kernel/sys.c b/kernel/sys.c
> index 4070153..bd924fa 100644
> --- a/kernel/sys.c
> +++ b/kernel/sys.c
> @@ -444,6 +444,14 @@ SYSCALL_DEFINE4(reboot, int, magic1, int, magic2, unsigned int, cmd,
>  	                magic2 != LINUX_REBOOT_MAGIC2C))
>  		return -EINVAL;
>  
> +	/* In case the pid namespaces are enabled, the current task is in a
> +	 * child pid_namespace and the command is handled by 'reboot_pid_ns',
> +	 * this one will invoke 'do_exit'.
> +	 */
> +	ret = reboot_pid_ns(task_active_pid_ns(current), cmd);
> +	if (ret)
> +		return ret;
> +
>  	/* Instead of trying to make the power_off code look like
>  	 * halt when pm_power_off is not set do it the easy way.
>  	 */
> -- 
> 1.7.5.4
> 

WARNING: multiple messages have this Message-ID (diff)
From: Serge Hallyn <serge.hallyn@canonical.com>
To: Daniel Lezcano <daniel.lezcano@free.fr>
Cc: akpm@linux-foundation.org, oleg@redhat.com,
	containers@lists.linux-foundation.org, gkurz@fr.ibm.com,
	linux-kernel@vger.kernel.org, mtk.manpages@gmail.com
Subject: Re: [PATCH 1/1][V5] Add reboot_pid_ns to handle the reboot syscall
Date: Thu, 5 Jan 2012 13:29:54 -0600	[thread overview]
Message-ID: <20120105192954.GA16083@sergelap> (raw)
In-Reply-To: <1325754410-32600-2-git-send-email-daniel.lezcano@free.fr>

Quoting Daniel Lezcano (daniel.lezcano@free.fr):
> In the case of a child pid namespace, rebooting the system does not
> really make sense. When the pid namespace is used in conjunction
> with the other namespaces in order to create a linux container, the
> reboot syscall leads to some problems.
> 
> A container can reboot the host. That can be fixed by dropping
> the sys_reboot capability but we are unable to correctly poweroff/
> halt/reboot a container and the container stays stuck at the shutdown
> time with the container's init process waiting indefinitely.
> 
> After several attempts, no solution from userspace was found to reliably
> handle the shutdown from a container.
> 
> This patch proposes to make the init process of the child pid namespace
> exit with a signal status set to: SIGINT if the child pid namespace called
> "halt/poweroff" and SIGHUP if the child pid namespace called "reboot".
> When the reboot syscall is called and we are not in the initial
> pid namespace, we kill the pid namespace for "HALT", "POWEROFF", "RESTART",
> and "RESTART2". Otherwise we return EINVAL.
> 
> Returning EINVAL is also an easy way to check if this feature is supported
> by the kernel when invoking another 'reboot' option like CAD.
> 
> This way the parent process of the child pid namespace knows if
> it rebooted or not and can take the right decision.
> 
> Signed-off-by: Daniel Lezcano <daniel.lezcano@free.fr>
> Acked-by: Serge Hallyn <serge.hallyn@canonical.com>
> Reviewed-by: Oleg Nesterov <oleg@redhat.com>

The testcase in [PATCH 0/1] passed for me, and reboot from init_pid_ns
works as usual.

Tested-by: Serge Hallyn <serge.hallyn@canonical.com>

thanks,
-serge

> ---
>  include/linux/pid_namespace.h |    8 +++++++-
>  kernel/pid_namespace.c        |   33 +++++++++++++++++++++++++++++++++
>  kernel/sys.c                  |    8 ++++++++
>  3 files changed, 48 insertions(+), 1 deletions(-)
> 
> diff --git a/include/linux/pid_namespace.h b/include/linux/pid_namespace.h
> index e7cf666..b90c798 100644
> --- a/include/linux/pid_namespace.h
> +++ b/include/linux/pid_namespace.h
> @@ -32,6 +32,7 @@ struct pid_namespace {
>  #endif
>  	gid_t pid_gid;
>  	int hide_pid;
> +	int reboot;
>  };
>  
>  extern struct pid_namespace init_pid_ns;
> @@ -47,6 +48,7 @@ static inline struct pid_namespace *get_pid_ns(struct pid_namespace *ns)
>  extern struct pid_namespace *copy_pid_ns(unsigned long flags, struct pid_namespace *ns);
>  extern void free_pid_ns(struct kref *kref);
>  extern void zap_pid_ns_processes(struct pid_namespace *pid_ns);
> +extern int reboot_pid_ns(struct pid_namespace *pid_ns, int cmd);
>  
>  static inline void put_pid_ns(struct pid_namespace *ns)
>  {
> @@ -74,11 +76,15 @@ static inline void put_pid_ns(struct pid_namespace *ns)
>  {
>  }
>  
> -
>  static inline void zap_pid_ns_processes(struct pid_namespace *ns)
>  {
>  	BUG();
>  }
> +
> +static inline int reboot_pid_ns(struct pid_namespace *pid_ns, int cmd)
> +{
> +	return 0;
> +}
>  #endif /* CONFIG_PID_NS */
>  
>  extern struct pid_namespace *task_active_pid_ns(struct task_struct *tsk);
> diff --git a/kernel/pid_namespace.c b/kernel/pid_namespace.c
> index a896839..0d355e8 100644
> --- a/kernel/pid_namespace.c
> +++ b/kernel/pid_namespace.c
> @@ -15,6 +15,7 @@
>  #include <linux/acct.h>
>  #include <linux/slab.h>
>  #include <linux/proc_fs.h>
> +#include <linux/reboot.h>
>  
>  #define BITS_PER_PAGE		(PAGE_SIZE*8)
>  
> @@ -187,6 +188,9 @@ void zap_pid_ns_processes(struct pid_namespace *pid_ns)
>  		rc = sys_wait4(-1, NULL, __WALL, NULL);
>  	} while (rc != -ECHILD);
>  
> +	if (pid_ns->reboot)
> +		current->signal->group_exit_code = pid_ns->reboot;
> +
>  	acct_exit_ns(pid_ns);
>  	return;
>  }
> @@ -221,6 +225,35 @@ static struct ctl_table pid_ns_ctl_table[] = {
>  
>  static struct ctl_path kern_path[] = { { .procname = "kernel", }, { } };
>  
> +int reboot_pid_ns(struct pid_namespace *pid_ns, int cmd)
> +{
> +	if (pid_ns == &init_pid_ns)
> +		return 0;
> +
> +	switch(cmd) {
> +	case LINUX_REBOOT_CMD_RESTART2:
> +	case LINUX_REBOOT_CMD_RESTART:
> +		pid_ns->reboot = SIGHUP;
> +		break;
> +
> +	case LINUX_REBOOT_CMD_POWER_OFF:
> +	case LINUX_REBOOT_CMD_HALT:
> +		pid_ns->reboot = SIGINT;
> +		break;
> +	default:
> +		return -EINVAL;
> +	}
> +
> +	read_lock(&tasklist_lock);
> +	force_sig(SIGKILL, pid_ns->child_reaper);
> +	read_unlock(&tasklist_lock);
> +
> +	do_exit(0);
> +
> +	/* Not reached */
> +	return 0;
> +}
> +
>  static __init int pid_namespaces_init(void)
>  {
>  	pid_ns_cachep = KMEM_CACHE(pid_namespace, SLAB_PANIC);
> diff --git a/kernel/sys.c b/kernel/sys.c
> index 4070153..bd924fa 100644
> --- a/kernel/sys.c
> +++ b/kernel/sys.c
> @@ -444,6 +444,14 @@ SYSCALL_DEFINE4(reboot, int, magic1, int, magic2, unsigned int, cmd,
>  	                magic2 != LINUX_REBOOT_MAGIC2C))
>  		return -EINVAL;
>  
> +	/* In case the pid namespaces are enabled, the current task is in a
> +	 * child pid_namespace and the command is handled by 'reboot_pid_ns',
> +	 * this one will invoke 'do_exit'.
> +	 */
> +	ret = reboot_pid_ns(task_active_pid_ns(current), cmd);
> +	if (ret)
> +		return ret;
> +
>  	/* Instead of trying to make the power_off code look like
>  	 * halt when pm_power_off is not set do it the easy way.
>  	 */
> -- 
> 1.7.5.4
> 

  parent reply	other threads:[~2012-01-05 19:29 UTC|newest]

Thread overview: 18+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2012-01-05  9:06 [PATCH 0/1][V5] Handle reboot in a child pid namespace Daniel Lezcano
2012-01-05  9:06 ` Daniel Lezcano
     [not found] ` <1325754410-32600-1-git-send-email-daniel.lezcano-GANU6spQydw@public.gmane.org>
2012-01-05  9:06   ` [PATCH 1/1][V5] Add reboot_pid_ns to handle the reboot syscall Daniel Lezcano
2012-01-05  9:06     ` Daniel Lezcano
     [not found]     ` <1325754410-32600-2-git-send-email-daniel.lezcano-GANU6spQydw@public.gmane.org>
2012-01-05 19:29       ` Serge Hallyn [this message]
2012-01-05 19:29         ` Serge Hallyn
2012-01-11 10:23         ` Serge Hallyn
2012-01-11 10:23           ` Serge Hallyn
2012-01-11 10:45           ` Andrew Morton
2012-01-11 10:45             ` Andrew Morton
     [not found]             ` <20120111024530.7b3607e7.akpm-de/tnXTf+JLsfHDXvbKv3WD2FQJk+8+b@public.gmane.org>
2012-01-11 10:49               ` Serge E. Hallyn
2012-01-11 10:49                 ` Serge E. Hallyn
2012-02-03  0:10       ` Andrew Morton
2012-02-03  0:10         ` Andrew Morton
     [not found]         ` <20120202161018.e3c62965.akpm-de/tnXTf+JLsfHDXvbKv3WD2FQJk+8+b@public.gmane.org>
2012-02-03  8:59           ` Daniel Lezcano
2012-02-03  8:59             ` Daniel Lezcano
     [not found]             ` <4F2BA1EA.7060901-GANU6spQydw@public.gmane.org>
2012-02-03 15:47               ` Serge Hallyn
2012-02-03 15:47                 ` Serge Hallyn

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20120105192954.GA16083@sergelap \
    --to=serge.hallyn-z7wlfzj8ewms+fvcfc7uqw@public.gmane.org \
    --cc=akpm-de/tnXTf+JLsfHDXvbKv3WD2FQJk+8+b@public.gmane.org \
    --cc=containers-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA@public.gmane.org \
    --cc=daniel.lezcano-GANU6spQydw@public.gmane.org \
    --cc=linux-kernel-u79uwXL29TY76Z2rM5mHXA@public.gmane.org \
    --cc=mtk.manpages-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org \
    --cc=oleg-H+wXaHxf7aLQT0dZR+AlfA@public.gmane.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.