* Re: [PATCH RFC v2] pidns: introduce syscall getvpid
[not found] <20150924135332.27813.21640.stgit@buzz>
@ 2015-09-24 14:56 ` Oleg Nesterov
[not found] ` <20150924145647.GA24151-H+wXaHxf7aLQT0dZR+AlfA@public.gmane.org>
[not found] ` <CALYGNiPCVFNLi=1iL1gOf411iU6tT+DbVx1JuGG+f73Rpvx3dw@mail.gmail.com>
2015-09-25 8:36 ` Chen Fan
1 sibling, 2 replies; 5+ messages in thread
From: Oleg Nesterov @ 2015-09-24 14:56 UTC (permalink / raw)
To: Konstantin Khlebnikov
Cc: linux-api-u79uwXL29TY76Z2rM5mHXA,
containers-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA,
Serge Hallyn, linux-kernel-u79uwXL29TY76Z2rM5mHXA,
Eric W. Biederman, Chen Fan, Andrew Morton, Linus Torvalds
On 09/24, Konstantin Khlebnikov wrote:
>
> +SYSCALL_DEFINE3(getvpid, pid_t, pid, int, source, int, target)
> +{
> + struct file *source_file = NULL, *target_file = NULL;
> + struct pid_namespace *source_ns, *target_ns;
> + struct pid *struct_pid;
> + struct ns_common *ns;
> + pid_t result;
> +
> + if (source >= 0) {
> + source_file = proc_ns_fget(source);
> + result = PTR_ERR(source_file);
> + if (IS_ERR(source_file))
> + goto out;
> + ns = get_proc_ns(file_inode(source_file));
> + result = -EINVAL;
> + if (ns->ops->type != CLONE_NEWPID)
> + goto out;
> + source_ns = container_of(ns, struct pid_namespace, ns);
> + } else
> + source_ns = task_active_pid_ns(current);
> +
> + if (target >= 0) {
> + target_file = proc_ns_fget(target);
> + result = PTR_ERR(target_file);
> + if (IS_ERR(target_file))
> + goto out;
> + ns = get_proc_ns(file_inode(target_file));
> + result = -EINVAL;
> + if (ns->ops->type != CLONE_NEWPID)
> + goto out;
> + target_ns = container_of(ns, struct pid_namespace, ns);
> + } else
> + target_ns = task_active_pid_ns(current);
> +
Hmm. Eric, Konstantin, how about the (uncompiled/untested) patch below
as a preparation? The code above doesn't look very readable.
In fact I think another helper
#define proc_ns_xxx(file, type) \
container_of(get_proc_ns(file_inode(file)), \
struct type, ns)
makes sense too.
Oleg.
---
diff --git a/fs/nsfs.c b/fs/nsfs.c
index 99521e7..0877dd6 100644
--- a/fs/nsfs.c
+++ b/fs/nsfs.c
@@ -118,9 +118,10 @@ int ns_get_name(char *buf, size_t size, struct task_struct *task,
return res;
}
-struct file *proc_ns_fget(int fd)
+struct file *proc_ns_fget(int fd, int nstype)
{
struct file *file;
+ struct ns_common *ns;
file = fget(fd);
if (!file)
@@ -129,6 +130,10 @@ struct file *proc_ns_fget(int fd)
if (file->f_op != &ns_file_operations)
goto out_invalid;
+ ns = get_proc_ns(file_inode(file));
+ if (nstype && (ns->ops->type != nstype))
+ goto out_invalid;
+
return file;
out_invalid:
diff --git a/include/linux/proc_ns.h b/include/linux/proc_ns.h
index 42dfc61..84c9770 100644
--- a/include/linux/proc_ns.h
+++ b/include/linux/proc_ns.h
@@ -65,7 +65,7 @@ static inline int ns_alloc_inum(struct ns_common *ns)
#define ns_free_inum(ns) proc_free_inum((ns)->inum)
-extern struct file *proc_ns_fget(int fd);
+extern struct file *proc_ns_fget(int fd, int nstype);
#define get_proc_ns(inode) ((struct ns_common *)(inode)->i_private)
extern void *ns_get_path(struct path *path, struct task_struct *task,
const struct proc_ns_operations *ns_ops);
diff --git a/kernel/nsproxy.c b/kernel/nsproxy.c
index 49746c8..fee18ba 100644
--- a/kernel/nsproxy.c
+++ b/kernel/nsproxy.c
@@ -226,21 +226,17 @@ SYSCALL_DEFINE2(setns, int, fd, int, nstype)
struct ns_common *ns;
int err;
- file = proc_ns_fget(fd);
+ file = proc_ns_fget(fd, nstype);
if (IS_ERR(file))
return PTR_ERR(file);
- err = -EINVAL;
- ns = get_proc_ns(file_inode(file));
- if (nstype && (ns->ops->type != nstype))
- goto out;
-
new_nsproxy = create_new_namespaces(0, tsk, current_user_ns(), tsk->fs);
if (IS_ERR(new_nsproxy)) {
err = PTR_ERR(new_nsproxy);
goto out;
}
+ ns = get_proc_ns(file_inode(file));
err = ns->ops->install(new_nsproxy, ns);
if (err) {
free_nsproxy(new_nsproxy);
diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
index 572af00..9dfbe68 100644
--- a/net/core/net_namespace.c
+++ b/net/core/net_namespace.c
@@ -424,15 +424,12 @@ struct net *get_net_ns_by_fd(int fd)
struct ns_common *ns;
struct net *net;
- file = proc_ns_fget(fd);
+ file = proc_ns_fget(fd, CLONE_NEWNET);
if (IS_ERR(file))
return ERR_CAST(file);
ns = get_proc_ns(file_inode(file));
- if (ns->ops == &netns_operations)
- net = get_net(container_of(ns, struct net, ns));
- else
- net = ERR_PTR(-EINVAL);
+ net = get_net(container_of(ns, struct net, ns));
fput(file);
return net;
^ permalink raw reply related [flat|nested] 5+ messages in thread
* Re: [PATCH RFC v2] pidns: introduce syscall getvpid
[not found] <20150924135332.27813.21640.stgit@buzz>
2015-09-24 14:56 ` [PATCH RFC v2] pidns: introduce syscall getvpid Oleg Nesterov
@ 2015-09-25 8:36 ` Chen Fan
1 sibling, 0 replies; 5+ messages in thread
From: Chen Fan @ 2015-09-25 8:36 UTC (permalink / raw)
To: Konstantin Khlebnikov, linux-api-u79uwXL29TY76Z2rM5mHXA,
containers-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA,
linux-kernel-u79uwXL29TY76Z2rM5mHXA
Cc: Serge Hallyn, Oleg Nesterov, Eric W. Biederman, Andrew Morton,
Linus Torvalds
On 09/24/2015 09:53 PM, Konstantin Khlebnikov wrote:
> pid_t getvpid(pid_t pid, int source, int target);
>
> This syscall converts pid from source pid-namespace into pid in target
> pid-namespace. Namespaces are defined by file descriptors pointing to
> namespace entries in proc (/proc/[pid]/ns/pid). If source / target is
> negative then current pid namespace is used.
>
> If pid is negative then getvpid() returns pid of parent task for -pid.
>
> If pid is unreachable from target namespace then syscall returns zero.
>
> Errors:
> ESRCH task not found
> EBADF closed file descriptor
> EINVAL not pid-namespace file descriptor
>
> Examples:
> getvpid(pid, ns, -1) -> pid in our pid namespace
> getvpid(pid, -1, ns) -> pid in container
> getvpid(1, ns1, ns2) > 0 -> ns1 inside ns2
> getvpid(1, ns1, ns2) == 0 -> ns1 outside ns2
> getvpid(1, ns, -1) -> init task of pid-namespace
> getvpid(-1, ns, -1) -> task in parent pid-namespace
> getvpid(-pid, -1, -1) -> get ppid by pid
>
> Signed-off-by: Konstantin Khlebnikov <khlebnikov-XoJtRXgx1JseBXzfvpsJ4g@public.gmane.org>
>
> ---
>
> v2:
> * use namespace-fd as second/third argument
> * add -pid for getting parent pid
> * move code into kernel/sys.c next to getppid
> * drop ifdef CONFIG_PID_NS
> * add generic syscall
> ---
> arch/x86/entry/syscalls/syscall_32.tbl | 1 +
> arch/x86/entry/syscalls/syscall_64.tbl | 1 +
> include/linux/syscalls.h | 1 +
> include/uapi/asm-generic/unistd.h | 4 ++
> kernel/sys.c | 63 ++++++++++++++++++++++++++++++++
> 5 files changed, 69 insertions(+), 1 deletion(-)
>
> diff --git a/arch/x86/entry/syscalls/syscall_32.tbl b/arch/x86/entry/syscalls/syscall_32.tbl
> index 7663c455b9f6..dadb55d42fc9 100644
> --- a/arch/x86/entry/syscalls/syscall_32.tbl
> +++ b/arch/x86/entry/syscalls/syscall_32.tbl
> @@ -382,3 +382,4 @@
> 373 i386 shutdown sys_shutdown
> 374 i386 userfaultfd sys_userfaultfd
> 375 i386 membarrier sys_membarrier
> +376 i386 getvpid sys_getvpid
> diff --git a/arch/x86/entry/syscalls/syscall_64.tbl b/arch/x86/entry/syscalls/syscall_64.tbl
> index 278842fdf1f6..0338f2eb3b7c 100644
> --- a/arch/x86/entry/syscalls/syscall_64.tbl
> +++ b/arch/x86/entry/syscalls/syscall_64.tbl
> @@ -331,6 +331,7 @@
> 322 64 execveat stub_execveat
> 323 common userfaultfd sys_userfaultfd
> 324 common membarrier sys_membarrier
> +325 common getvpid sys_getvpid
>
> #
> # x32-specific system call numbers start at 512 to avoid cache impact
> diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
> index a460e2ef2843..01ac603c8b5c 100644
> --- a/include/linux/syscalls.h
> +++ b/include/linux/syscalls.h
> @@ -222,6 +222,7 @@ asmlinkage long sys_nanosleep(struct timespec __user *rqtp, struct timespec __us
> asmlinkage long sys_alarm(unsigned int seconds);
> asmlinkage long sys_getpid(void);
> asmlinkage long sys_getppid(void);
> +asmlinkage long sys_getvpid(pid_t pid, int source, int target);
> asmlinkage long sys_getuid(void);
> asmlinkage long sys_geteuid(void);
> asmlinkage long sys_getgid(void);
> diff --git a/include/uapi/asm-generic/unistd.h b/include/uapi/asm-generic/unistd.h
> index 8da542a2874d..163df44b23cf 100644
> --- a/include/uapi/asm-generic/unistd.h
> +++ b/include/uapi/asm-generic/unistd.h
> @@ -711,9 +711,11 @@ __SYSCALL(__NR_bpf, sys_bpf)
> __SC_COMP(__NR_execveat, sys_execveat, compat_sys_execveat)
> #define __NR_membarrier 282
> __SYSCALL(__NR_membarrier, sys_membarrier)
> +#define __NR_getvpid 283
> +__SYSCALL(__NR_getvpid, sys_getvpid)
>
> #undef __NR_syscalls
> -#define __NR_syscalls 283
> +#define __NR_syscalls 284
>
> /*
> * All syscalls below here should go away really,
> diff --git a/kernel/sys.c b/kernel/sys.c
> index fa2f2f671a5c..fbfe938dd9d7 100644
> --- a/kernel/sys.c
> +++ b/kernel/sys.c
> @@ -46,6 +46,7 @@
> #include <linux/syscalls.h>
> #include <linux/kprobes.h>
> #include <linux/user_namespace.h>
> +#include <linux/proc_ns.h>
> #include <linux/binfmts.h>
>
> #include <linux/sched.h>
> @@ -855,6 +856,68 @@ SYSCALL_DEFINE0(getppid)
> return pid;
> }
>
> +SYSCALL_DEFINE3(getvpid, pid_t, pid, int, source, int, target)
> +{
> + struct file *source_file = NULL, *target_file = NULL;
> + struct pid_namespace *source_ns, *target_ns;
> + struct pid *struct_pid;
> + struct ns_common *ns;
> + pid_t result;
> +
> + if (source >= 0) {
> + source_file = proc_ns_fget(source);
> + result = PTR_ERR(source_file);
> + if (IS_ERR(source_file))
> + goto out;
> + ns = get_proc_ns(file_inode(source_file));
> + result = -EINVAL;
> + if (ns->ops->type != CLONE_NEWPID)
> + goto out;
> + source_ns = container_of(ns, struct pid_namespace, ns);
> + } else
> + source_ns = task_active_pid_ns(current);
> +
> + if (target >= 0) {
> + target_file = proc_ns_fget(target);
> + result = PTR_ERR(target_file);
> + if (IS_ERR(target_file))
> + goto out;
> + ns = get_proc_ns(file_inode(target_file));
> + result = -EINVAL;
> + if (ns->ops->type != CLONE_NEWPID)
> + goto out;
> + target_ns = container_of(ns, struct pid_namespace, ns);
> + } else
> + target_ns = task_active_pid_ns(current);
> +
The source ns and target ns translations look the same; why not extract
a new helper to do the job?
Thanks,
Chen
> + rcu_read_lock();
> + struct_pid = find_pid_ns(abs(pid), source_ns);
> +
> + if (struct_pid && pid < 0) {
> + struct task_struct *task;
> +
> + task = pid_task(struct_pid, PIDTYPE_PID);
> + if (task)
> + task = rcu_dereference(task->real_parent);
> + struct_pid = task ? task_pid(task) : NULL;
> + }
> +
> + if (struct_pid)
> + result = pid_nr_ns(struct_pid, target_ns);
> + else
> + result = -ESRCH;
> + rcu_read_unlock();
> +
> +out:
> + if (!IS_ERR_OR_NULL(target_file))
> + fput(target_file);
> +
> + if (!IS_ERR_OR_NULL(source_file))
> + fput(source_file);
> +
> + return result;
> +}
> +
> SYSCALL_DEFINE0(getuid)
> {
> /* Only we change this so SMP safe */
>
> .
>
^ permalink raw reply [flat|nested] 5+ messages in thread
* [PATCH RFC v2] pidns: introduce syscall getvpid
@ 2015-09-24 13:53 Konstantin Khlebnikov
0 siblings, 0 replies; 5+ messages in thread
From: Konstantin Khlebnikov @ 2015-09-24 13:53 UTC (permalink / raw)
To: linux-api-u79uwXL29TY76Z2rM5mHXA,
containers-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA,
linux-kernel-u79uwXL29TY76Z2rM5mHXA
Cc: Serge Hallyn, Oleg Nesterov, Eric W. Biederman, Chen Fan,
Andrew Morton, Linus Torvalds
pid_t getvpid(pid_t pid, int source, int target);
This syscall converts pid from source pid-namespace into pid in target
pid-namespace. Namespaces are defined by file descriptors pointing to
namespace entries in proc (/proc/[pid]/ns/pid). If source / target is
negative then current pid namespace is used.
If pid is negative then getvpid() returns pid of parent task for -pid.
If pid is unreachable from target namespace then syscall returns zero.
Errors:
ESRCH task not found
EBADF closed file descriptor
EINVAL not pid-namespace file descriptor
Examples:
getvpid(pid, ns, -1) -> pid in our pid namespace
getvpid(pid, -1, ns) -> pid in container
getvpid(1, ns1, ns2) > 0 -> ns1 inside ns2
getvpid(1, ns1, ns2) == 0 -> ns1 outside ns2
getvpid(1, ns, -1) -> init task of pid-namespace
getvpid(-1, ns, -1) -> task in parent pid-namespace
getvpid(-pid, -1, -1) -> get ppid by pid
Signed-off-by: Konstantin Khlebnikov <khlebnikov-XoJtRXgx1JseBXzfvpsJ4g@public.gmane.org>
---
v2:
* use namespace-fd as second/third argument
* add -pid for getting parent pid
* move code into kernel/sys.c next to getppid
* drop ifdef CONFIG_PID_NS
* add generic syscall
---
arch/x86/entry/syscalls/syscall_32.tbl | 1 +
arch/x86/entry/syscalls/syscall_64.tbl | 1 +
include/linux/syscalls.h | 1 +
include/uapi/asm-generic/unistd.h | 4 ++
kernel/sys.c | 63 ++++++++++++++++++++++++++++++++
5 files changed, 69 insertions(+), 1 deletion(-)
diff --git a/arch/x86/entry/syscalls/syscall_32.tbl b/arch/x86/entry/syscalls/syscall_32.tbl
index 7663c455b9f6..dadb55d42fc9 100644
--- a/arch/x86/entry/syscalls/syscall_32.tbl
+++ b/arch/x86/entry/syscalls/syscall_32.tbl
@@ -382,3 +382,4 @@
373 i386 shutdown sys_shutdown
374 i386 userfaultfd sys_userfaultfd
375 i386 membarrier sys_membarrier
+376 i386 getvpid sys_getvpid
diff --git a/arch/x86/entry/syscalls/syscall_64.tbl b/arch/x86/entry/syscalls/syscall_64.tbl
index 278842fdf1f6..0338f2eb3b7c 100644
--- a/arch/x86/entry/syscalls/syscall_64.tbl
+++ b/arch/x86/entry/syscalls/syscall_64.tbl
@@ -331,6 +331,7 @@
322 64 execveat stub_execveat
323 common userfaultfd sys_userfaultfd
324 common membarrier sys_membarrier
+325 common getvpid sys_getvpid
#
# x32-specific system call numbers start at 512 to avoid cache impact
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index a460e2ef2843..01ac603c8b5c 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -222,6 +222,7 @@ asmlinkage long sys_nanosleep(struct timespec __user *rqtp, struct timespec __us
asmlinkage long sys_alarm(unsigned int seconds);
asmlinkage long sys_getpid(void);
asmlinkage long sys_getppid(void);
+asmlinkage long sys_getvpid(pid_t pid, int source, int target);
asmlinkage long sys_getuid(void);
asmlinkage long sys_geteuid(void);
asmlinkage long sys_getgid(void);
diff --git a/include/uapi/asm-generic/unistd.h b/include/uapi/asm-generic/unistd.h
index 8da542a2874d..163df44b23cf 100644
--- a/include/uapi/asm-generic/unistd.h
+++ b/include/uapi/asm-generic/unistd.h
@@ -711,9 +711,11 @@ __SYSCALL(__NR_bpf, sys_bpf)
__SC_COMP(__NR_execveat, sys_execveat, compat_sys_execveat)
#define __NR_membarrier 282
__SYSCALL(__NR_membarrier, sys_membarrier)
+#define __NR_getvpid 283
+__SYSCALL(__NR_getvpid, sys_getvpid)
#undef __NR_syscalls
-#define __NR_syscalls 283
+#define __NR_syscalls 284
/*
* All syscalls below here should go away really,
diff --git a/kernel/sys.c b/kernel/sys.c
index fa2f2f671a5c..fbfe938dd9d7 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -46,6 +46,7 @@
#include <linux/syscalls.h>
#include <linux/kprobes.h>
#include <linux/user_namespace.h>
+#include <linux/proc_ns.h>
#include <linux/binfmts.h>
#include <linux/sched.h>
@@ -855,6 +856,68 @@ SYSCALL_DEFINE0(getppid)
return pid;
}
+SYSCALL_DEFINE3(getvpid, pid_t, pid, int, source, int, target)
+{
+ struct file *source_file = NULL, *target_file = NULL;
+ struct pid_namespace *source_ns, *target_ns;
+ struct pid *struct_pid;
+ struct ns_common *ns;
+ pid_t result;
+
+ if (source >= 0) {
+ source_file = proc_ns_fget(source);
+ result = PTR_ERR(source_file);
+ if (IS_ERR(source_file))
+ goto out;
+ ns = get_proc_ns(file_inode(source_file));
+ result = -EINVAL;
+ if (ns->ops->type != CLONE_NEWPID)
+ goto out;
+ source_ns = container_of(ns, struct pid_namespace, ns);
+ } else
+ source_ns = task_active_pid_ns(current);
+
+ if (target >= 0) {
+ target_file = proc_ns_fget(target);
+ result = PTR_ERR(target_file);
+ if (IS_ERR(target_file))
+ goto out;
+ ns = get_proc_ns(file_inode(target_file));
+ result = -EINVAL;
+ if (ns->ops->type != CLONE_NEWPID)
+ goto out;
+ target_ns = container_of(ns, struct pid_namespace, ns);
+ } else
+ target_ns = task_active_pid_ns(current);
+
+ rcu_read_lock();
+ struct_pid = find_pid_ns(abs(pid), source_ns);
+
+ if (struct_pid && pid < 0) {
+ struct task_struct *task;
+
+ task = pid_task(struct_pid, PIDTYPE_PID);
+ if (task)
+ task = rcu_dereference(task->real_parent);
+ struct_pid = task ? task_pid(task) : NULL;
+ }
+
+ if (struct_pid)
+ result = pid_nr_ns(struct_pid, target_ns);
+ else
+ result = -ESRCH;
+ rcu_read_unlock();
+
+out:
+ if (!IS_ERR_OR_NULL(target_file))
+ fput(target_file);
+
+ if (!IS_ERR_OR_NULL(source_file))
+ fput(source_file);
+
+ return result;
+}
+
SYSCALL_DEFINE0(getuid)
{
/* Only we change this so SMP safe */
^ permalink raw reply related [flat|nested] 5+ messages in thread
end of thread, other threads:[~2015-09-25 8:36 UTC | newest]
Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
[not found] <20150924135332.27813.21640.stgit@buzz>
2015-09-24 14:56 ` [PATCH RFC v2] pidns: introduce syscall getvpid Oleg Nesterov
[not found] ` <20150924145647.GA24151-H+wXaHxf7aLQT0dZR+AlfA@public.gmane.org>
2015-09-24 15:47 ` Konstantin Khlebnikov
[not found] ` <CALYGNiPCVFNLi=1iL1gOf411iU6tT+DbVx1JuGG+f73Rpvx3dw@mail.gmail.com>
[not found] ` <CALYGNiPCVFNLi=1iL1gOf411iU6tT+DbVx1JuGG+f73Rpvx3dw-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
2015-09-24 17:34 ` Oleg Nesterov
2015-09-25 8:36 ` Chen Fan
2015-09-24 13:53 Konstantin Khlebnikov
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox