* [PATCH RFC v2] pidns: introduce syscall getvpid
@ 2015-09-24 13:53 Konstantin Khlebnikov
2015-09-24 14:56 ` Oleg Nesterov
2015-09-25 8:36 ` Chen Fan
0 siblings, 2 replies; 5+ messages in thread
From: Konstantin Khlebnikov @ 2015-09-24 13:53 UTC (permalink / raw)
To: linux-api-u79uwXL29TY76Z2rM5mHXA,
containers-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA,
linux-kernel-u79uwXL29TY76Z2rM5mHXA
Cc: Serge Hallyn, Oleg Nesterov, Eric W. Biederman, Chen Fan,
Andrew Morton, Linus Torvalds, Stéphane Graber
pid_t getvpid(pid_t pid, int source, int target);
This syscall converts a pid from the source pid-namespace into the pid in the
target pid-namespace. Namespaces are specified by file descriptors pointing to
namespace entries in proc (/proc/[pid]/ns/pid). If source or target is
negative then the current pid namespace is used.
If pid is negative then getvpid() returns the pid of the parent of task -pid.
If the pid is unreachable from the target namespace then the syscall returns zero.
Errors:
ESRCH task not found
EBADF closed file descriptor
EINVAL not pid-namespace file descriptor
Examples:
getvpid(pid, ns, -1) -> pid in our pid namespace
getvpid(pid, -1, ns) -> pid in container
getvpid(1, ns1, ns2) > 0 -> ns1 inside ns2
getvpid(1, ns1, ns2) == 0 -> ns1 outside ns2
getvpid(1, ns, -1) -> init task of pid-namespace
getvpid(-1, ns, -1) -> task in parent pid-namespace
getvpid(-pid, -1, -1) -> get ppid by pid
Signed-off-by: Konstantin Khlebnikov <khlebnikov-XoJtRXgx1JseBXzfvpsJ4g@public.gmane.org>
---
v2:
* use namespace-fd as second/third argument
* add -pid for getting parent pid
* move code into kernel/sys.c next to getppid
* drop ifdef CONFIG_PID_NS
* add generic syscall
---
arch/x86/entry/syscalls/syscall_32.tbl | 1 +
arch/x86/entry/syscalls/syscall_64.tbl | 1 +
include/linux/syscalls.h | 1 +
include/uapi/asm-generic/unistd.h | 4 ++
kernel/sys.c | 63 ++++++++++++++++++++++++++++++++
5 files changed, 69 insertions(+), 1 deletion(-)
diff --git a/arch/x86/entry/syscalls/syscall_32.tbl b/arch/x86/entry/syscalls/syscall_32.tbl
index 7663c455b9f6..dadb55d42fc9 100644
--- a/arch/x86/entry/syscalls/syscall_32.tbl
+++ b/arch/x86/entry/syscalls/syscall_32.tbl
@@ -382,3 +382,4 @@
373 i386 shutdown sys_shutdown
374 i386 userfaultfd sys_userfaultfd
375 i386 membarrier sys_membarrier
+376 i386 getvpid sys_getvpid
diff --git a/arch/x86/entry/syscalls/syscall_64.tbl b/arch/x86/entry/syscalls/syscall_64.tbl
index 278842fdf1f6..0338f2eb3b7c 100644
--- a/arch/x86/entry/syscalls/syscall_64.tbl
+++ b/arch/x86/entry/syscalls/syscall_64.tbl
@@ -331,6 +331,7 @@
322 64 execveat stub_execveat
323 common userfaultfd sys_userfaultfd
324 common membarrier sys_membarrier
+325 common getvpid sys_getvpid
#
# x32-specific system call numbers start at 512 to avoid cache impact
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index a460e2ef2843..01ac603c8b5c 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -222,6 +222,7 @@ asmlinkage long sys_nanosleep(struct timespec __user *rqtp, struct timespec __us
asmlinkage long sys_alarm(unsigned int seconds);
asmlinkage long sys_getpid(void);
asmlinkage long sys_getppid(void);
+asmlinkage long sys_getvpid(pid_t pid, int source, int target);
asmlinkage long sys_getuid(void);
asmlinkage long sys_geteuid(void);
asmlinkage long sys_getgid(void);
diff --git a/include/uapi/asm-generic/unistd.h b/include/uapi/asm-generic/unistd.h
index 8da542a2874d..163df44b23cf 100644
--- a/include/uapi/asm-generic/unistd.h
+++ b/include/uapi/asm-generic/unistd.h
@@ -711,9 +711,11 @@ __SYSCALL(__NR_bpf, sys_bpf)
__SC_COMP(__NR_execveat, sys_execveat, compat_sys_execveat)
#define __NR_membarrier 282
__SYSCALL(__NR_membarrier, sys_membarrier)
+#define __NR_getvpid 283
+__SYSCALL(__NR_getvpid, sys_getvpid)
#undef __NR_syscalls
-#define __NR_syscalls 283
+#define __NR_syscalls 284
/*
* All syscalls below here should go away really,
diff --git a/kernel/sys.c b/kernel/sys.c
index fa2f2f671a5c..fbfe938dd9d7 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -46,6 +46,7 @@
#include <linux/syscalls.h>
#include <linux/kprobes.h>
#include <linux/user_namespace.h>
+#include <linux/proc_ns.h>
#include <linux/binfmts.h>
#include <linux/sched.h>
@@ -855,6 +856,68 @@ SYSCALL_DEFINE0(getppid)
return pid;
}
+SYSCALL_DEFINE3(getvpid, pid_t, pid, int, source, int, target)
+{
+ struct file *source_file = NULL, *target_file = NULL;
+ struct pid_namespace *source_ns, *target_ns;
+ struct pid *struct_pid;
+ struct ns_common *ns;
+ pid_t result;
+
+ if (source >= 0) {
+ source_file = proc_ns_fget(source);
+ result = PTR_ERR(source_file);
+ if (IS_ERR(source_file))
+ goto out;
+ ns = get_proc_ns(file_inode(source_file));
+ result = -EINVAL;
+ if (ns->ops->type != CLONE_NEWPID)
+ goto out;
+ source_ns = container_of(ns, struct pid_namespace, ns);
+ } else
+ source_ns = task_active_pid_ns(current);
+
+ if (target >= 0) {
+ target_file = proc_ns_fget(target);
+ result = PTR_ERR(target_file);
+ if (IS_ERR(target_file))
+ goto out;
+ ns = get_proc_ns(file_inode(target_file));
+ result = -EINVAL;
+ if (ns->ops->type != CLONE_NEWPID)
+ goto out;
+ target_ns = container_of(ns, struct pid_namespace, ns);
+ } else
+ target_ns = task_active_pid_ns(current);
+
+ rcu_read_lock();
+ struct_pid = find_pid_ns(abs(pid), source_ns);
+
+ if (struct_pid && pid < 0) {
+ struct task_struct *task;
+
+ task = pid_task(struct_pid, PIDTYPE_PID);
+ if (task)
+ task = rcu_dereference(task->real_parent);
+ struct_pid = task ? task_pid(task) : NULL;
+ }
+
+ if (struct_pid)
+ result = pid_nr_ns(struct_pid, target_ns);
+ else
+ result = -ESRCH;
+ rcu_read_unlock();
+
+out:
+ if (!IS_ERR_OR_NULL(target_file))
+ fput(target_file);
+
+ if (!IS_ERR_OR_NULL(source_file))
+ fput(source_file);
+
+ return result;
+}
+
SYSCALL_DEFINE0(getuid)
{
/* Only we change this so SMP safe */
^ permalink raw reply related [flat|nested] 5+ messages in thread
* Re: [PATCH RFC v2] pidns: introduce syscall getvpid
2015-09-24 13:53 [PATCH RFC v2] pidns: introduce syscall getvpid Konstantin Khlebnikov
@ 2015-09-24 14:56 ` Oleg Nesterov
[not found] ` <20150924145647.GA24151-H+wXaHxf7aLQT0dZR+AlfA@public.gmane.org>
2015-09-25 8:36 ` Chen Fan
1 sibling, 1 reply; 5+ messages in thread
From: Oleg Nesterov @ 2015-09-24 14:56 UTC (permalink / raw)
To: Konstantin Khlebnikov
Cc: linux-api-u79uwXL29TY76Z2rM5mHXA,
containers-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA,
Serge Hallyn, linux-kernel-u79uwXL29TY76Z2rM5mHXA,
Eric W. Biederman, Chen Fan, Andrew Morton, Linus Torvalds
On 09/24, Konstantin Khlebnikov wrote:
>
> +SYSCALL_DEFINE3(getvpid, pid_t, pid, int, source, int, target)
> +{
> + struct file *source_file = NULL, *target_file = NULL;
> + struct pid_namespace *source_ns, *target_ns;
> + struct pid *struct_pid;
> + struct ns_common *ns;
> + pid_t result;
> +
> + if (source >= 0) {
> + source_file = proc_ns_fget(source);
> + result = PTR_ERR(source_file);
> + if (IS_ERR(source_file))
> + goto out;
> + ns = get_proc_ns(file_inode(source_file));
> + result = -EINVAL;
> + if (ns->ops->type != CLONE_NEWPID)
> + goto out;
> + source_ns = container_of(ns, struct pid_namespace, ns);
> + } else
> + source_ns = task_active_pid_ns(current);
> +
> + if (target >= 0) {
> + target_file = proc_ns_fget(target);
> + result = PTR_ERR(target_file);
> + if (IS_ERR(target_file))
> + goto out;
> + ns = get_proc_ns(file_inode(target_file));
> + result = -EINVAL;
> + if (ns->ops->type != CLONE_NEWPID)
> + goto out;
> + target_ns = container_of(ns, struct pid_namespace, ns);
> + } else
> + target_ns = task_active_pid_ns(current);
> +
Hmm. Eric, Konstantin, how about the (uncompiled/untested) patch below
as a preparation? The code above doesn't look very readable.
In fact I think another helper
#define proc_ns_xxx(file, type) \
container_of(get_proc_ns(file_inode(file)), \
struct type, ns)
makes sense too.
Oleg.
---
diff --git a/fs/nsfs.c b/fs/nsfs.c
index 99521e7..0877dd6 100644
--- a/fs/nsfs.c
+++ b/fs/nsfs.c
@@ -118,9 +118,10 @@ int ns_get_name(char *buf, size_t size, struct task_struct *task,
return res;
}
-struct file *proc_ns_fget(int fd)
+struct file *proc_ns_fget(int fd, int nstype)
{
struct file *file;
+ struct ns_common *ns;
file = fget(fd);
if (!file)
@@ -129,6 +130,10 @@ struct file *proc_ns_fget(int fd)
if (file->f_op != &ns_file_operations)
goto out_invalid;
+ ns = get_proc_ns(file_inode(file));
+ if (nstype && (ns->ops->type != nstype))
+ goto out_invalid;
+
return file;
out_invalid:
diff --git a/include/linux/proc_ns.h b/include/linux/proc_ns.h
index 42dfc61..84c9770 100644
--- a/include/linux/proc_ns.h
+++ b/include/linux/proc_ns.h
@@ -65,7 +65,7 @@ static inline int ns_alloc_inum(struct ns_common *ns)
#define ns_free_inum(ns) proc_free_inum((ns)->inum)
-extern struct file *proc_ns_fget(int fd);
+extern struct file *proc_ns_fget(int fd, int nstype);
#define get_proc_ns(inode) ((struct ns_common *)(inode)->i_private)
extern void *ns_get_path(struct path *path, struct task_struct *task,
const struct proc_ns_operations *ns_ops);
diff --git a/kernel/nsproxy.c b/kernel/nsproxy.c
index 49746c8..fee18ba 100644
--- a/kernel/nsproxy.c
+++ b/kernel/nsproxy.c
@@ -226,21 +226,17 @@ SYSCALL_DEFINE2(setns, int, fd, int, nstype)
struct ns_common *ns;
int err;
- file = proc_ns_fget(fd);
+ file = proc_ns_fget(fd, nstype);
if (IS_ERR(file))
return PTR_ERR(file);
- err = -EINVAL;
- ns = get_proc_ns(file_inode(file));
- if (nstype && (ns->ops->type != nstype))
- goto out;
-
new_nsproxy = create_new_namespaces(0, tsk, current_user_ns(), tsk->fs);
if (IS_ERR(new_nsproxy)) {
err = PTR_ERR(new_nsproxy);
goto out;
}
+ ns = get_proc_ns(file_inode(file));
err = ns->ops->install(new_nsproxy, ns);
if (err) {
free_nsproxy(new_nsproxy);
diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
index 572af00..9dfbe68 100644
--- a/net/core/net_namespace.c
+++ b/net/core/net_namespace.c
@@ -424,15 +424,12 @@ struct net *get_net_ns_by_fd(int fd)
struct ns_common *ns;
struct net *net;
- file = proc_ns_fget(fd);
+ file = proc_ns_fget(fd, CLONE_NEWNET);
if (IS_ERR(file))
return ERR_CAST(file);
ns = get_proc_ns(file_inode(file));
- if (ns->ops == &netns_operations)
- net = get_net(container_of(ns, struct net, ns));
- else
- net = ERR_PTR(-EINVAL);
+ net = get_net(container_of(ns, struct net, ns));
fput(file);
return net;
^ permalink raw reply related [flat|nested] 5+ messages in thread
* Re: [PATCH RFC v2] pidns: introduce syscall getvpid
[not found] ` <20150924145647.GA24151-H+wXaHxf7aLQT0dZR+AlfA@public.gmane.org>
@ 2015-09-24 15:47 ` Konstantin Khlebnikov
[not found] ` <CALYGNiPCVFNLi=1iL1gOf411iU6tT+DbVx1JuGG+f73Rpvx3dw-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
0 siblings, 1 reply; 5+ messages in thread
From: Konstantin Khlebnikov @ 2015-09-24 15:47 UTC (permalink / raw)
To: Oleg Nesterov
Cc: Konstantin Khlebnikov, Linux API, Linux Containers,
Linux Kernel Mailing List, Serge Hallyn, Eric W. Biederman,
Chen Fan, Andrew Morton, Linus Torvalds, Stéphane Graber
On Thu, Sep 24, 2015 at 5:56 PM, Oleg Nesterov <oleg-H+wXaHxf7aLQT0dZR+AlfA@public.gmane.org> wrote:
> On 09/24, Konstantin Khlebnikov wrote:
>>
>> +SYSCALL_DEFINE3(getvpid, pid_t, pid, int, source, int, target)
>> +{
>> + struct file *source_file = NULL, *target_file = NULL;
>> + struct pid_namespace *source_ns, *target_ns;
>> + struct pid *struct_pid;
>> + struct ns_common *ns;
>> + pid_t result;
>> +
>> + if (source >= 0) {
>> + source_file = proc_ns_fget(source);
>> + result = PTR_ERR(source_file);
>> + if (IS_ERR(source_file))
>> + goto out;
>> + ns = get_proc_ns(file_inode(source_file));
>> + result = -EINVAL;
>> + if (ns->ops->type != CLONE_NEWPID)
>> + goto out;
>> + source_ns = container_of(ns, struct pid_namespace, ns);
>> + } else
>> + source_ns = task_active_pid_ns(current);
>> +
>> + if (target >= 0) {
>> + target_file = proc_ns_fget(target);
>> + result = PTR_ERR(target_file);
>> + if (IS_ERR(target_file))
>> + goto out;
>> + ns = get_proc_ns(file_inode(target_file));
>> + result = -EINVAL;
>> + if (ns->ops->type != CLONE_NEWPID)
>> + goto out;
>> + target_ns = container_of(ns, struct pid_namespace, ns);
>> + } else
>> + target_ns = task_active_pid_ns(current);
>> +
>
> Hmm. Eric, Konstantin, how about (uncompiled/untested) patch below
> in a preparation? The code above doesn't look very readable.
I tried to do something like that, but it went too far, so I sent the patch as is.
Actually we can go deeper and replace struct file* with struct fd: this saves
a couple of atomic ops for a single-threaded task.
>
> In fact I think another helper
>
> #define proc_ns_xxx(file, type) \
> container_of(get_proc_ns(file_inode(file)), \
> struct type, ns)
>
> makes sense too.
>
> Oleg.
> ---
>
> diff --git a/fs/nsfs.c b/fs/nsfs.c
> index 99521e7..0877dd6 100644
> --- a/fs/nsfs.c
> +++ b/fs/nsfs.c
> @@ -118,9 +118,10 @@ int ns_get_name(char *buf, size_t size, struct task_struct *task,
> return res;
> }
>
> -struct file *proc_ns_fget(int fd)
> +struct file *proc_ns_fget(int fd, int nstype)
> {
> struct file *file;
> + struct ns_common *ns;
>
> file = fget(fd);
> if (!file)
> @@ -129,6 +130,10 @@ struct file *proc_ns_fget(int fd)
> if (file->f_op != &ns_file_operations)
> goto out_invalid;
>
> + ns = get_proc_ns(file_inode(file));
> + if (nstype && (ns->ops->type != nstype))
> + goto out_invalid;
> +
> return file;
>
> out_invalid:
> diff --git a/include/linux/proc_ns.h b/include/linux/proc_ns.h
> index 42dfc61..84c9770 100644
> --- a/include/linux/proc_ns.h
> +++ b/include/linux/proc_ns.h
> @@ -65,7 +65,7 @@ static inline int ns_alloc_inum(struct ns_common *ns)
>
> #define ns_free_inum(ns) proc_free_inum((ns)->inum)
>
> -extern struct file *proc_ns_fget(int fd);
> +extern struct file *proc_ns_fget(int fd, int nstype);
> #define get_proc_ns(inode) ((struct ns_common *)(inode)->i_private)
> extern void *ns_get_path(struct path *path, struct task_struct *task,
> const struct proc_ns_operations *ns_ops);
> diff --git a/kernel/nsproxy.c b/kernel/nsproxy.c
> index 49746c8..fee18ba 100644
> --- a/kernel/nsproxy.c
> +++ b/kernel/nsproxy.c
> @@ -226,21 +226,17 @@ SYSCALL_DEFINE2(setns, int, fd, int, nstype)
> struct ns_common *ns;
> int err;
>
> - file = proc_ns_fget(fd);
> + file = proc_ns_fget(fd, nstype);
> if (IS_ERR(file))
> return PTR_ERR(file);
>
> - err = -EINVAL;
> - ns = get_proc_ns(file_inode(file));
> - if (nstype && (ns->ops->type != nstype))
> - goto out;
> -
> new_nsproxy = create_new_namespaces(0, tsk, current_user_ns(), tsk->fs);
> if (IS_ERR(new_nsproxy)) {
> err = PTR_ERR(new_nsproxy);
> goto out;
> }
>
> + ns = get_proc_ns(file_inode(file));
> err = ns->ops->install(new_nsproxy, ns);
> if (err) {
> free_nsproxy(new_nsproxy);
> diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
> index 572af00..9dfbe68 100644
> --- a/net/core/net_namespace.c
> +++ b/net/core/net_namespace.c
> @@ -424,15 +424,12 @@ struct net *get_net_ns_by_fd(int fd)
> struct ns_common *ns;
> struct net *net;
>
> - file = proc_ns_fget(fd);
> + file = proc_ns_fget(fd, CLONE_NEWNET);
> if (IS_ERR(file))
> return ERR_CAST(file);
>
> ns = get_proc_ns(file_inode(file));
> - if (ns->ops == &netns_operations)
> - net = get_net(container_of(ns, struct net, ns));
> - else
> - net = ERR_PTR(-EINVAL);
> + net = get_net(container_of(ns, struct net, ns));
>
> fput(file);
> return net;
>
> --
> To unsubscribe from this list: send the line "unsubscribe linux-api" in
> the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
> More majordomo info at http://vger.kernel.org/majordomo-info.html
^ permalink raw reply [flat|nested] 5+ messages in thread
* Re: [PATCH RFC v2] pidns: introduce syscall getvpid
[not found] ` <CALYGNiPCVFNLi=1iL1gOf411iU6tT+DbVx1JuGG+f73Rpvx3dw-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
@ 2015-09-24 17:34 ` Oleg Nesterov
0 siblings, 0 replies; 5+ messages in thread
From: Oleg Nesterov @ 2015-09-24 17:34 UTC (permalink / raw)
To: Konstantin Khlebnikov
Cc: Konstantin Khlebnikov, Linux API, Linux Containers,
Linux Kernel Mailing List, Serge Hallyn, Eric W. Biederman,
Chen Fan, Andrew Morton, Linus Torvalds, Stéphane Graber
On 09/24, Konstantin Khlebnikov wrote:
>
> On Thu, Sep 24, 2015 at 5:56 PM, Oleg Nesterov <oleg-H+wXaHxf7aLQT0dZR+AlfA@public.gmane.org> wrote:
> > On 09/24, Konstantin Khlebnikov wrote:
> >>
> >> +SYSCALL_DEFINE3(getvpid, pid_t, pid, int, source, int, target)
> >> +{
> >> + struct file *source_file = NULL, *target_file = NULL;
> >> + struct pid_namespace *source_ns, *target_ns;
> >> + struct pid *struct_pid;
> >> + struct ns_common *ns;
> >> + pid_t result;
> >> +
> >> + if (source >= 0) {
> >> + source_file = proc_ns_fget(source);
> >> + result = PTR_ERR(source_file);
> >> + if (IS_ERR(source_file))
> >> + goto out;
> >> + ns = get_proc_ns(file_inode(source_file));
> >> + result = -EINVAL;
> >> + if (ns->ops->type != CLONE_NEWPID)
> >> + goto out;
> >> + source_ns = container_of(ns, struct pid_namespace, ns);
> >> + } else
> >> + source_ns = task_active_pid_ns(current);
> >> +
> >> + if (target >= 0) {
> >> + target_file = proc_ns_fget(target);
> >> + result = PTR_ERR(target_file);
> >> + if (IS_ERR(target_file))
> >> + goto out;
> >> + ns = get_proc_ns(file_inode(target_file));
> >> + result = -EINVAL;
> >> + if (ns->ops->type != CLONE_NEWPID)
> >> + goto out;
> >> + target_ns = container_of(ns, struct pid_namespace, ns);
> >> + } else
> >> + target_ns = task_active_pid_ns(current);
> >> +
> >
> > Hmm. Eric, Konstantin, how about (uncompiled/untested) patch below
> > in a preparation? The code above doesn't look very readable.
>
> I've tried to do something like that but that comes too far so send patch as is.
OK, I won't insist.
The code above asks for cleanup/factorization, but we can do this
later.
> Actually we can go deeper and replace struct file* with struct fd: this saves
> couple atomic ops for singlethreaded task.
it's not about performance...
But yes, we can do more and create get_pid_ns_by_fd() similar to
get_net_ns_by_fd(). We do not need file at all, we need pid_ns.
Oleg.
^ permalink raw reply [flat|nested] 5+ messages in thread
* Re: [PATCH RFC v2] pidns: introduce syscall getvpid
2015-09-24 13:53 [PATCH RFC v2] pidns: introduce syscall getvpid Konstantin Khlebnikov
2015-09-24 14:56 ` Oleg Nesterov
@ 2015-09-25 8:36 ` Chen Fan
1 sibling, 0 replies; 5+ messages in thread
From: Chen Fan @ 2015-09-25 8:36 UTC (permalink / raw)
To: Konstantin Khlebnikov, linux-api-u79uwXL29TY76Z2rM5mHXA,
containers-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA,
linux-kernel-u79uwXL29TY76Z2rM5mHXA
Cc: Serge Hallyn, Oleg Nesterov, Eric W. Biederman, Andrew Morton,
Linus Torvalds
On 09/24/2015 09:53 PM, Konstantin Khlebnikov wrote:
> pid_t getvpid(pid_t pid, int source, int target);
>
> This syscall converts pid from source pid-namespace into pid in target
> pid-namespace. Namespaces are defined by file descriptors pointing to
> namespace entries in proc (/proc/[pid]/ns/pid). If source / target is
> negative then current pid namespace is used.
>
> If pid is negative then getvpid() returns pid of parent task for -pid.
>
> If pid is unreachable from target namespace then syscall returns zero.
>
> Errors:
> ESRCH task not found
> EBADF closed file descriptor
> EINVAL not pid-namespace file descriptor
>
> Examples:
> getvpid(pid, ns, -1) -> pid in our pid namespace
> getvpid(pid, -1, ns) -> pid in container
> getvpid(1, ns1, ns2) > 0 -> ns1 inside ns2
> getvpid(1, ns1, ns2) == 0 -> ns1 outside ns2
> getvpid(1, ns, -1) -> init task of pid-namespace
> getvpid(-1, ns, -1) -> task in parent pid-namespace
> getvpid(-pid, -1, -1) -> get ppid by pid
>
> Signed-off-by: Konstantin Khlebnikov <khlebnikov-XoJtRXgx1JseBXzfvpsJ4g@public.gmane.org>
>
> ---
>
> v2:
> * use namespace-fd as second/third argument
> * add -pid for getting parent pid
> * move code into kernel/sys.c next to getppid
> * drop ifdef CONFIG_PID_NS
> * add generic syscall
> ---
> arch/x86/entry/syscalls/syscall_32.tbl | 1 +
> arch/x86/entry/syscalls/syscall_64.tbl | 1 +
> include/linux/syscalls.h | 1 +
> include/uapi/asm-generic/unistd.h | 4 ++
> kernel/sys.c | 63 ++++++++++++++++++++++++++++++++
> 5 files changed, 69 insertions(+), 1 deletion(-)
>
> diff --git a/arch/x86/entry/syscalls/syscall_32.tbl b/arch/x86/entry/syscalls/syscall_32.tbl
> index 7663c455b9f6..dadb55d42fc9 100644
> --- a/arch/x86/entry/syscalls/syscall_32.tbl
> +++ b/arch/x86/entry/syscalls/syscall_32.tbl
> @@ -382,3 +382,4 @@
> 373 i386 shutdown sys_shutdown
> 374 i386 userfaultfd sys_userfaultfd
> 375 i386 membarrier sys_membarrier
> +376 i386 getvpid sys_getvpid
> diff --git a/arch/x86/entry/syscalls/syscall_64.tbl b/arch/x86/entry/syscalls/syscall_64.tbl
> index 278842fdf1f6..0338f2eb3b7c 100644
> --- a/arch/x86/entry/syscalls/syscall_64.tbl
> +++ b/arch/x86/entry/syscalls/syscall_64.tbl
> @@ -331,6 +331,7 @@
> 322 64 execveat stub_execveat
> 323 common userfaultfd sys_userfaultfd
> 324 common membarrier sys_membarrier
> +325 common getvpid sys_getvpid
>
> #
> # x32-specific system call numbers start at 512 to avoid cache impact
> diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
> index a460e2ef2843..01ac603c8b5c 100644
> --- a/include/linux/syscalls.h
> +++ b/include/linux/syscalls.h
> @@ -222,6 +222,7 @@ asmlinkage long sys_nanosleep(struct timespec __user *rqtp, struct timespec __us
> asmlinkage long sys_alarm(unsigned int seconds);
> asmlinkage long sys_getpid(void);
> asmlinkage long sys_getppid(void);
> +asmlinkage long sys_getvpid(pid_t pid, int source, int target);
> asmlinkage long sys_getuid(void);
> asmlinkage long sys_geteuid(void);
> asmlinkage long sys_getgid(void);
> diff --git a/include/uapi/asm-generic/unistd.h b/include/uapi/asm-generic/unistd.h
> index 8da542a2874d..163df44b23cf 100644
> --- a/include/uapi/asm-generic/unistd.h
> +++ b/include/uapi/asm-generic/unistd.h
> @@ -711,9 +711,11 @@ __SYSCALL(__NR_bpf, sys_bpf)
> __SC_COMP(__NR_execveat, sys_execveat, compat_sys_execveat)
> #define __NR_membarrier 282
> __SYSCALL(__NR_membarrier, sys_membarrier)
> +#define __NR_getvpid 283
> +__SYSCALL(__NR_getvpid, sys_getvpid)
>
> #undef __NR_syscalls
> -#define __NR_syscalls 283
> +#define __NR_syscalls 284
>
> /*
> * All syscalls below here should go away really,
> diff --git a/kernel/sys.c b/kernel/sys.c
> index fa2f2f671a5c..fbfe938dd9d7 100644
> --- a/kernel/sys.c
> +++ b/kernel/sys.c
> @@ -46,6 +46,7 @@
> #include <linux/syscalls.h>
> #include <linux/kprobes.h>
> #include <linux/user_namespace.h>
> +#include <linux/proc_ns.h>
> #include <linux/binfmts.h>
>
> #include <linux/sched.h>
> @@ -855,6 +856,68 @@ SYSCALL_DEFINE0(getppid)
> return pid;
> }
>
> +SYSCALL_DEFINE3(getvpid, pid_t, pid, int, source, int, target)
> +{
> + struct file *source_file = NULL, *target_file = NULL;
> + struct pid_namespace *source_ns, *target_ns;
> + struct pid *struct_pid;
> + struct ns_common *ns;
> + pid_t result;
> +
> + if (source >= 0) {
> + source_file = proc_ns_fget(source);
> + result = PTR_ERR(source_file);
> + if (IS_ERR(source_file))
> + goto out;
> + ns = get_proc_ns(file_inode(source_file));
> + result = -EINVAL;
> + if (ns->ops->type != CLONE_NEWPID)
> + goto out;
> + source_ns = container_of(ns, struct pid_namespace, ns);
> + } else
> + source_ns = task_active_pid_ns(current);
> +
> + if (target >= 0) {
> + target_file = proc_ns_fget(target);
> + result = PTR_ERR(target_file);
> + if (IS_ERR(target_file))
> + goto out;
> + ns = get_proc_ns(file_inode(target_file));
> + result = -EINVAL;
> + if (ns->ops->type != CLONE_NEWPID)
> + goto out;
> + target_ns = container_of(ns, struct pid_namespace, ns);
> + } else
> + target_ns = task_active_pid_ns(current);
> +
The source ns and target ns translations look the same; why not extract
a new helper function to do the job?
Thanks,
Chen
> + rcu_read_lock();
> + struct_pid = find_pid_ns(abs(pid), source_ns);
> +
> + if (struct_pid && pid < 0) {
> + struct task_struct *task;
> +
> + task = pid_task(struct_pid, PIDTYPE_PID);
> + if (task)
> + task = rcu_dereference(task->real_parent);
> + struct_pid = task ? task_pid(task) : NULL;
> + }
> +
> + if (struct_pid)
> + result = pid_nr_ns(struct_pid, target_ns);
> + else
> + result = -ESRCH;
> + rcu_read_unlock();
> +
> +out:
> + if (!IS_ERR_OR_NULL(target_file))
> + fput(target_file);
> +
> + if (!IS_ERR_OR_NULL(source_file))
> + fput(source_file);
> +
> + return result;
> +}
> +
> SYSCALL_DEFINE0(getuid)
> {
> /* Only we change this so SMP safe */
>
> .
>
^ permalink raw reply [flat|nested] 5+ messages in thread
end of thread, other threads:[~2015-09-25 8:36 UTC | newest]
Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2015-09-24 13:53 [PATCH RFC v2] pidns: introduce syscall getvpid Konstantin Khlebnikov
2015-09-24 14:56 ` Oleg Nesterov
[not found] ` <20150924145647.GA24151-H+wXaHxf7aLQT0dZR+AlfA@public.gmane.org>
2015-09-24 15:47 ` Konstantin Khlebnikov
[not found] ` <CALYGNiPCVFNLi=1iL1gOf411iU6tT+DbVx1JuGG+f73Rpvx3dw-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
2015-09-24 17:34 ` Oleg Nesterov
2015-09-25 8:36 ` Chen Fan
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).