* [PATCH 1/2] New system call, unshare
@ 2005-08-08 13:33 Janak Desai
2005-08-08 14:46 ` Alan Cox
0 siblings, 1 reply; 5+ messages in thread
From: Janak Desai @ 2005-08-08 13:33 UTC (permalink / raw)
To: viro, sds, linuxram, ericvh, dwalsh, jmorris, akpm, torvalds, gh,
linux-fsdevel
Cc: linux-kernel
[PATCH 1/2] unshare system call: System Call handler function sys_unshare
Signed-off-by: Janak Desai
fork.c | 202 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++--
1 files changed, 196 insertions(+), 6 deletions(-)
diff -Naurp 2.6.13-rc5-mm1/kernel/fork.c 2.6.13-rc5-mm1+unshare/kernel/fork.c
--- 2.6.13-rc5-mm1/kernel/fork.c 2005-08-07 15:33:45.000000000 +0000
+++ 2.6.13-rc5-mm1+unshare/kernel/fork.c 2005-08-07 19:03:49.000000000 +0000
@@ -57,6 +57,17 @@ int nr_threads; /* The idle threads do
int max_threads; /* tunable limit on nr_threads */
+/*
+ * mm_copy gets called from clone or unshare system calls. When called
+ * from clone, mm_struct may be shared depending on the clone flags
+ * argument, however, when called from the unshare system call, a private
+ * copy of mm_struct is made.
+ */
+enum mm_copy_share {
+ MAY_SHARE,
+ UNSHARE,
+};
+
DEFINE_PER_CPU(unsigned long, process_counts) = 0;
__cacheline_aligned DEFINE_RWLOCK(tasklist_lock); /* outer */
@@ -447,16 +458,26 @@ void mm_release(struct task_struct *tsk,
}
}
-static int copy_mm(unsigned long clone_flags, struct task_struct * tsk)
+static int copy_mm(unsigned long clone_flags, struct task_struct * tsk,
+ enum mm_copy_share copy_share_action)
{
struct mm_struct * mm, *oldmm;
int retval;
- tsk->min_flt = tsk->maj_flt = 0;
- tsk->nvcsw = tsk->nivcsw = 0;
+ /*
+ * If the process memory is being duplicated as part of the
+ * unshare system call, we are working with the current process
+ * and not a newly allocated task strucutre, and should not
+ * zero out fault info, context switch counts, mm and active_mm
+ * fields.
+ */
+ if (copy_share_action == MAY_SHARE) {
+ tsk->min_flt = tsk->maj_flt = 0;
+ tsk->nvcsw = tsk->nivcsw = 0;
- tsk->mm = NULL;
- tsk->active_mm = NULL;
+ tsk->mm = NULL;
+ tsk->active_mm = NULL;
+ }
/*
* Are we cloning a kernel thread?
@@ -973,7 +994,7 @@ static task_t *copy_process(unsigned lon
goto bad_fork_cleanup_fs;
if ((retval = copy_signal(clone_flags, p)))
goto bad_fork_cleanup_sighand;
- if ((retval = copy_mm(clone_flags, p)))
+ if ((retval = copy_mm(clone_flags, p, MAY_SHARE)))
goto bad_fork_cleanup_signal;
if ((retval = copy_keys(clone_flags, p)))
goto bad_fork_cleanup_mm;
@@ -1288,3 +1309,172 @@ void __init proc_caches_init(void)
sizeof(struct mm_struct), 0,
SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL);
}
+
+/*
+ * unshare_mm is called from the unshare system call handler function to
+ * make a private copy of the mm_struct structure. It calls copy_mm with
+ * CLONE_VM flag cleard, to ensure that a private copy of mm_struct is made,
+ * and with mm_copy_share enum set to UNSHARE, to ensure that copy_mm
+ * does not clear fault info, context switch counts, mm and active_mm
+ * fields of the mm_struct.
+ */
+static int unshare_mm(unsigned long unshare_flags, struct task_struct *tsk)
+{
+ int retval = 0;
+ struct mm_struct *mm = tsk->mm;
+
+ /*
+ * If the virtual memory is being shared, make a private
+ * copy and disassociate the process from the shared virtual
+ * memory.
+ */
+ if (atomic_read(&mm->mm_users) > 1) {
+ retval = copy_mm((unshare_flags & ~CLONE_VM), tsk, UNSHARE);
+
+ /*
+ * If copy_mm was successful, decrement the number of users
+ * on the original, shared, mm_struct.
+ */
+ if (!retval)
+ atomic_dec(&mm->mm_users);
+ }
+ return retval;
+}
+
+/*
+ * unshare_sighand is called from the unshare system call handler function to
+ * make a private copy of the sighand_struct structure. It calls copy_sighand
+ * with CLONE_SIGHAND cleared to ensure that a new signal handler structure
+ * is cloned from the current shared one.
+ */
+static int unshare_sighand(unsigned long unshare_flags, struct task_struct *tsk)
+{
+ int retval = 0;
+ struct sighand_struct *sighand = tsk->sighand;
+
+ /*
+ * If the signal handlers are being shared, make a private
+ * copy and disassociate the process from the shared signal
+ * handlers.
+ */
+ if (atomic_read(&sighand->count) > 1) {
+ retval = copy_sighand((unshare_flags & ~CLONE_SIGHAND), tsk);
+
+ /*
+ * If copy_sighand was successful, decrement the use count
+ * on the original, shared, sighand_struct.
+ */
+ if (!retval)
+ atomic_dec(&sighand->count);
+ }
+ return retval;
+}
+
+/*
+ * unshare_namespace is called from the unshare system call handler
+ * function to make a private copy of the current shared namespace. It
+ * calls copy_namespace with CLONE_NEWNS set to ensure that a new
+ * namespace is cloned from the current namespace.
+ */
+static int unshare_namespace(struct task_struct *tsk)
+{
+ int retval = 0;
+ struct namespace *namespace = tsk->namespace;
+
+ /*
+ * If the namespace is being shared, make a private copy
+ * and disassociate the process from the shared namespace.
+ */
+ if (atomic_read(&namespace->count) > 1) {
+ retval = copy_namespace(CLONE_NEWNS, tsk);
+
+ /*
+ * If copy_namespace was successful, decrement the use count
+ * on the original, shared, namespace struct.
+ */
+ if (!retval)
+ atomic_dec(&namespace->count);
+ }
+ return retval;
+}
+
+/*
+ * unshare allows a process to 'unshare' part of the process
+ * context which was originally shared using clone.
+ */
+asmlinkage long sys_unshare(unsigned long unshare_flags)
+{
+ struct task_struct *tsk = current;
+ int retval = 0;
+ struct namespace *namespace;
+ struct mm_struct *mm;
+ struct sighand_struct *sighand;
+
+ if (unshare_flags & ~(CLONE_NEWNS | CLONE_VM | CLONE_SIGHAND))
+ goto bad_unshare_invalid_val;
+
+ /*
+ * Shared signal handlers imply shared VM, so if CLONE_SIGHAND is
+ * set, CLONE_VM must also be set in the system call argument.
+ */
+ if ((unshare_flags & CLONE_SIGHAND) && !(unshare_flags & CLONE_VM))
+ goto bad_unshare_invalid_val;
+
+ task_lock(tsk);
+ namespace = tsk->namespace;
+ mm = tsk->mm;
+ sighand = tsk->sighand;
+
+ if (unshare_flags & CLONE_VM) {
+ retval = unshare_mm(unshare_flags, tsk);
+ if (retval)
+ goto unshare_unlock_task;
+ else if (unshare_flags & CLONE_SIGHAND) {
+ retval = unshare_sighand(unshare_flags, tsk);
+ if (retval)
+ goto bad_unshare_cleanup_mm;
+ }
+ }
+
+ if (unshare_flags & CLONE_NEWNS) {
+ retval = unshare_namespace(tsk);
+ if (retval)
+ goto bad_unshare_cleanup_sighand;
+ }
+
+unshare_unlock_task:
+ task_unlock(tsk);
+
+unshare_out:
+ return retval;
+
+bad_unshare_cleanup_sighand:
+ /*
+ * If signal handlers were unshared (private copy was made),
+ * clean them up (delete the private copy) and restore
+ * the task to point to the old, shared, value.
+ */
+ if (unshare_flags & CLONE_SIGHAND) {
+ exit_sighand(tsk);
+ tsk->sighand = sighand;
+ atomic_inc(&sighand->count);
+ }
+
+bad_unshare_cleanup_mm:
+ /*
+ * If mm struct was unshared (private copy was made),
+ * clean it up (delete the private copy) and restore
+ * the task to point to the old, shared, value.
+ */
+ if (unshare_flags & CLONE_VM) {
+ if (tsk->mm)
+ mmput(tsk->mm);
+ tsk->mm = mm;
+ atomic_inc(&mm->mm_users);
+ }
+ goto unshare_unlock_task;
+
+bad_unshare_invalid_val:
+ retval = -EINVAL;
+ goto unshare_out;
+}
^ permalink raw reply [flat|nested] 5+ messages in thread
* Re: [PATCH 1/2] New system call, unshare
2005-08-08 14:46 ` Alan Cox
@ 2005-08-08 14:41 ` serge
2005-08-08 15:37 ` Avi Kivity
2005-08-23 6:07 ` Al Viro
2 siblings, 0 replies; 5+ messages in thread
From: serge @ 2005-08-08 14:41 UTC (permalink / raw)
To: Alan Cox
Cc: Janak Desai, viro, sds, linuxram, ericvh, dwalsh, jmorris, akpm,
torvalds, gh, linux-fsdevel, linux-kernel
Quoting Alan Cox (alan@lxorguk.ukuu.org.uk):
> On Llu, 2005-08-08 at 09:33 -0400, Janak Desai wrote:
> >
> > [PATCH 1/2] unshare system call: System Call handler function sys_unshare
>
>
> Given the complexity of the kernel code involved and the obscurity of
> the functionality why not just do another clone() in userspace to
> unshare the things you want to unshare and then _exit the parent ?
The problem I had when I tried using just clone() was that it's
not possible to have a pam library clone() and have the process
being authenticated end up with the new namespace. At least not
that I could figure out. Seemed possible that cloning, exiting the
original thread, and returning from the new thread could work, but
it didn't seem to work when I tried it.
thanks,
-serge
^ permalink raw reply [flat|nested] 5+ messages in thread
* Re: [PATCH 1/2] New system call, unshare
2005-08-08 13:33 [PATCH 1/2] New system call, unshare Janak Desai
@ 2005-08-08 14:46 ` Alan Cox
2005-08-08 14:41 ` serge
` (2 more replies)
0 siblings, 3 replies; 5+ messages in thread
From: Alan Cox @ 2005-08-08 14:46 UTC (permalink / raw)
To: Janak Desai
Cc: viro, sds, linuxram, ericvh, dwalsh, jmorris, akpm, torvalds, gh,
linux-fsdevel, linux-kernel
On Llu, 2005-08-08 at 09:33 -0400, Janak Desai wrote:
>
> [PATCH 1/2] unshare system call: System Call handler function sys_unshare
Given the complexity of the kernel code involved and the obscurity of
the functionality why not just do another clone() in userspace to
unshare the things you want to unshare and then _exit the parent ?
^ permalink raw reply [flat|nested] 5+ messages in thread
* Re: [PATCH 1/2] New system call, unshare
2005-08-08 14:46 ` Alan Cox
2005-08-08 14:41 ` serge
@ 2005-08-08 15:37 ` Avi Kivity
2005-08-23 6:07 ` Al Viro
2 siblings, 0 replies; 5+ messages in thread
From: Avi Kivity @ 2005-08-08 15:37 UTC (permalink / raw)
To: Alan Cox
Cc: Janak Desai, viro, sds, linuxram, ericvh, dwalsh, jmorris, akpm,
torvalds, gh, linux-fsdevel, linux-kernel
Alan Cox wrote:
>On Llu, 2005-08-08 at 09:33 -0400, Janak Desai wrote:
>
>
>>[PATCH 1/2] unshare system call: System Call handler function sys_unshare
>>
>>
>
>
>Given the complexity of the kernel code involved and the obscurity of
>the functionality why not just do another clone() in userspace to
>unshare the things you want to unshare and then _exit the parent ?
>
>
>
suppose somebody wait()s for the parent? you've turned a synchronous
operation into an asynchronous one.
^ permalink raw reply [flat|nested] 5+ messages in thread
* Re: [PATCH 1/2] New system call, unshare
2005-08-08 14:46 ` Alan Cox
2005-08-08 14:41 ` serge
2005-08-08 15:37 ` Avi Kivity
@ 2005-08-23 6:07 ` Al Viro
2 siblings, 0 replies; 5+ messages in thread
From: Al Viro @ 2005-08-23 6:07 UTC (permalink / raw)
To: Alan Cox
Cc: Janak Desai, sds, linuxram, ericvh, dwalsh, jmorris, akpm,
torvalds, gh, linux-fsdevel, linux-kernel
On Mon, Aug 08, 2005 at 03:46:06PM +0100, Alan Cox wrote:
> On Llu, 2005-08-08 at 09:33 -0400, Janak Desai wrote:
> >
> > [PATCH 1/2] unshare system call: System Call handler function sys_unshare
>
>
> Given the complexity of the kernel code involved and the obscurity of
> the functionality why not just do another clone() in userspace to
> unshare the things you want to unshare and then _exit the parent ?
Because you want to keep children? Because you don't want to deal with
the implications for sessions/groups/etc.?
FWIW, syscall makes sense. It is a valid primitive and the only reason
to keep it out of clone() (i.e. not making it just another flag to clone())
is that clone() is already cluttered _and_ uses bad calling conventions
for that stuff ("I want to retain <list>" rather than "I want private <list>").
^ permalink raw reply [flat|nested] 5+ messages in thread
end of thread, other threads:[~2005-08-23 6:07 UTC | newest]
Thread overview: 5+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2005-08-08 13:33 [PATCH 1/2] New system call, unshare Janak Desai
2005-08-08 14:46 ` Alan Cox
2005-08-08 14:41 ` serge
2005-08-08 15:37 ` Avi Kivity
2005-08-23 6:07 ` Al Viro
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).