From: pierre.peiffer-6ktuUTfB/bM@public.gmane.org
To: linux-kernel-u79uwXL29TY76Z2rM5mHXA@public.gmane.org,
containers-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA@public.gmane.org
Subject: [PATCH 2.6.24-rc8-mm1 12/15] (RFC) IPC/semaphores: make use of RCU to free the sem_undo_list
Date: Tue, 29 Jan 2008 17:02:41 +0100 [thread overview]
Message-ID: <20080129162131.743970100@bull.net> (raw)
In-Reply-To: 20080129160229.612172683@bull.net
[-- Attachment #1: ipc_exit_sem_use_rcu.patch --]
[-- Type: text/plain, Size: 5120 bytes --]
From: Pierre Peiffer <pierre.peiffer-6ktuUTfB/bM@public.gmane.org>
Today, the sem_undo_list is freed when the last task using it exits.
There is no mechanism in place, that allows a safe concurrent access to
the sem_undo_list of a target task and protects efficiently against a
task-exit.
That is okay for now as we don't need this.
As I would like to provide a /proc interface to access this data, I need
such a safe access, without blocking the target task if possible.
This patch proposes to introduce the use of RCU to delay the real free of
these sem_undo_list structures. They can then be accessed in a safe manner
by any tasks inside read critical section, this way:
struct sem_undo_list *undo_list;
int ret;
...
rcu_read_lock();
undo_list = rcu_dereference(task->sysvsem.undo_list);
if (undo_list)
ret = atomic_inc_not_zero(&undo_list->refcnt);
rcu_read_unlock();
...
if (undo_list && ret) {
/* section where undo_list can be used quietly */
...
}
...
Signed-off-by: Pierre Peiffer <pierre.peiffer-6ktuUTfB/bM@public.gmane.org>
---
include/linux/sem.h | 7 +++++--
ipc/sem.c | 42 ++++++++++++++++++++++++++----------------
2 files changed, 31 insertions(+), 18 deletions(-)
Index: b/include/linux/sem.h
===================================================================
--- a/include/linux/sem.h
+++ b/include/linux/sem.h
@@ -115,7 +115,8 @@ struct sem_queue {
};
/* Each task has a list of undo requests. They are executed automatically
- * when the process exits.
+ * when the last refcnt of sem_undo_list is released (ie when the process exits
+ * in the general case)
*/
struct sem_undo {
struct sem_undo * proc_next; /* next entry on this process */
@@ -125,12 +126,14 @@ struct sem_undo {
};
/* sem_undo_list controls shared access to the list of sem_undo structures
- * that may be shared among all a CLONE_SYSVSEM task group.
+ * that may be shared among all a CLONE_SYSVSEM task group or with an external
+ * process which changes the list through procfs.
*/
struct sem_undo_list {
atomic_t refcnt;
spinlock_t lock;
struct sem_undo *proc_list;
+ struct ipc_namespace *ns;
};
struct sysv_sem {
Index: b/ipc/sem.c
===================================================================
--- a/ipc/sem.c
+++ b/ipc/sem.c
@@ -1038,6 +1038,7 @@ static inline int get_undo_list(struct s
return -ENOMEM;
spin_lock_init(&undo_list->lock);
atomic_set(&undo_list->refcnt, 1);
+ undo_list->ns = get_ipc_ns(current->nsproxy->ipc_ns);
current->sysvsem.undo_list = undo_list;
}
*undo_listp = undo_list;
@@ -1316,7 +1317,8 @@ int copy_semundo(unsigned long clone_fla
}
/*
- * add semadj values to semaphores, free undo structures.
+ * add semadj values to semaphores, free undo structures, if there is no
+ * more user.
* undo structures are not freed when semaphore arrays are destroyed
* so some of them may be out of date.
* IMPLEMENTATION NOTE: There is some confusion over whether the
@@ -1326,23 +1328,17 @@ int copy_semundo(unsigned long clone_fla
* The original implementation attempted to do this (queue and wait).
* The current implementation does not do so. The POSIX standard
* and SVID should be consulted to determine what behavior is mandated.
+ *
+ * Note:
+ * A concurrent task is only allowed to access and go through the list
+ * of sem_undo if it successfully grabs a refcnt.
*/
-void exit_sem(struct task_struct *tsk)
+static void free_semundo_list(struct sem_undo_list *undo_list)
{
- struct sem_undo_list *undo_list;
struct sem_undo *u, **up;
- struct ipc_namespace *ns;
- undo_list = tsk->sysvsem.undo_list;
- if (!undo_list)
- return;
-
- if (!atomic_dec_and_test(&undo_list->refcnt))
- return;
-
- ns = tsk->nsproxy->ipc_ns;
- /* There's no need to hold the semundo list lock, as current
- * is the last task exiting for this undo list.
+ /* There's no need to hold the semundo list lock, as there are
+ * no more tasks or possible users for this undo list.
*/
for (up = &undo_list->proc_list; (u = *up); *up = u->proc_next, kfree(u)) {
struct sem_array *sma;
@@ -1354,7 +1350,7 @@ void exit_sem(struct task_struct *tsk)
if(semid == -1)
continue;
- sma = sem_lock(ns, semid);
+ sma = sem_lock(undo_list->ns, semid);
if (IS_ERR(sma))
continue;
@@ -1368,7 +1364,8 @@ void exit_sem(struct task_struct *tsk)
if (u == un)
goto found;
}
- printk ("exit_sem undo list error id=%d\n", u->semid);
+ printk(KERN_ERR "free_semundo_list error id=%d\n",
+ u->semid);
goto next_entry;
found:
*unp = un->id_next;
@@ -1404,9 +1401,22 @@ found:
next_entry:
sem_unlock(sma);
}
+ put_ipc_ns(undo_list->ns);
kfree(undo_list);
}
+/* called from do_exit() */
+void exit_sem(struct task_struct *tsk)
+{
+ struct sem_undo_list *ul = tsk->sysvsem.undo_list;
+ if (ul) {
+ rcu_assign_pointer(tsk->sysvsem.undo_list, NULL);
+ synchronize_rcu();
+ if (atomic_dec_and_test(&ul->refcnt))
+ free_semundo_list(ul);
+ }
+}
+
#ifdef CONFIG_PROC_FS
static int sysvipc_sem_proc_show(struct seq_file *s, void *it)
{
--
Pierre Peiffer
WARNING: multiple messages have this Message-ID (diff)
From: pierre.peiffer@bull.net
To: linux-kernel@vger.kernel.org, containers@lists.linux-foundation.org
Subject: [PATCH 2.6.24-rc8-mm1 12/15] (RFC) IPC/semaphores: make use of RCU to free the sem_undo_list
Date: Tue, 29 Jan 2008 17:02:41 +0100 [thread overview]
Message-ID: <20080129162131.743970100@bull.net> (raw)
In-Reply-To: 20080129160229.612172683@bull.net
[-- Attachment #1: ipc_exit_sem_use_rcu.patch --]
[-- Type: text/plain, Size: 5080 bytes --]
From: Pierre Peiffer <pierre.peiffer@bull.net>
Today, the sem_undo_list is freed when the last task using it exits.
There is no mechanism in place, that allows a safe concurrent access to
the sem_undo_list of a target task and protects efficiently against a
task-exit.
That is okay for now as we don't need this.
As I would like to provide a /proc interface to access this data, I need
such a safe access, without blocking the target task if possible.
This patch proposes to introduce the use of RCU to delay the real free of
these sem_undo_list structures. They can then be accessed in a safe manner
by any tasks inside read critical section, this way:
struct sem_undo_list *undo_list;
int ret;
...
rcu_read_lock();
undo_list = rcu_dereference(task->sysvsem.undo_list);
if (undo_list)
ret = atomic_inc_not_zero(&undo_list->refcnt);
rcu_read_unlock();
...
if (undo_list && ret) {
/* section where undo_list can be used quietly */
...
}
...
Signed-off-by: Pierre Peiffer <pierre.peiffer@bull.net>
---
include/linux/sem.h | 7 +++++--
ipc/sem.c | 42 ++++++++++++++++++++++++++----------------
2 files changed, 31 insertions(+), 18 deletions(-)
Index: b/include/linux/sem.h
===================================================================
--- a/include/linux/sem.h
+++ b/include/linux/sem.h
@@ -115,7 +115,8 @@ struct sem_queue {
};
/* Each task has a list of undo requests. They are executed automatically
- * when the process exits.
+ * when the last refcnt of sem_undo_list is released (ie when the process exits
+ * in the general case)
*/
struct sem_undo {
struct sem_undo * proc_next; /* next entry on this process */
@@ -125,12 +126,14 @@ struct sem_undo {
};
/* sem_undo_list controls shared access to the list of sem_undo structures
- * that may be shared among all a CLONE_SYSVSEM task group.
+ * that may be shared among all a CLONE_SYSVSEM task group or with an external
+ * process which changes the list through procfs.
*/
struct sem_undo_list {
atomic_t refcnt;
spinlock_t lock;
struct sem_undo *proc_list;
+ struct ipc_namespace *ns;
};
struct sysv_sem {
Index: b/ipc/sem.c
===================================================================
--- a/ipc/sem.c
+++ b/ipc/sem.c
@@ -1038,6 +1038,7 @@ static inline int get_undo_list(struct s
return -ENOMEM;
spin_lock_init(&undo_list->lock);
atomic_set(&undo_list->refcnt, 1);
+ undo_list->ns = get_ipc_ns(current->nsproxy->ipc_ns);
current->sysvsem.undo_list = undo_list;
}
*undo_listp = undo_list;
@@ -1316,7 +1317,8 @@ int copy_semundo(unsigned long clone_fla
}
/*
- * add semadj values to semaphores, free undo structures.
+ * add semadj values to semaphores, free undo structures, if there is no
+ * more user.
* undo structures are not freed when semaphore arrays are destroyed
* so some of them may be out of date.
* IMPLEMENTATION NOTE: There is some confusion over whether the
@@ -1326,23 +1328,17 @@ int copy_semundo(unsigned long clone_fla
* The original implementation attempted to do this (queue and wait).
* The current implementation does not do so. The POSIX standard
* and SVID should be consulted to determine what behavior is mandated.
+ *
+ * Note:
+ * A concurrent task is only allowed to access and go through the list
+ * of sem_undo if it successfully grabs a refcnt.
*/
-void exit_sem(struct task_struct *tsk)
+static void free_semundo_list(struct sem_undo_list *undo_list)
{
- struct sem_undo_list *undo_list;
struct sem_undo *u, **up;
- struct ipc_namespace *ns;
- undo_list = tsk->sysvsem.undo_list;
- if (!undo_list)
- return;
-
- if (!atomic_dec_and_test(&undo_list->refcnt))
- return;
-
- ns = tsk->nsproxy->ipc_ns;
- /* There's no need to hold the semundo list lock, as current
- * is the last task exiting for this undo list.
+ /* There's no need to hold the semundo list lock, as there are
+ * no more tasks or possible users for this undo list.
*/
for (up = &undo_list->proc_list; (u = *up); *up = u->proc_next, kfree(u)) {
struct sem_array *sma;
@@ -1354,7 +1350,7 @@ void exit_sem(struct task_struct *tsk)
if(semid == -1)
continue;
- sma = sem_lock(ns, semid);
+ sma = sem_lock(undo_list->ns, semid);
if (IS_ERR(sma))
continue;
@@ -1368,7 +1364,8 @@ void exit_sem(struct task_struct *tsk)
if (u == un)
goto found;
}
- printk ("exit_sem undo list error id=%d\n", u->semid);
+ printk(KERN_ERR "free_semundo_list error id=%d\n",
+ u->semid);
goto next_entry;
found:
*unp = un->id_next;
@@ -1404,9 +1401,22 @@ found:
next_entry:
sem_unlock(sma);
}
+ put_ipc_ns(undo_list->ns);
kfree(undo_list);
}
+/* called from do_exit() */
+void exit_sem(struct task_struct *tsk)
+{
+ struct sem_undo_list *ul = tsk->sysvsem.undo_list;
+ if (ul) {
+ rcu_assign_pointer(tsk->sysvsem.undo_list, NULL);
+ synchronize_rcu();
+ if (atomic_dec_and_test(&ul->refcnt))
+ free_semundo_list(ul);
+ }
+}
+
#ifdef CONFIG_PROC_FS
static int sysvipc_sem_proc_show(struct seq_file *s, void *it)
{
--
Pierre Peiffer
next prev parent reply other threads:[~2008-01-29 16:02 UTC|newest]
Thread overview: 91+ messages / expand[flat|nested] mbox.gz Atom feed top
2008-01-29 16:02 [PATCH 2.6.24-rc8-mm1 00/15] IPC: code rewrite + new functionalities pierre.peiffer
2008-01-29 16:02 ` [PATCH 2.6.24-rc8-mm1 01/15] IPC/semaphores: code factorisation pierre.peiffer
2008-01-29 16:02 ` pierre.peiffer-6ktuUTfB/bM
2008-01-29 16:02 ` [PATCH 2.6.24-rc8-mm1 02/15] IPC/shared memory: introduce shmctl_down pierre.peiffer
2008-01-29 16:02 ` pierre.peiffer-6ktuUTfB/bM
2008-01-29 16:02 ` [PATCH 2.6.24-rc8-mm1 03/15] IPC/message queues: introduce msgctl_down pierre.peiffer
2008-01-29 16:02 ` pierre.peiffer-6ktuUTfB/bM
2008-01-29 16:02 ` [PATCH 2.6.24-rc8-mm1 04/15] IPC/semaphores: move the rwmutex handling inside semctl_down pierre.peiffer
2008-01-29 16:02 ` pierre.peiffer-6ktuUTfB/bM
2008-01-29 16:02 ` [PATCH 2.6.24-rc8-mm1 05/15] IPC/semaphores: remove one unused parameter from semctl_down() pierre.peiffer
2008-01-31 8:32 ` Nadia Derbey
[not found] ` <47A187AE.1000809-6ktuUTfB/bM@public.gmane.org>
2008-01-31 10:18 ` Pierre Peiffer
2008-01-31 10:18 ` Pierre Peiffer
[not found] ` <47A1A076.4060108-6ktuUTfB/bM@public.gmane.org>
2008-01-31 11:30 ` Nadia Derbey
2008-01-31 11:30 ` Nadia Derbey
[not found] ` <20080129161758.154775218-6ktuUTfB/bM@public.gmane.org>
2008-01-31 8:32 ` Nadia Derbey
2008-01-29 16:02 ` pierre.peiffer-6ktuUTfB/bM
2008-01-29 16:02 ` [PATCH 2.6.24-rc8-mm1 06/15] IPC: get rid of the use *_setbuf structure pierre.peiffer-6ktuUTfB/bM
2008-01-29 16:02 ` pierre.peiffer
2008-01-29 16:02 ` [PATCH 2.6.24-rc8-mm1 07/15] IPC: introduce ipc_update_perm() pierre.peiffer-6ktuUTfB/bM
2008-01-29 16:02 ` pierre.peiffer
2008-01-29 16:02 ` [PATCH 2.6.24-rc8-mm1 08/15] IPC: consolidate all xxxctl_down() functions pierre.peiffer-6ktuUTfB/bM
2008-01-29 16:02 ` pierre.peiffer
2008-01-29 16:02 ` [PATCH 2.6.24-rc8-mm1 09/15] (RFC) IPC: new kernel API to change an ID pierre.peiffer-6ktuUTfB/bM
2008-01-29 16:02 ` pierre.peiffer
[not found] ` <20080129162000.454857358-6ktuUTfB/bM@public.gmane.org>
2008-01-29 21:06 ` Alexey Dobriyan
2008-01-29 21:06 ` Alexey Dobriyan
2008-01-31 9:00 ` Pierre Peiffer
2008-01-31 9:54 ` Kirill Korotaev
[not found] ` <47A19AC2.7040709-3ImXcnM4P+0@public.gmane.org>
2008-01-31 11:57 ` Pierre Peiffer
2008-01-31 11:57 ` Pierre Peiffer
2008-01-31 13:11 ` Kirill Korotaev
2008-01-31 16:10 ` Cedric Le Goater
[not found] ` <47A1F2DB.7080600-NmTC/0ZBporQT0dZR+AlfA@public.gmane.org>
2008-02-04 13:41 ` Kirill Korotaev
2008-02-04 13:41 ` Kirill Korotaev
[not found] ` <47A71606.5030201-3ImXcnM4P+0@public.gmane.org>
2008-02-04 14:06 ` [Devel] " Pavel Emelyanov
2008-02-04 14:06 ` Pavel Emelyanov
[not found] ` <47A71BDF.5000801-GEFAQzZX7r8dnm+yROfE0A@public.gmane.org>
2008-02-04 15:00 ` Daniel Lezcano
2008-02-04 15:00 ` Daniel Lezcano
2008-02-04 15:16 ` Pavel Emelyanov
[not found] ` <47A72891.4000404-NmTC/0ZBporQT0dZR+AlfA@public.gmane.org>
2008-02-04 15:16 ` Pavel Emelyanov
[not found] ` <47A1C8FE.9010700-3ImXcnM4P+0@public.gmane.org>
2008-01-31 16:10 ` Cedric Le Goater
[not found] ` <47A1B78C.7050405-6ktuUTfB/bM@public.gmane.org>
2008-01-31 13:11 ` Kirill Korotaev
2008-02-05 9:51 ` Oren Laadan
2008-02-05 9:51 ` Oren Laadan
[not found] ` <47A83194.8060808-eQaUEPhvms7ENvBUuze7eA@public.gmane.org>
2008-02-05 18:00 ` Dave Hansen
2008-02-05 18:42 ` Serge E. Hallyn
2008-02-05 18:00 ` Dave Hansen
2008-02-05 18:42 ` Serge E. Hallyn
[not found] ` <20080205184234.GA28923-6s5zFf/epYLPQpwDFJZrxKsjOiXwFzmk@public.gmane.org>
2008-02-06 2:07 ` Oren Laadan
2008-02-06 2:07 ` Oren Laadan
[not found] ` <47A91652.4090506-eQaUEPhvms7ENvBUuze7eA@public.gmane.org>
2008-02-06 5:00 ` Serge E. Hallyn
2008-02-06 5:00 ` Serge E. Hallyn
2008-02-08 10:12 ` Pierre Peiffer
2008-02-08 10:12 ` Pierre Peiffer
[not found] ` <47A18E47.5050206-6ktuUTfB/bM@public.gmane.org>
2008-01-31 9:54 ` Kirill Korotaev
[not found] ` <20080129210656.GB1990-QDJVlCTZ4KWTKS93B3g+7KFoa47nwP16@public.gmane.org>
2008-01-30 9:52 ` Pierre Peiffer
2008-01-30 9:52 ` Pierre Peiffer
2008-01-31 9:00 ` Pierre Peiffer
2008-01-29 16:02 ` [PATCH 2.6.24-rc8-mm1 10/15] (RFC) IPC: new IPC_SETID command to modify " pierre.peiffer-6ktuUTfB/bM
2008-01-29 16:02 ` pierre.peiffer
2008-01-29 16:02 ` [PATCH 2.6.24-rc8-mm1 11/15] (RFC) IPC: new IPC_SETALL command to modify all settings pierre.peiffer-6ktuUTfB/bM
2008-01-29 16:02 ` pierre.peiffer
2008-01-29 16:02 ` pierre.peiffer-6ktuUTfB/bM [this message]
2008-01-29 16:02 ` [PATCH 2.6.24-rc8-mm1 12/15] (RFC) IPC/semaphores: make use of RCU to free the sem_undo_list pierre.peiffer
2008-01-30 21:26 ` Serge E. Hallyn
2008-01-31 9:52 ` Pierre Peiffer
[not found] ` <20080130212650.GA8945-6s5zFf/epYLPQpwDFJZrxFMas7LaWZ9n@public.gmane.org>
2008-01-31 9:52 ` Pierre Peiffer
[not found] ` <20080129162131.743970100-6ktuUTfB/bM@public.gmane.org>
2008-01-30 21:26 ` Serge E. Hallyn
2008-01-29 16:02 ` [PATCH 2.6.24-rc8-mm1 13/15] (RFC) IPC/semaphores: per <pid> semundo file in procfs pierre.peiffer-6ktuUTfB/bM
2008-01-29 16:02 ` pierre.peiffer
2008-01-29 16:02 ` [PATCH 2.6.24-rc8-mm1 14/15] (RFC) IPC/semaphores: prepare semundo code to work on another task than current pierre.peiffer-6ktuUTfB/bM
2008-01-29 16:02 ` pierre.peiffer
2008-01-30 21:44 ` Serge E. Hallyn
[not found] ` <20080130214430.GB8945-6s5zFf/epYLPQpwDFJZrxFMas7LaWZ9n@public.gmane.org>
2008-01-31 9:48 ` Pierre Peiffer
2008-01-31 9:48 ` Pierre Peiffer
2008-01-31 18:01 ` Serge E. Hallyn
2008-02-01 12:09 ` Pierre Peiffer
[not found] ` <20080131180125.GA5617-6s5zFf/epYL1ENwx4SLHqw@public.gmane.org>
2008-02-01 12:09 ` Pierre Peiffer
[not found] ` <47A19988.5030108-6ktuUTfB/bM@public.gmane.org>
2008-01-31 18:01 ` Serge E. Hallyn
[not found] ` <20080129162232.843976550-6ktuUTfB/bM@public.gmane.org>
2008-01-30 21:44 ` Serge E. Hallyn
2008-01-29 16:02 ` [PATCH 2.6.24-rc8-mm1 15/15] (RFC) IPC/semaphores: add write() operation to semundo file in procfs pierre.peiffer-6ktuUTfB/bM
2008-01-29 16:02 ` pierre.peiffer
2008-02-02 18:23 ` [PATCH 2.6.24-rc8-mm1 00/15] IPC: code rewrite + new functionalities Pavel Machek
[not found] ` <20080202182351.GC4456-+ZI9xUNit7I@public.gmane.org>
2008-02-04 13:52 ` Pierre Peiffer
2008-02-04 15:44 ` Benjamin Thery
2008-02-04 13:52 ` Pierre Peiffer
2008-02-04 15:44 ` Benjamin Thery
2008-02-04 19:51 ` Pavel Machek
[not found] ` <47A732E2.1000504-6ktuUTfB/bM@public.gmane.org>
2008-02-04 19:51 ` Pavel Machek
[not found] ` <20080129160229.612172683-6ktuUTfB/bM@public.gmane.org>
2008-02-02 18:23 ` Pavel Machek
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20080129162131.743970100@bull.net \
--to=pierre.peiffer-6ktuutfb/bm@public.gmane.org \
--cc=containers-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA@public.gmane.org \
--cc=linux-kernel-u79uwXL29TY76Z2rM5mHXA@public.gmane.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.