From: Al Viro <viro@ZenIV.linux.org.uk>
To: Linus Torvalds <torvalds@linux-foundation.org>
Cc: linux-kernel@vger.kernel.org
Subject: [RFC] new locking primitive (pulled from fs_pin)
Date: Mon, 13 Apr 2015 20:05:48 +0100 [thread overview]
Message-ID: <20150413190548.GQ889@ZenIV.linux.org.uk> (raw)
New structure. Intended use: embed into an object that will always
be freed with RCU delay.
Initialize with init_kill_once(&object->kill_once).
Use by grabbing rcu_read_lock(), finding the object, then
if (needs_killing(&object->kill_once)) {
// do whatever actions you want, including
// removal of references from the places
// where they could be found
finished_killing(&object->kill_once);
// arrange for RCU-delayed freeing
}
If several threads attempt that, only the first one will
see needs_killing() return true, *and* all the others will
wait in needs_killing() until the first one is past
finished_killing(). Note that the waiters might end up returning
after the object gets freed - needs_killing() itself is very
careful about dereferencing, and its caller MUST NOT touch
the object after getting false from needs_killing().
needs_killing() must be called with rcu_read_lock() held and
drops it in all cases.
This thing used to be the locking side of fs/fs_pin.c, but
IMO it might make sense to separate it from fs_pin completely -
it could be useful elsewhere...
Comments (and especially suggestions on better names) are welcome...
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
diff --git a/fs/fs_pin.c b/fs/fs_pin.c
index b06c987..b124faf 100644
--- a/fs/fs_pin.c
+++ b/fs/fs_pin.c
@@ -12,10 +12,7 @@ void pin_remove(struct fs_pin *pin)
hlist_del(&pin->m_list);
hlist_del(&pin->s_list);
spin_unlock(&pin_lock);
- spin_lock_irq(&pin->wait.lock);
- pin->done = 1;
- wake_up_locked(&pin->wait);
- spin_unlock_irq(&pin->wait.lock);
+ finished_killing(&pin->head);
}
void pin_insert_group(struct fs_pin *pin, struct vfsmount *m, struct hlist_head *p)
@@ -34,43 +31,12 @@ void pin_insert(struct fs_pin *pin, struct vfsmount *m)
void pin_kill(struct fs_pin *p)
{
- wait_queue_t wait;
-
if (!p) {
rcu_read_unlock();
return;
}
- init_wait(&wait);
- spin_lock_irq(&p->wait.lock);
- if (likely(!p->done)) {
- p->done = -1;
- spin_unlock_irq(&p->wait.lock);
- rcu_read_unlock();
+ if (needs_killing(&p->head))
p->kill(p);
- return;
- }
- if (p->done > 0) {
- spin_unlock_irq(&p->wait.lock);
- rcu_read_unlock();
- return;
- }
- __add_wait_queue(&p->wait, &wait);
- while (1) {
- set_current_state(TASK_UNINTERRUPTIBLE);
- spin_unlock_irq(&p->wait.lock);
- rcu_read_unlock();
- schedule();
- rcu_read_lock();
- if (likely(list_empty(&wait.task_list)))
- break;
- /* OK, we know p couldn't have been freed yet */
- spin_lock_irq(&p->wait.lock);
- if (p->done > 0) {
- spin_unlock_irq(&p->wait.lock);
- break;
- }
- }
- rcu_read_unlock();
}
void mnt_pin_kill(struct mount *m)
diff --git a/include/linux/fs_pin.h b/include/linux/fs_pin.h
index 9dc4e03..f65daad 100644
--- a/include/linux/fs_pin.h
+++ b/include/linux/fs_pin.h
@@ -1,8 +1,7 @@
-#include <linux/wait.h>
+#include <linux/kill_once.h>
struct fs_pin {
- wait_queue_head_t wait;
- int done;
+ struct kill_once head;
struct hlist_node s_list;
struct hlist_node m_list;
void (*kill)(struct fs_pin *);
@@ -12,7 +11,7 @@ struct vfsmount;
static inline void init_fs_pin(struct fs_pin *p, void (*kill)(struct fs_pin *))
{
- init_waitqueue_head(&p->wait);
+ init_kill_once(&p->head);
p->kill = kill;
}
diff --git a/include/linux/kill_once.h b/include/linux/kill_once.h
new file mode 100644
index 0000000..03a3717
--- /dev/null
+++ b/include/linux/kill_once.h
@@ -0,0 +1,28 @@
+#include <linux/wait.h>
+
+/*
+Intended use:
+ rcu_read_lock();
+ p = <....>
+ if (needs_killing(p)) {
+ kill it
+ finished_killing(p);
+ arrange for rcu-delayed freeing
+ } else {
+ we are guaranteed that it is an ex-parrot
+ }
+*/
+
+struct kill_once { /* one-shot "kill exactly once" state, embedded in the object to be killed */
+ wait_queue_head_t wait; /* late callers of needs_killing() queue here; wait.lock is held around accesses to done */
+ int done; /* 0: kill not started, -1: kill in progress, 1: finished_killing() has run */
+};
+
+static inline void init_kill_once(struct kill_once *p) /* prepare *p for its first needs_killing() call */
+{
+ init_waitqueue_head(&p->wait);
+ p->done = 0; /* 0 is the state in which needs_killing() returns true */
+}
+
+bool needs_killing(struct kill_once *);
+void finished_killing(struct kill_once *);
diff --git a/kernel/locking/Makefile b/kernel/locking/Makefile
index de7a416..c404207 100644
--- a/kernel/locking/Makefile
+++ b/kernel/locking/Makefile
@@ -1,5 +1,5 @@
-obj-y += mutex.o semaphore.o rwsem.o
+obj-y += mutex.o semaphore.o rwsem.o kill_once.o
ifdef CONFIG_FUNCTION_TRACER
CFLAGS_REMOVE_lockdep.o = $(CC_FLAGS_FTRACE)
diff --git a/kernel/locking/kill_once.c b/kernel/locking/kill_once.c
new file mode 100644
index 0000000..f59ad4b
--- /dev/null
+++ b/kernel/locking/kill_once.c
@@ -0,0 +1,59 @@
+#include <linux/sched.h>
+#include <linux/kill_once.h>
+
+void finished_killing(struct kill_once *p) /* marks the kill as complete and wakes everybody waiting in needs_killing() */
+{
+ spin_lock_irq(&p->wait.lock); /* done is read and written under wait.lock here and in needs_killing() */
+ p->done = 1; /* > 0: later needs_killing() calls return false without sleeping */
+ wake_up_locked(&p->wait); /* _locked variant: we already hold p->wait.lock */
+ spin_unlock_irq(&p->wait.lock);
+}
+
+bool needs_killing(struct kill_once *p) /* true for exactly one caller; expects rcu_read_lock() held and drops it on every return path */
+{
+ wait_queue_t wait;
+
+ init_wait(&wait);
+ spin_lock_irq(&p->wait.lock);
+ if (likely(!p->done)) { /* nobody has started killing yet: this caller becomes the killer */
+ p->done = -1; /* -1: kill in progress */
+ spin_unlock_irq(&p->wait.lock);
+ rcu_read_unlock();
+ return true;
+ }
+ if (p->done > 0) { /* kill already finished: nothing to wait for */
+ spin_unlock_irq(&p->wait.lock);
+ rcu_read_unlock();
+ return false;
+ }
+ __add_wait_queue(&p->wait, &wait); /* done < 0: another thread is killing; queue up while still holding wait.lock */
+ while (1) { /* every iteration is entered with p->wait.lock held */
+ set_current_state(TASK_UNINTERRUPTIBLE);
+ spin_unlock_irq(&p->wait.lock);
+ rcu_read_unlock(); /* from here until the check below p must not be dereferenced */
+ schedule();
+ rcu_read_lock();
+ /*
+ * racy, but we are OK with false negatives -
+ * if we observe anything other than an empty
+ * wait.task_list after taking rcu_read_lock(),
+ * we know that an RCU grace period started after
+ * finished_killing() couldn't have ended yet and
+ * dereferencing p is still safe.
+ */
+ if (likely(list_empty(&wait.task_list))) /* only our on-stack entry is examined; presumably emptied by the wake-up - confirm against init_wait() */
+ break;
+ /*
+ * OK, we know p couldn't have been freed yet and
+ * can take that spinlock safely
+ */
+ spin_lock_irq(&p->wait.lock);
+ /* now we can check p->done */
+ if (p->done > 0) {
+ spin_unlock_irq(&p->wait.lock);
+ break;
+ } /* otherwise go round again, still holding wait.lock */
+ }
+ rcu_read_unlock(); /* pairs with the rcu_read_lock() taken after schedule() */
+ return false; /* somebody else did the killing */
+}
next reply other threads:[~2015-04-13 19:05 UTC|newest]
Thread overview: 2+ messages / expand[flat|nested] mbox.gz Atom feed top
2015-04-13 19:05 Al Viro [this message]
2015-04-13 21:37 ` [RFC] new locking primitive (pulled from fs_pin) Linus Torvalds
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20150413190548.GQ889@ZenIV.linux.org.uk \
--to=viro@zeniv.linux.org.uk \
--cc=linux-kernel@vger.kernel.org \
--cc=torvalds@linux-foundation.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.