All of lore.kernel.org
 help / color / mirror / Atom feed
* [RFC] new locking primitive (pulled from fs_pin)
@ 2015-04-13 19:05 Al Viro
  2015-04-13 21:37 ` Linus Torvalds
  0 siblings, 1 reply; 2+ messages in thread
From: Al Viro @ 2015-04-13 19:05 UTC (permalink / raw)
  To: Linus Torvalds; +Cc: linux-kernel

New structure.  Intended use: embed into an object that will always
be freed with RCU delay.

Initialize with init_kill_once(&object->kill_once).

Use by grabbing rcu_read_lock(), finding the object, then
if (needs_killing(&object->kill_once)) {
	// do whatever actions you want, including
	// removal of references from the places
	// where they could be found
	finished_killing(&object->kill_once);
	// arrange for RCU-delayed freeing
}

If several threads attempt that, only the first one will
see needs_killing() return true *and* everything else will
wait in needs_killing() until the first one is past
finished_killing().  Note that they might end up returning
after the object gets freed - needs_killing() itself is very
careful about dereferencing and its caller MUST NOT touch
the object after getting false from needs_killing().

needs_killing() must be called with rcu_read_lock() held and
drops it in all cases.

This thing used to be the locking side of fs/fs_pin.c, but
IMO it might make sense to separate it from fs_pin completely -
it could be useful elsewhere...

Comments (and especially suggestions on better names) are welcome...

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
diff --git a/fs/fs_pin.c b/fs/fs_pin.c
index b06c987..b124faf 100644
--- a/fs/fs_pin.c
+++ b/fs/fs_pin.c
@@ -12,10 +12,7 @@ void pin_remove(struct fs_pin *pin)
 	hlist_del(&pin->m_list);
 	hlist_del(&pin->s_list);
 	spin_unlock(&pin_lock);
-	spin_lock_irq(&pin->wait.lock);
-	pin->done = 1;
-	wake_up_locked(&pin->wait);
-	spin_unlock_irq(&pin->wait.lock);
+	finished_killing(&pin->head);
 }
 
 void pin_insert_group(struct fs_pin *pin, struct vfsmount *m, struct hlist_head *p)
@@ -34,43 +31,12 @@ void pin_insert(struct fs_pin *pin, struct vfsmount *m)
 
 void pin_kill(struct fs_pin *p)
 {
-	wait_queue_t wait;
-
 	if (!p) {
 		rcu_read_unlock();
 		return;
 	}
-	init_wait(&wait);
-	spin_lock_irq(&p->wait.lock);
-	if (likely(!p->done)) {
-		p->done = -1;
-		spin_unlock_irq(&p->wait.lock);
-		rcu_read_unlock();
+	if (needs_killing(&p->head))
 		p->kill(p);
-		return;
-	}
-	if (p->done > 0) {
-		spin_unlock_irq(&p->wait.lock);
-		rcu_read_unlock();
-		return;
-	}
-	__add_wait_queue(&p->wait, &wait);
-	while (1) {
-		set_current_state(TASK_UNINTERRUPTIBLE);
-		spin_unlock_irq(&p->wait.lock);
-		rcu_read_unlock();
-		schedule();
-		rcu_read_lock();
-		if (likely(list_empty(&wait.task_list)))
-			break;
-		/* OK, we know p couldn't have been freed yet */
-		spin_lock_irq(&p->wait.lock);
-		if (p->done > 0) {
-			spin_unlock_irq(&p->wait.lock);
-			break;
-		}
-	}
-	rcu_read_unlock();
 }
 
 void mnt_pin_kill(struct mount *m)
diff --git a/include/linux/fs_pin.h b/include/linux/fs_pin.h
index 9dc4e03..f65daad 100644
--- a/include/linux/fs_pin.h
+++ b/include/linux/fs_pin.h
@@ -1,8 +1,7 @@
-#include <linux/wait.h>
+#include <linux/kill_once.h>
 
 struct fs_pin {
-	wait_queue_head_t	wait;
-	int			done;
+	struct kill_once	head;
 	struct hlist_node	s_list;
 	struct hlist_node	m_list;
 	void (*kill)(struct fs_pin *);
@@ -12,7 +11,7 @@ struct vfsmount;
 
 static inline void init_fs_pin(struct fs_pin *p, void (*kill)(struct fs_pin *))
 {
-	init_waitqueue_head(&p->wait);
+	init_kill_once(&p->head);
 	p->kill = kill;
 }
 
diff --git a/include/linux/kill_once.h b/include/linux/kill_once.h
new file mode 100644
index 0000000..03a3717
--- /dev/null
+++ b/include/linux/kill_once.h
@@ -0,0 +1,28 @@
+#include <linux/wait.h>
+
+/*
+Intended use:
+	rcu_read_lock();
+	p = <....>
+	if (needs_killing(p)) {
+		kill it
+		finished_killing(p);
+		arrange for rcu-delayed freeing
+	} else {
+		we are guaranteed that it is an ex-parrot
+	}
+*/
+
+struct kill_once {
+	wait_queue_head_t	wait;
+	int			done;
+};
+
+static inline void init_kill_once(struct kill_once *p)
+{
+	init_waitqueue_head(&p->wait);
+	p->done = 0;
+}
+
+bool needs_killing(struct kill_once *);
+void finished_killing(struct kill_once *);
diff --git a/kernel/locking/Makefile b/kernel/locking/Makefile
index de7a416..c404207 100644
--- a/kernel/locking/Makefile
+++ b/kernel/locking/Makefile
@@ -1,5 +1,5 @@
 
-obj-y += mutex.o semaphore.o rwsem.o
+obj-y += mutex.o semaphore.o rwsem.o kill_once.o
 
 ifdef CONFIG_FUNCTION_TRACER
 CFLAGS_REMOVE_lockdep.o = $(CC_FLAGS_FTRACE)
diff --git a/kernel/locking/kill_once.c b/kernel/locking/kill_once.c
new file mode 100644
index 0000000..f59ad4b
--- /dev/null
+++ b/kernel/locking/kill_once.c
@@ -0,0 +1,59 @@
+#include <linux/sched.h>
+#include <linux/kill_once.h>
+
+void finished_killing(struct kill_once *p)
+{
+	spin_lock_irq(&p->wait.lock);
+	p->done = 1;
+	wake_up_locked(&p->wait);
+	spin_unlock_irq(&p->wait.lock);
+}
+
+bool needs_killing(struct kill_once *p)
+{
+	wait_queue_t wait;
+
+	init_wait(&wait);
+	spin_lock_irq(&p->wait.lock);
+	if (likely(!p->done)) {
+		p->done = -1;
+		spin_unlock_irq(&p->wait.lock);
+		rcu_read_unlock();
+		return true;
+	}
+	if (p->done > 0) {
+		spin_unlock_irq(&p->wait.lock);
+		rcu_read_unlock();
+		return false;
+	}
+	__add_wait_queue(&p->wait, &wait);
+	while (1) {
+		set_current_state(TASK_UNINTERRUPTIBLE);
+		spin_unlock_irq(&p->wait.lock);
+		rcu_read_unlock();
+		schedule();
+		rcu_read_lock();
+		/*
+		 * racy, but we are OK with false negatives -
+		 * if we observe anything other than an empty
+		 * wait.task_list after taking rcu_read_lock(),
+		 * we know that RCU grace period started after
+		 * pin_remove() couldn't have ended yet and
+		 * dereferencing p is still safe.
+		 */
+		if (likely(list_empty(&wait.task_list)))
+			break;
+		/*
+		 * OK, we know p couldn't have been freed yet and
+		 * can take that spinlock safely
+		 */
+		spin_lock_irq(&p->wait.lock);
+		/* now we can check p->done */
+		if (p->done > 0) {
+			spin_unlock_irq(&p->wait.lock);
+			break;
+		}
+	}
+	rcu_read_unlock();
+	return false;
+}

^ permalink raw reply related	[flat|nested] 2+ messages in thread

* Re: [RFC] new locking primitive (pulled from fs_pin)
  2015-04-13 19:05 [RFC] new locking primitive (pulled from fs_pin) Al Viro
@ 2015-04-13 21:37 ` Linus Torvalds
  0 siblings, 0 replies; 2+ messages in thread
From: Linus Torvalds @ 2015-04-13 21:37 UTC (permalink / raw)
  To: Al Viro; +Cc: Linux Kernel Mailing List

On Mon, Apr 13, 2015 at 12:05 PM, Al Viro <viro@zeniv.linux.org.uk> wrote:
>
> This thing used to be the locking side of fs/fs_pin.c, but
> IMO it might make sense to separate it from fs_pin completely -
> it could be useful elsewhere...
>
> Comments (and especially suggestions on better names) are welcome...

I don't mind abstracting it out, but I would prefer that it stay local
to fs_pin.c until there's a clear case for using it anywhere else.

Do you really have any usage cases that are interesting? Because quite
frankly, I'm not a fan of the nasty fs-pin code, and I would *not*
want the ideas in there to bleed out anywhere else.

So my gut feel is that it's a good thing to try to abstract out that
mess, but not a good idea to spread it out.

(I do realize that without the cross-file movement, the patch
basically just boils down to abstracting out that "finished_killing()"
into a helper function and little else)

                     Linus

^ permalink raw reply	[flat|nested] 2+ messages in thread

end of thread, other threads:[~2015-04-13 21:37 UTC | newest]

Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2015-04-13 19:05 [RFC] new locking primitive (pulled from fs_pin) Al Viro
2015-04-13 21:37 ` Linus Torvalds

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.