From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1754549AbbDMTFx (ORCPT ); Mon, 13 Apr 2015 15:05:53 -0400 Received: from zeniv.linux.org.uk ([195.92.253.2]:54227 "EHLO ZenIV.linux.org.uk" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1751524AbbDMTFv (ORCPT ); Mon, 13 Apr 2015 15:05:51 -0400 Date: Mon, 13 Apr 2015 20:05:48 +0100 From: Al Viro To: Linus Torvalds Cc: linux-kernel@vger.kernel.org Subject: [RFC] new locking primitive (pulled from fs_pin) Message-ID: <20150413190548.GQ889@ZenIV.linux.org.uk> MIME-Version: 1.0 Content-Type: text/plain; charset=us-ascii Content-Disposition: inline User-Agent: Mutt/1.5.21 (2010-09-15) Sender: linux-kernel-owner@vger.kernel.org List-ID: X-Mailing-List: linux-kernel@vger.kernel.org New structure. Intended use: embed into an object that will always be freed with RCU delay. Initialize with init_kill_once(&object->kill_once). Use by grabbing rcu_read_lock(), finding the object, then if (needs_killing(&object->kill_once)) { // do whatever actions you want, including // removal of references from the places // where they could be found finished_killing(&object->kill_once); // arrange for RCU-delayed freeing } If several threads attempt that, only the first one will see needs_killing() return true *and* everything else will wait in needs_killing() until the first one is past finished_killing(). Note that they might end up returning after object gets freed - needs_killing() itself is very careful about dereferencing and its caller MUST NOT touch the object after getting false from needs_killing(). needs_killing() must be called with rcu_read_lock() held and drops it in all cases. This thing used to be the locking side of fs/fs_pin.c, but IMO it might make sense to separate it from fs_pin completely - it could be useful elsewhere... Comments (and especially suggestions on better names) are welcome... 
Signed-off-by: Al Viro --- diff --git a/fs/fs_pin.c b/fs/fs_pin.c index b06c987..b124faf 100644 --- a/fs/fs_pin.c +++ b/fs/fs_pin.c @@ -12,10 +12,7 @@ void pin_remove(struct fs_pin *pin) hlist_del(&pin->m_list); hlist_del(&pin->s_list); spin_unlock(&pin_lock); - spin_lock_irq(&pin->wait.lock); - pin->done = 1; - wake_up_locked(&pin->wait); - spin_unlock_irq(&pin->wait.lock); + finished_killing(&pin->head); } void pin_insert_group(struct fs_pin *pin, struct vfsmount *m, struct hlist_head *p) @@ -34,43 +31,12 @@ void pin_insert(struct fs_pin *pin, struct vfsmount *m) void pin_kill(struct fs_pin *p) { - wait_queue_t wait; - if (!p) { rcu_read_unlock(); return; } - init_wait(&wait); - spin_lock_irq(&p->wait.lock); - if (likely(!p->done)) { - p->done = -1; - spin_unlock_irq(&p->wait.lock); - rcu_read_unlock(); + if (needs_killing(&p->head)) p->kill(p); - return; - } - if (p->done > 0) { - spin_unlock_irq(&p->wait.lock); - rcu_read_unlock(); - return; - } - __add_wait_queue(&p->wait, &wait); - while (1) { - set_current_state(TASK_UNINTERRUPTIBLE); - spin_unlock_irq(&p->wait.lock); - rcu_read_unlock(); - schedule(); - rcu_read_lock(); - if (likely(list_empty(&wait.task_list))) - break; - /* OK, we know p couldn't have been freed yet */ - spin_lock_irq(&p->wait.lock); - if (p->done > 0) { - spin_unlock_irq(&p->wait.lock); - break; - } - } - rcu_read_unlock(); } void mnt_pin_kill(struct mount *m) diff --git a/include/linux/fs_pin.h b/include/linux/fs_pin.h index 9dc4e03..f65daad 100644 --- a/include/linux/fs_pin.h +++ b/include/linux/fs_pin.h @@ -1,8 +1,7 @@ -#include +#include struct fs_pin { - wait_queue_head_t wait; - int done; + struct kill_once head; struct hlist_node s_list; struct hlist_node m_list; void (*kill)(struct fs_pin *); @@ -12,7 +11,7 @@ struct vfsmount; static inline void init_fs_pin(struct fs_pin *p, void (*kill)(struct fs_pin *)) { - init_waitqueue_head(&p->wait); + init_kill_once(&p->head); p->kill = kill; } diff --git a/include/linux/kill_once.h 
b/include/linux/kill_once.h new file mode 100644 index 0000000..03a3717 --- /dev/null +++ b/include/linux/kill_once.h @@ -0,0 +1,28 @@ +#include + +/* +Intended use: + rcu_read_lock(); + p = <....> + if (needs_killing(p)) { + kill it + finished_killing(p); + arrange for rcu-delayed freeing + } else { + we are guaranteed that it is an ex-parrot + } +*/ + +struct kill_once { + wait_queue_head_t wait; + int done; +}; + +static inline void init_kill_once(struct kill_once *p) +{ + init_waitqueue_head(&p->wait); + p->done = 0; +} + +bool needs_killing(struct kill_once *); +void finished_killing(struct kill_once *); diff --git a/kernel/locking/Makefile b/kernel/locking/Makefile index de7a416..c404207 100644 --- a/kernel/locking/Makefile +++ b/kernel/locking/Makefile @@ -1,5 +1,5 @@ -obj-y += mutex.o semaphore.o rwsem.o +obj-y += mutex.o semaphore.o rwsem.o kill_once.o ifdef CONFIG_FUNCTION_TRACER CFLAGS_REMOVE_lockdep.o = $(CC_FLAGS_FTRACE) diff --git a/kernel/locking/kill_once.c b/kernel/locking/kill_once.c new file mode 100644 index 0000000..f59ad4b --- /dev/null +++ b/kernel/locking/kill_once.c @@ -0,0 +1,59 @@ +#include +#include + +void finished_killing(struct kill_once *p) +{ + spin_lock_irq(&p->wait.lock); + p->done = 1; + wake_up_locked(&p->wait); + spin_unlock_irq(&p->wait.lock); +} + +bool needs_killing(struct kill_once *p) +{ + wait_queue_t wait; + + init_wait(&wait); + spin_lock_irq(&p->wait.lock); + if (likely(!p->done)) { + p->done = -1; + spin_unlock_irq(&p->wait.lock); + rcu_read_unlock(); + return true; + } + if (p->done > 0) { + spin_unlock_irq(&p->wait.lock); + rcu_read_unlock(); + return false; + } + __add_wait_queue(&p->wait, &wait); + while (1) { + set_current_state(TASK_UNINTERRUPTIBLE); + spin_unlock_irq(&p->wait.lock); + rcu_read_unlock(); + schedule(); + rcu_read_lock(); + /* + * racy, but we are OK with false negatives - + * if we observe anything other than an empty + * wait.task_list after taking rcu_read_lock(), + * we know that RCU grace 
period started after + * finished_killing() couldn't have ended yet and + * dereferencing p is still safe. + */ + if (likely(list_empty(&wait.task_list))) + break; + /* + * OK, we know p couldn't have been freed yet and + * can take that spinlock safely + */ + spin_lock_irq(&p->wait.lock); + /* now we can check p->done */ + if (p->done > 0) { + spin_unlock_irq(&p->wait.lock); + break; + } + } + rcu_read_unlock(); + return false; +}