* [RFC] Add support for semaphore-like structure with support for asynchronous I/O
@ 2005-03-30 21:51 Trond Myklebust
2005-03-30 22:34 ` Andrew Morton
2005-03-31 8:02 ` Nikita Danilov
0 siblings, 2 replies; 31+ messages in thread
From: Trond Myklebust @ 2005-03-30 21:51 UTC (permalink / raw)
To: linux-kernel; +Cc: Linux Filesystem Development
In NFSv4 we often want to serialize asynchronous RPC calls with ordinary
RPC calls (OPEN and CLOSE for instance). On paper, semaphores would
appear to fit the bill, however there is no support for asynchronous I/O
with semaphores.
<rant>What's more, trying to add that type of support is an exercise in
futility: there are currently 23 slightly different arch-dependent and
over-optimized versions of semaphores (not counting the different
versions of read/write semaphores).</rant>
Anyhow, the following is a simple implementation of semaphores designed
to satisfy the needs of those I/O subsystems that want to support
asynchronous behaviour too. Please comment.
Cheers,
Trond
--------------------------------------
NFS: Add support for iosems.
These act rather like semaphores, but also have support for asynchronous
I/O, using the wait_queue_t callback features.
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
include/linux/iosem.h | 63 ++++++++++++++++++++++++++++++
lib/Makefile | 2
lib/iosem.c | 103 ++++++++++++++++++++++++++++++++++++++++++++++++++
3 files changed, 167 insertions(+), 1 deletion(-)
Index: linux-2.6.12-rc1/include/linux/iosem.h
===================================================================
--- /dev/null
+++ linux-2.6.12-rc1/include/linux/iosem.h
@@ -0,0 +1,63 @@
+/*
+ * include/linux/iosem.h
+ *
+ * Copyright (C) 2005 Trond Myklebust <Trond.Myklebust@netapp.com>
+ *
+ * Definitions for iosems. These can act as mutexes, but unlike
+ * semaphores, their code is 100% arch-independent, and can therefore
+ * easily be expanded in order to provide for things like
+ * asynchronous I/O.
+ */
+
+#ifndef __LINUX_SEM_LOCK_H
+#define __LINUX_SEM_LOCK_H
+
+#ifdef __KERNEL__
+#include <linux/wait.h>
+#include <linux/workqueue.h>
+
+struct iosem {
+ unsigned long state;
+ wait_queue_head_t wait;
+};
+
+#define IOSEM_LOCK_EXCLUSIVE (24)
+/* #define IOSEM_LOCK_SHARED (25) */
+
+struct iosem_wait {
+ struct iosem *lock;
+ wait_queue_t wait;
+};
+
+struct iosem_work {
+ struct work_struct work;
+ struct iosem_wait waiter;
+};
+
+extern void FASTCALL(iosem_lock(struct iosem *lk));
+extern void FASTCALL(iosem_unlock(struct iosem *lk));
+extern int iosem_lock_wake_function(wait_queue_t *wait, unsigned mode, int sync, void *key);
+
+static inline void init_iosem(struct iosem *lk)
+{
+ lk->state = 0;
+ init_waitqueue_head(&lk->wait);
+}
+
+static inline void init_iosem_waiter(struct iosem_wait *waiter)
+{
+ waiter->lock = NULL;
+ init_waitqueue_entry(&waiter->wait, current);
+ INIT_LIST_HEAD(&waiter->wait.task_list);
+}
+
+static inline void init_iosem_work(struct iosem_work *wk, void (*func)(void *), void *data)
+{
+ INIT_WORK(&wk->work, func, data);
+}
+
+extern int FASTCALL(iosem_lock_and_schedule_work(struct iosem *lk, struct iosem_work *wk));
+extern int iosem_lock_and_schedule_function(wait_queue_t *wait, unsigned mode, int sync, void *key);
+
+#endif /* __KERNEL__ */
+#endif /* __LINUX_SEM_LOCK_H */
Index: linux-2.6.12-rc1/lib/iosem.c
===================================================================
--- /dev/null
+++ linux-2.6.12-rc1/lib/iosem.c
@@ -0,0 +1,103 @@
+/*
+ * linux/fs/nfs/iosem.c
+ *
+ * Copyright (C) 2005 Trond Myklebust <Trond.Myklebust@netapp.com>
+ *
+ * A set of primitives for semaphore-like locks that also support notification
+ * callbacks for waiters.
+ */
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/sched.h>
+#include <linux/wait.h>
+#include <linux/iosem.h>
+
+static int fastcall __iosem_lock(struct iosem *lk, struct iosem_wait *waiter)
+{
+ int ret;
+
+ spin_lock(&lk->wait.lock);
+ if (lk->state != 0) {
+ waiter->lock = lk;
+ add_wait_queue_exclusive_locked(&lk->wait, &waiter->wait);
+ ret = -EINPROGRESS;
+ } else {
+ lk->state |= 1 << IOSEM_LOCK_EXCLUSIVE;
+ ret = 0;
+ }
+ spin_unlock(&lk->wait.lock);
+ return ret;
+}
+
+void fastcall iosem_unlock(struct iosem *lk)
+{
+ spin_lock(&lk->wait.lock);
+ lk->state &= ~(1 << IOSEM_LOCK_EXCLUSIVE);
+ wake_up_locked(&lk->wait);
+ spin_unlock(&lk->wait.lock);
+}
+EXPORT_SYMBOL(iosem_unlock);
+
+int iosem_lock_wake_function(wait_queue_t *wait, unsigned mode, int sync, void *key)
+{
+ struct iosem_wait *waiter = container_of(wait, struct iosem_wait, wait);
+ unsigned long *lk_state = &waiter->lock->state;
+ int ret = 0;
+
+ if (*lk_state == 0) {
+ ret = default_wake_function(wait, mode, sync, key);
+ if (ret) {
+ *lk_state |= 1 << IOSEM_LOCK_EXCLUSIVE;
+ list_del_init(&wait->task_list);
+ }
+ }
+ return ret;
+}
+
+void fastcall iosem_lock(struct iosem *lk)
+{
+ struct iosem_wait waiter;
+
+ might_sleep();
+
+ init_iosem_waiter(&waiter);
+ waiter.wait.func = iosem_lock_wake_function;
+
+ set_current_state(TASK_UNINTERRUPTIBLE);
+ if (__iosem_lock(lk, &waiter))
+ schedule();
+ __set_current_state(TASK_RUNNING);
+
+ BUG_ON(!list_empty(&waiter.wait.task_list));
+}
+EXPORT_SYMBOL(iosem_lock);
+
+int iosem_lock_and_schedule_function(wait_queue_t *wait, unsigned mode, int sync, void *key)
+{
+ struct iosem_wait *waiter = container_of(wait, struct iosem_wait, wait);
+ struct iosem_work *wk = container_of(waiter, struct iosem_work, waiter);
+ unsigned long *lk_state = &waiter->lock->state;
+ int ret = 0;
+
+ if (*lk_state == 0) {
+ ret = schedule_work(&wk->work);
+ if (ret) {
+ *lk_state |= 1 << IOSEM_LOCK_EXCLUSIVE;
+ list_del_init(&wait->task_list);
+ }
+ }
+ return ret;
+}
+
+int fastcall iosem_lock_and_schedule_work(struct iosem *lk, struct iosem_work *wk)
+{
+ int ret;
+
+ init_iosem_waiter(&wk->waiter);
+ wk->waiter.wait.func = iosem_lock_and_schedule_function;
+ ret = __iosem_lock(lk, &wk->waiter);
+ if (ret == 0)
+ ret = schedule_work(&wk->work);
+ return ret;
+}
+EXPORT_SYMBOL(iosem_lock_and_schedule_work);
Index: linux-2.6.12-rc1/lib/Makefile
===================================================================
--- linux-2.6.12-rc1.orig/lib/Makefile
+++ linux-2.6.12-rc1/lib/Makefile
@@ -8,7 +8,7 @@ lib-y := errno.o ctype.o string.o vsprin
bitmap.o extable.o kobject_uevent.o prio_tree.o sha1.o \
halfmd4.o
-obj-y += sort.o parser.o
+obj-y += sort.o parser.o iosem.o
ifeq ($(CONFIG_DEBUG_KOBJECT),y)
CFLAGS_kobject.o += -DDEBUG
--
Trond Myklebust <trond.myklebust@fys.uio.no>
^ permalink raw reply [flat|nested] 31+ messages in thread* Re: [RFC] Add support for semaphore-like structure with support for asynchronous I/O 2005-03-30 21:51 [RFC] Add support for semaphore-like structure with support for asynchronous I/O Trond Myklebust @ 2005-03-30 22:34 ` Andrew Morton 2005-03-30 23:17 ` Trond Myklebust 2005-03-31 8:02 ` Nikita Danilov 1 sibling, 1 reply; 31+ messages in thread From: Andrew Morton @ 2005-03-30 22:34 UTC (permalink / raw) To: Trond Myklebust; +Cc: linux-kernel, linux-fsdevel Trond Myklebust <trond.myklebust@fys.uio.no> wrote: > > In NFSv4 we often want to serialize asynchronous RPC calls with ordinary > RPC calls (OPEN and CLOSE for instance). On paper, semaphores would > appear to fit the bill, however there is no support for asynchronous I/O > with semaphores. > <rant>What's more, trying to add that type of support is an exercise in > futility: there are currently 23 slightly different arch-dependent and > over-optimized versions of semaphores (not counting the different > versions of read/write semaphores).</rant> Yeah. > Anyhow, the following is a simple implementation of semaphores designed > to satisfy the needs of those I/O subsystems that want to support > asynchronous behaviour too. Please comment. > So I've been staring at this code for a while and I Just Don't Get It. If I want some custom callback function to be called when someone does an iosem_unlock(), how do I do it? Or have I misunderstood the intent? Some /* comments */ would be appropriate.. > +struct iosem { > + unsigned long state; > + wait_queue_head_t wait; > +}; > + > +#define IOSEM_LOCK_EXCLUSIVE (24) > +/* #define IOSEM_LOCK_SHARED (25) */ > + > +struct iosem_wait { > + struct iosem *lock; > + wait_queue_t wait; > +}; > + > +struct iosem_work { > + struct work_struct work; > + struct iosem_wait waiter; > +}; Commenting the data structures is particularly helpful. > +extern void FASTCALL(iosem_lock(struct iosem *lk)); > +extern void FASTCALL(iosem_unlock(struct iosem *lk)); > +extern int iosem_lock_wake_function(wait_queue_t *wait, unsigned mode, int sync, void *key); > + > +static inline void init_iosem(struct iosem *lk) > +{ > + lk->state = 0; > + init_waitqueue_head(&lk->wait); > +} > + > +static inline void init_iosem_waiter(struct iosem_wait *waiter) > +{ > + waiter->lock = NULL; > + init_waitqueue_entry(&waiter->wait, current); > + INIT_LIST_HEAD(&waiter->wait.task_list); > +} > + > +static inline void init_iosem_work(struct iosem_work *wk, void (*func)(void *), void *data) > +{ > + INIT_WORK(&wk->work, func, data); > +} I'd be inclined to call these iosem_init, iosem_waiter_init and iosem_work_init. > --- /dev/null > +++ linux-2.6.12-rc1/lib/iosem.c > @@ -0,0 +1,103 @@ > +/* > + * linux/fs/nfs/iosem.c This filename is stale. > + spin_lock(&lk->wait.lock); I wonder if this lock should be irq-safe everywhere. Is it not possible that someone might want to do an unlock from irq context? > + if (lk->state != 0) { > + waiter->lock = lk; > + add_wait_queue_exclusive_locked(&lk->wait, &waiter->wait); > + ret = -EINPROGRESS; > + } else { > + lk->state |= 1 << IOSEM_LOCK_EXCLUSIVE; > + ret = 0; > + } > + spin_unlock(&lk->wait.lock); > + return ret; > +} Again, some commentary would be needed to help the poor reader understand what a -EINPROGRESS return means. > + struct iosem_wait waiter; > + > + might_sleep(); > + > + init_iosem_waiter(&waiter); > + waiter.wait.func = iosem_lock_wake_function; > + > + set_current_state(TASK_UNINTERRUPTIBLE); > + if (__iosem_lock(lk, &waiter)) > + schedule(); > + __set_current_state(TASK_RUNNING); > + > + BUG_ON(!list_empty(&waiter.wait.task_list)); > +} Is this BUG_ON() safe? No locks are held, so couldn't another object get added by some other thread of control? > +int iosem_lock_and_schedule_function(wait_queue_t *wait, unsigned mode, int sync, void *key) > +{ > + struct iosem_wait *waiter = container_of(wait, struct iosem_wait, wait); > + struct iosem_work *wk = container_of(waiter, struct iosem_work, waiter); > + unsigned long *lk_state = &waiter->lock->state; > + int ret = 0; > + > + if (*lk_state == 0) { > + ret = schedule_work(&wk->work); > + if (ret) { > + *lk_state |= 1 << IOSEM_LOCK_EXCLUSIVE; > + list_del_init(&wait->task_list); > + } > + } > + return ret; > +} Again, I don't understand why this function was created. I think it means that there are restrictions upon what keventd can do with iosems, to avoid deadlocking. If correct, they should be spelled out. > +int fastcall iosem_lock_and_schedule_work(struct iosem *lk, struct iosem_work *wk) > +{ > + int ret; > + > + init_iosem_waiter(&wk->waiter); > + wk->waiter.wait.func = iosem_lock_and_schedule_function; > + ret = __iosem_lock(lk, &wk->waiter); > + if (ret == 0) > + ret = schedule_work(&wk->work); > + return ret; > +} > +EXPORT_SYMBOL(iosem_lock_and_schedule_work); Ditto. ^ permalink raw reply [flat|nested] 31+ messages in thread
* Re: [RFC] Add support for semaphore-like structure with support for asynchronous I/O 2005-03-30 22:34 ` Andrew Morton @ 2005-03-30 23:17 ` Trond Myklebust 2005-03-30 23:44 ` Andrew Morton 2005-03-31 22:53 ` Trond Myklebust 0 siblings, 2 replies; 31+ messages in thread From: Trond Myklebust @ 2005-03-30 23:17 UTC (permalink / raw) To: Andrew Morton; +Cc: linux-kernel, Linux Filesystem Development on den 30.03.2005 Klokka 14:34 (-0800) skreiv Andrew Morton: > > Anyhow, the following is a simple implementation of semaphores designed > > to satisfy the needs of those I/O subsystems that want to support > > asynchronous behaviour too. Please comment. > > > > So I've been staring at this code for a while and I Just Don't Get It. If > I want some custom callback function to be called when someone does an > iosem_unlock(), how do I do it? I haven't added support for arbitrary callback functions. It is quite possible to expand the interfaces to do so should someone need that functionality, however my current needs only dictate that I be able to grant the iosem token to a workqueue item, then schedule that work for execution by keventd. This is required in order to allow threads such as rpciod or keventd itself (for which sleeping may cause deadlocks) to ask the iosem manager code to simply queue the work that need to run once the iosem has been granted. That work function is then, of course, responsible for releasing the iosem when it is done. > Or have I misunderstood the intent? Some /* comments */ would be appropriate.. Will do. Cheers, Trond -- Trond Myklebust <trond.myklebust@fys.uio.no> ^ permalink raw reply [flat|nested] 31+ messages in thread
* Re: [RFC] Add support for semaphore-like structure with support for asynchronous I/O 2005-03-30 23:17 ` Trond Myklebust @ 2005-03-30 23:44 ` Andrew Morton 2005-03-31 0:02 ` Trond Myklebust 2005-03-31 22:53 ` Trond Myklebust 1 sibling, 1 reply; 31+ messages in thread From: Andrew Morton @ 2005-03-30 23:44 UTC (permalink / raw) To: Trond Myklebust; +Cc: linux-kernel, linux-fsdevel Trond Myklebust <trond.myklebust@fys.uio.no> wrote: > > This is required in order to allow threads such as rpciod or keventd > itself (for which sleeping may cause deadlocks) to ask the iosem manager > code to simply queue the work that need to run once the iosem has been > granted. That work function is then, of course, responsible for > releasing the iosem when it is done. I see. I think. Should we be using those aio/N threads for this? They don't seem to do much else... ^ permalink raw reply [flat|nested] 31+ messages in thread
* Re: [RFC] Add support for semaphore-like structure with support for asynchronous I/O 2005-03-30 23:44 ` Andrew Morton @ 2005-03-31 0:02 ` Trond Myklebust 0 siblings, 0 replies; 31+ messages in thread From: Trond Myklebust @ 2005-03-31 0:02 UTC (permalink / raw) To: Andrew Morton; +Cc: linux-kernel, Linux Filesystem Development on den 30.03.2005 Klokka 15:44 (-0800) skreiv Andrew Morton: > Trond Myklebust <trond.myklebust@fys.uio.no> wrote: > > > > This is required in order to allow threads such as rpciod or keventd > > itself (for which sleeping may cause deadlocks) to ask the iosem manager > > code to simply queue the work that need to run once the iosem has been > > granted. That work function is then, of course, responsible for > > releasing the iosem when it is done. > > I see. I think. Should we be using those aio/N threads for this? They > don't seem to do much else... That would be quite OK by me if nobody objects. Cheers, Trond -- Trond Myklebust <trond.myklebust@fys.uio.no> ^ permalink raw reply [flat|nested] 31+ messages in thread
* Re: [RFC] Add support for semaphore-like structure with support for asynchronous I/O 2005-03-30 23:17 ` Trond Myklebust 2005-03-30 23:44 ` Andrew Morton @ 2005-03-31 22:53 ` Trond Myklebust 2005-04-01 0:13 ` Andrew Morton 1 sibling, 1 reply; 31+ messages in thread From: Trond Myklebust @ 2005-03-31 22:53 UTC (permalink / raw) To: Andrew Morton; +Cc: linux-kernel, Linux Filesystem Development on den 30.03.2005 Klokka 18:17 (-0500) skreiv Trond Myklebust: > > Or have I misunderstood the intent? Some /* comments */ would be appropriate.. > > Will do. OK. Plenty of comments added that will hopefully clarify what is going on and how to use the API. Also some cleanups of the code. I haven't changed the name "iosem" as Nikita suggested. That's not because I'm opposed to doing so, but I haven't yet heard something that I like. Suggestions welcome... Cheers, Trond -------------- NFS: Add support for iosems. These act rather like semaphores, but also have support for asynchronous I/O, using the wait_queue_t callback features. Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com> --- include/linux/iosem.h | 110 +++++++++++++++++++++++++++++++ lib/Makefile | 2 lib/iosem.c | 177 ++++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 288 insertions(+), 1 deletion(-) Index: linux-2.6.12-rc1/include/linux/iosem.h =================================================================== --- /dev/null +++ linux-2.6.12-rc1/include/linux/iosem.h @@ -0,0 +1,110 @@ +/* + * include/linux/iosem.h + * + * Copyright (C) 2005 Trond Myklebust <Trond.Myklebust@netapp.com> + * + * Definitions for iosems. These can act as mutexes, but unlike + * semaphores, their code is 100% arch-independent, and can therefore + * easily be expanded in order to provide for things like + * asynchronous I/O. + */ + +#ifndef __LINUX_SEM_LOCK_H +#define __LINUX_SEM_LOCK_H + +#ifdef __KERNEL__ +#include <linux/wait.h> +#include <linux/workqueue.h> + +/* + * struct iosem: iosem mutex + * state: bitmask - currently only signals whether or not an exclusive + * lock has been taken + * wait: FIFO wait queue + */ +struct iosem { +#define IOSEM_LOCK_EXCLUSIVE (31) +/* #define IOSEM_LOCK_SHARED (30) */ + unsigned long state; + wait_queue_head_t wait; +}; + + + +/* + * struct iosem_wait: acts as a request for a lock on the iosem + * lock: backpointer to the iosem + * wait: wait queue entry. note that the callback function + * defines what to do when the lock has been granted + */ +struct iosem_wait { + struct iosem *lock; + wait_queue_t wait; +}; + +/* + * struct iosem_work: used by asynchronous waiters. + * + * work: work to schedule once the iosem has been granted. The + * function containing the critical code that needs to + * run under the protection of the lock should be placed here. + * The same function is responsible for calling iosem_unlock() + * when done. + * waiter: iosem waitqueue entry + */ +struct iosem_work { + struct work_struct work; + struct iosem_wait waiter; +}; + +/* + * Functions for synchronous i/o + */ + +/* Synchronously grab an iosem. + * These functions act in pretty much the same way down()/up() + * do for semaphores. + */ +extern void FASTCALL(iosem_lock(struct iosem *lk)); +extern void FASTCALL(iosem_unlock(struct iosem *lk)); + +/* + * Callback function to wake up the sleeping task once + * it has been granted an exclusive lock + */ +extern int iosem_lock_wake_function(wait_queue_t *wait, unsigned mode, int sync, void *key); + +/* Initialize a struct iosem in the "unlocked" state */ +static inline void iosem_init(struct iosem *lk) +{ + lk->state = 0; + init_waitqueue_head(&lk->wait); +} + +/* Initializes a lock request */ +static inline void iosem_waiter_init(struct iosem_wait *waiter) +{ + waiter->lock = NULL; + init_waitqueue_entry(&waiter->wait, current); + INIT_LIST_HEAD(&waiter->wait.task_list); +} + +/* + * Functions for asynchronous I/O. + */ + +/* Requests an exclusive lock on the iosem on behalf of a workqueue entry "wk". + * Schedule wk->work for execution as soon as the lock is granted. */ +extern int FASTCALL(iosem_lock_and_schedule_work(struct iosem *lk, struct iosem_work *wk)); + +/* Waitqueue notifier that schedules work once the exclusive lock has + * been granted */ +extern int iosem_lock_and_schedule_function(wait_queue_t *wait, unsigned mode, int sync, void *key); + +static inline void iosem_work_init(struct iosem_work *wk, void (*func)(void *), void *data) +{ + INIT_WORK(&wk->work, func, data); +} + +#endif /* __KERNEL__ */ +#endif /* __LINUX_SEM_LOCK_H */ Index: linux-2.6.12-rc1/lib/iosem.c =================================================================== --- /dev/null +++ linux-2.6.12-rc1/lib/iosem.c @@ -0,0 +1,177 @@ +/* + * linux/lib/iosem.c + * + * Copyright (C) 2005 Trond Myklebust <Trond.Myklebust@netapp.com> + * + * A set of primitives for semaphore-like locks that also support notification + * callbacks for waiters. + */ +#include <linux/config.h> +#include <linux/module.h> +#include <linux/sched.h> +#include <linux/wait.h> +#include <linux/iosem.h> + +/* + * Common function for requesting an exclusive lock on an iosem + * + * Note: should be called while holding the non-irqsafe spinlock + * lk->wait.lock. The spinlock is non-irqsafe as we have no reason (yet) to + * expect anyone to take/release iosems from within an interrupt + * context (and 'cos it is a _bug_ to attempt to wake up the waitqueue + * lk->wait using anything other than iosem_unlock()). + */ +static inline int __iosem_lock(struct iosem *lk, struct iosem_wait *waiter) +{ + int ret; + + if (lk->state != 0) { + /* The lock cannot be immediately granted: queue waiter */ + waiter->lock = lk; + add_wait_queue_exclusive_locked(&lk->wait, &waiter->wait); + ret = -EINPROGRESS; + } else { + lk->state |= 1 << IOSEM_LOCK_EXCLUSIVE; + ret = 0; + } + return ret; +} + +/** + * iosem_unlock - release an exclusive lock + * @iosem - the iosem on which we hold an exclusive lock + */ +void fastcall iosem_unlock(struct iosem *lk) +{ + spin_lock(&lk->wait.lock); + lk->state &= ~(1 << IOSEM_LOCK_EXCLUSIVE); + wake_up_locked(&lk->wait); + spin_unlock(&lk->wait.lock); +} +EXPORT_SYMBOL(iosem_unlock); + +/** + * iosem_lock_wake_function - take an exclusive lock and wake up sleeping task + * @wait: waitqueue entry. Must be part of an initialized struct iosem_wait + * @mode: + * @sync: + * @key: + * + * Standard wait_queue_func_t callback function used by iosem_lock(). When + * called, it will attempt to wake up the sleeping task, and set an + * exclusive lock on the iosem. + * On success, @wait is automatically removed from the iosem's waitqueue, + * and a non-zero value is returned. + * + * This function will in practice *always* be called from within iosem_unlock() + */ +int iosem_lock_wake_function(wait_queue_t *wait, unsigned mode, int sync, void *key) +{ + struct iosem_wait *waiter = container_of(wait, struct iosem_wait, wait); + unsigned long *lk_state = &waiter->lock->state; + int ret = 0; + + if (*lk_state == 0) { + ret = default_wake_function(wait, mode, sync, key); + if (ret) { + *lk_state |= 1 << IOSEM_LOCK_EXCLUSIVE; + list_del_init(&wait->task_list); + } + } + return ret; +} + +/** + * iosem_lock - synchronously take an exclusive lock + * @iosem - the iosem to take an exclusive lock + * + * If the exclusive lock cannot be immediately granted, put the current task + * to uninterruptible sleep until it can. + */ +void fastcall iosem_lock(struct iosem *lk) +{ + struct iosem_wait waiter; + + might_sleep(); + + iosem_waiter_init(&waiter); + waiter.wait.func = iosem_lock_wake_function; + + spin_lock(&lk->wait.lock); + if (__iosem_lock(lk, &waiter) != 0) { + /* Must wait for lock... */ + for (;;) { + set_current_state(TASK_UNINTERRUPTIBLE); + if (list_empty(&waiter.wait.task_list)) + break; + spin_unlock(&lk->wait.lock); + schedule(); + spin_lock(&lk->wait.lock); + } + __set_current_state(TASK_RUNNING); + } + spin_unlock(&lk->wait.lock); +} +EXPORT_SYMBOL(iosem_lock); + +/** + * iosem_lock_and_schedule_function - take an exclusive lock and schedule work + * @wait: waitqueue entry. Must be part of an initialized struct iosem_work + * @mode: unused + * @sync: unused + * @key: unused + * + * Standard wait_queue_func_t callback function used by + * iosem_lock_and_schedule_work. When called, it will attempt to queue the + * work function and set the exclusive lock on the iosem. + * On success, @wait is removed from the iosem's waitqueue, and a non-zero + * value is returned. + * + * This function will in practice *always* be called from within iosem_unlock() + */ +int iosem_lock_and_schedule_function(wait_queue_t *wait, unsigned mode, int sync, void *key) +{ + struct iosem_wait *waiter = container_of(wait, struct iosem_wait, wait); + struct iosem_work *wk = container_of(waiter, struct iosem_work, waiter); + unsigned long *lk_state = &waiter->lock->state; + int ret = 0; + + if (*lk_state == 0) { + ret = schedule_work(&wk->work); + if (ret) { + *lk_state |= 1 << IOSEM_LOCK_EXCLUSIVE; + list_del_init(&wait->task_list); + } + } + return ret; +} + +/** + * iosem_lock_and_schedule_work - request an exclusive lock and schedule work + * @lk: pointer to iosem + * @wk: pointer to iosem_work + * + * Request an exclusive lock on the iosem. If the lock cannot be immediately + * granted, place wk->waiter on the iosem's waitqueue, and return, else + * immediately queue the work function wk->work. + * + * Once the exclusive lock has been granted, the work function described by + * wk->work is queued in keventd. It is then the responsibility of that work + * function to release the exclusive lock once it has been granted. + * + * returns -EINPROGRESS if the lock could not be immediately granted. + */ +int fastcall iosem_lock_and_schedule_work(struct iosem *lk, struct iosem_work *wk) +{ + int ret; + + iosem_waiter_init(&wk->waiter); + wk->waiter.wait.func = iosem_lock_and_schedule_function; + spin_lock(&lk->wait.lock); + ret = __iosem_lock(lk, &wk->waiter); + spin_unlock(&lk->wait.lock); + if (ret == 0) + ret = schedule_work(&wk->work); + return ret; +} +EXPORT_SYMBOL(iosem_lock_and_schedule_work); Index: linux-2.6.12-rc1/lib/Makefile =================================================================== --- linux-2.6.12-rc1.orig/lib/Makefile +++ linux-2.6.12-rc1/lib/Makefile @@ -8,7 +8,7 @@ lib-y := errno.o ctype.o string.o vsprin bitmap.o extable.o kobject_uevent.o prio_tree.o sha1.o \ halfmd4.o -obj-y += sort.o parser.o +obj-y += sort.o parser.o iosem.o ifeq ($(CONFIG_DEBUG_KOBJECT),y) CFLAGS_kobject.o += -DDEBUG -- Trond Myklebust <trondmy@trondhjem.org> ^ permalink raw reply [flat|nested] 31+ messages in thread
* Re: [RFC] Add support for semaphore-like structure with support for asynchronous I/O 2005-03-31 22:53 ` Trond Myklebust @ 2005-04-01 0:13 ` Andrew Morton 2005-04-01 1:22 ` Trond Myklebust 0 siblings, 1 reply; 31+ messages in thread From: Andrew Morton @ 2005-04-01 0:13 UTC (permalink / raw) To: Trond Myklebust; +Cc: linux-kernel, linux-fsdevel Trond Myklebust <trondmy@trondhjem.org> wrote: > > on den 30.03.2005 Klokka 18:17 (-0500) skreiv Trond Myklebust: > > > Or have I misunderstood the intent? Some /* comments */ would be appropriate.. > > > > Will do. > > OK. Plenty of comments added that will hopefully clarify what is going > on and how to use the API. Also some cleanups of the code. Ah, so that's what it does ;) I guess once we have a caller in-tree we could merge this. I wonder if there's other existing code which should be converted to iosems. You chose to not use the aio kernel threads? Does iosem_lock_and_schedule_function() need locking? It nonatomically alters *lk_state. ^ permalink raw reply [flat|nested] 31+ messages in thread
* Re: [RFC] Add support for semaphore-like structure with support for asynchronous I/O 2005-04-01 0:13 ` Andrew Morton @ 2005-04-01 1:22 ` Trond Myklebust 2005-04-01 14:12 ` Suparna Bhattacharya 0 siblings, 1 reply; 31+ messages in thread From: Trond Myklebust @ 2005-04-01 1:22 UTC (permalink / raw) To: Andrew Morton; +Cc: linux-kernel, Linux Filesystem Development to den 31.03.2005 Klokka 16:13 (-0800) skreiv Andrew Morton: > Trond Myklebust <trondmy@trondhjem.org> wrote: > > > > on den 30.03.2005 Klokka 18:17 (-0500) skreiv Trond Myklebust: > > > > Or have I misunderstood the intent? Some /* comments */ would be appropriate.. > > > > > > Will do. > > > > OK. Plenty of comments added that will hopefully clarify what is going > > on and how to use the API. Also some cleanups of the code. > > Ah, so that's what it does ;) > > I guess once we have a caller in-tree we could merge this. I wonder if > there's other existing code which should be converted to iosems. I can put it into the NFS client stream which feeds into the -mm kernel. That will enable me to queue up the NFSv4 patches that depend on it too... > You chose to not use the aio kernel threads? I thought I'd do that in a separate patch since the aio workqueue is currently statically defined in aio.c. > Does iosem_lock_and_schedule_function() need locking? It nonatomically > alters *lk_state. iosem_lock_and_schedule_function() will always be called with the iosem->wait.lock held, since it is a waitqueue notification function. In practice it is called by iosem_unlock(). The call to wake_up_locked() will trigger a call to __wake_up_common() which again tries the notification function of each waiter on the queue until it finds one that succeeds. Cheers, Trond -- Trond Myklebust <trond.myklebust@fys.uio.no> ^ permalink raw reply [flat|nested] 31+ messages in thread
* Re: [RFC] Add support for semaphore-like structure with support for asynchronous I/O 2005-04-01 1:22 ` Trond Myklebust @ 2005-04-01 14:12 ` Suparna Bhattacharya 2005-04-04 15:52 ` Suparna Bhattacharya 0 siblings, 1 reply; 31+ messages in thread From: Suparna Bhattacharya @ 2005-04-01 14:12 UTC (permalink / raw) To: Trond Myklebust; +Cc: Andrew Morton, linux-kernel, Linux Filesystem Development On Thu, Mar 31, 2005 at 08:22:17PM -0500, Trond Myklebust wrote: > to den 31.03.2005 Klokka 16:13 (-0800) skreiv Andrew Morton: > > Trond Myklebust <trondmy@trondhjem.org> wrote: > > > > > > on den 30.03.2005 Klokka 18:17 (-0500) skreiv Trond Myklebust: > > > > > Or have I misunderstood the intent? Some /* comments */ would be appropriate.. > > > > > > > > Will do. > > > > > > OK. Plenty of comments added that will hopefully clarify what is going > > > on and how to use the API. Also some cleanups of the code. > > > > Ah, so that's what it does ;) > > > > I guess once we have a caller in-tree we could merge this. I wonder if > > there's other existing code which should be converted to iosems. > > I can put it into the NFS client stream which feeds into the -mm kernel. > That will enable me to queue up the NFSv4 patches that depend on it > too... > > > You chose to not use the aio kernel threads? > > I thought I'd do that in a separate patch since the aio workqueue is > currently statically defined in aio.c. I'll take a look at the patch over the weekend. I had a patch for aio semaphores a long while back, but I need to spend some time to understand how different this is. Regards Suparna > > > Does iosem_lock_and_schedule_function() need locking? It nonatomically > > alters *lk_state. > > iosem_lock_and_schedule_function() will always be called with the > iosem->wait.lock held, since it is a waitqueue notification function. > > In practice it is called by iosem_unlock(). The call to wake_up_locked() > will trigger a call to __wake_up_common() which again tries the > notification function of each waiter on the queue until it finds one > that succeeds. > > Cheers, > Trond > > -- > Trond Myklebust <trond.myklebust@fys.uio.no> > > - > To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in > the body of a message to majordomo@vger.kernel.org > More majordomo info at http://vger.kernel.org/majordomo-info.html -- Suparna Bhattacharya (suparna@in.ibm.com) Linux Technology Center IBM Software Lab, India ^ permalink raw reply [flat|nested] 31+ messages in thread
* Re: [RFC] Add support for semaphore-like structure with support for asynchronous I/O 2005-04-01 14:12 ` Suparna Bhattacharya @ 2005-04-04 15:52 ` Suparna Bhattacharya 2005-04-04 16:22 ` Benjamin LaHaise 2005-04-04 16:39 ` Trond Myklebust 0 siblings, 2 replies; 31+ messages in thread From: Suparna Bhattacharya @ 2005-04-04 15:52 UTC (permalink / raw) To: Trond Myklebust Cc: Andrew Morton, linux-kernel, Linux Filesystem Development, linux-aio [cc'ing linux-aio] The way I did this for semaphores was to modify the down routines to accept an explicit wait queue entry parameter, so the caller could set it up to be a synchronous waiter or a callback as appropriate. Of course, in the AIO case, the callback was setup to restart the iocb operation to continue rest of its processing, which was when it acquired the lock. So, the callback in itself didn't need to remember the lock. The only grey area that I didn't resolve to satisfaction And oh yes, even though this was a really small change, having to update and verify those umpteen different arch specific pieces was not entirely an exciting prospect - I got as far as x86, x86_64 and ppc64 ... but stopped after that. It so happened that i_sem contention didn't show up as a major latency issue even without async semaphores in the kinds of workloads we were working with then. Your approach looks reasonable and simple enough. It'd be useful if I could visualize the caller side of things as in the NFS client stream as you mention - do you plan to post that soon ? I'm tempted to think about the possibility of using your iosems with retry-based AIO. Regards Suparna On Fri, Apr 01, 2005 at 07:42:25PM +0530, Suparna Bhattacharya wrote: > On Thu, Mar 31, 2005 at 08:22:17PM -0500, Trond Myklebust wrote: > > to den 31.03.2005 Klokka 16:13 (-0800) skreiv Andrew Morton: > > > Trond Myklebust <trondmy@trondhjem.org> wrote: > > > > > > > > on den 30.03.2005 Klokka 18:17 (-0500) skreiv Trond Myklebust: > > > > > > Or have I misunderstood the intent? Some /* comments */ would be appropriate.. > > > > > > > > > > Will do. > > > > > > > > OK. Plenty of comments added that will hopefully clarify what is going > > > > on and how to use the API. Also some cleanups of the code. > > > > > > Ah, so that's what it does ;) > > > > > > I guess once we have a caller in-tree we could merge this. I wonder if > > > there's other existing code which should be converted to iosems. > > > > I can put it into the NFS client stream which feeds into the -mm kernel. > > That will enable me to queue up the NFSv4 patches that depend on it > > too... > > > > > You chose to not use the aio kernel threads? > > > > I thought I'd do that in a separate patch since the aio workqueue is > > currently statically defined in aio.c. > > I'll take a look at the patch over the weekend. I had a patch > for aio semaphores a long while back, but I need to spend some time > to understand how different this is. > > Regards > Suparna > > > > > > Does iosem_lock_and_schedule_function() need locking? It nonatomically > > > alters *lk_state. > > > > iosem_lock_and_schedule_function() will always be called with the > > iosem->wait.lock held, since it is a waitqueue notification function. > > > > In practice it is called by iosem_unlock(). The call to wake_up_locked() > > will trigger a call to __wake_up_common() which again tries the > > notification function of each waiter on the queue until it finds one > > that succeeds. > > > > Cheers, > > Trond > > > > -- > > Trond Myklebust <trond.myklebust@fys.uio.no> > > > > - > > To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in > > the body of a message to majordomo@vger.kernel.org > > More majordomo info at http://vger.kernel.org/majordomo-info.html > > -- > Suparna Bhattacharya (suparna@in.ibm.com) > Linux Technology Center > IBM Software Lab, India > > - > To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in > the body of a message to majordomo@vger.kernel.org > More majordomo info at http://vger.kernel.org/majordomo-info.html -- Suparna Bhattacharya (suparna@in.ibm.com) Linux Technology Center IBM Software Lab, India -- To unsubscribe, send a message with 'unsubscribe linux-aio' in the body to majordomo@kvack.org. For more info on Linux AIO, see: http://www.kvack.org/aio/ Don't email: <a href=mailto:"aart@kvack.org">aart@kvack.org</a> ^ permalink raw reply [flat|nested] 31+ messages in thread
* Re: [RFC] Add support for semaphore-like structure with support for asynchronous I/O 2005-04-04 15:52 ` Suparna Bhattacharya @ 2005-04-04 16:22 ` Benjamin LaHaise 2005-04-04 17:56 ` Trond Myklebust 2005-04-04 16:39 ` Trond Myklebust 1 sibling, 1 reply; 31+ messages in thread From: Benjamin LaHaise @ 2005-04-04 16:22 UTC (permalink / raw) To: Suparna Bhattacharya Cc: Trond Myklebust, Andrew Morton, linux-kernel, Linux Filesystem Development, linux-aio On Mon, Apr 04, 2005 at 09:22:45PM +0530, Suparna Bhattacharya wrote: > And oh yes, even though this was a really small change, having to > update and verify those umpteen different arch specific pieces was not > entirely an exciting prospect - I got as far as x86, x86_64 and ppc64 > ... but stopped after that. It so happened that i_sem contention > didn't show up as a major latency issue even without async semaphores > in the kinds of workloads we were working with then. There are workloads where it does make a difference, like in audio processing. > Your approach looks reasonable and simple enough. It'd be useful if I > could visualize the caller side of things as in the NFS client stream > as you mention - do you plan to post that soon ? > I'm tempted to think about the possibility of using your iosems > with retry-based AIO. I'm not sure I see the point in adding yet another type of lock to the kernel that has different infrastructure. This will end up with much more code changing to adapt to the new iosems, rather than just using a new primative on the existing semaphores. -ben -- To unsubscribe, send a message with 'unsubscribe linux-aio' in the body to majordomo@kvack.org. For more info on Linux AIO, see: http://www.kvack.org/aio/ Don't email: <a href=mailto:"aart@kvack.org">aart@kvack.org</a> ^ permalink raw reply [flat|nested] 31+ messages in thread
* Re: [RFC] Add support for semaphore-like structure with support for asynchronous I/O 2005-04-04 16:22 ` Benjamin LaHaise @ 2005-04-04 17:56 ` Trond Myklebust 2005-04-05 15:46 ` Benjamin LaHaise 0 siblings, 1 reply; 31+ messages in thread From: Trond Myklebust @ 2005-04-04 17:56 UTC (permalink / raw) To: Benjamin LaHaise Cc: Suparna Bhattacharya, Andrew Morton, linux-kernel, Linux Filesystem Development, linux-aio må den 04.04.2005 Klokka 12:22 (-0400) skreiv Benjamin LaHaise: > > Your approach looks reasonable and simple enough. It'd be useful if I > > could visualize the caller side of things as in the NFS client stream > > as you mention - do you plan to post that soon ? > > I'm tempted to think about the possibility of using your iosems > > with retry-based AIO. > > I'm not sure I see the point in adding yet another type of lock to the > kernel that has different infrastructure. This will end up with much > more code changing to adapt to the new iosems, rather than just using a > new primative on the existing semaphores. The point is to avoid having to develop and test 23 different and highly arch-dependent (and hence non-maintainable) versions of the same code. In particular note that many of those architectures appear to be heavily over-optimized to directly inline assembly code as well as to use non-standard calling conventions. They all also manage somehow to differ w.r.t. spinlocking conventions and even w.r.t. the internal structures and algorithms used. IOW: the current semaphore implementations really all need to die, and be replaced by a single generic version to which it is actually practical to add new functionality. Failing that, it is _much_ easier to convert the generic code that needs to support aio to use a new locking implementation and then test that. It is not as if conversion to aio won't involve changes to the code in the area surrounding those locks anyway. Cheers, Trond -- Trond Myklebust <trond.myklebust@fys.uio.no> -- To unsubscribe, send a message with 'unsubscribe linux-aio' in the body to majordomo@kvack.org. For more info on Linux AIO, see: http://www.kvack.org/aio/ Don't email: <a href=mailto:"aart@kvack.org">aart@kvack.org</a> ^ permalink raw reply [flat|nested] 31+ messages in thread
* Re: [RFC] Add support for semaphore-like structure with support for asynchronous I/O 2005-04-04 17:56 ` Trond Myklebust @ 2005-04-05 15:46 ` Benjamin LaHaise 2005-04-06 1:20 ` Trond Myklebust ` (2 more replies) 0 siblings, 3 replies; 31+ messages in thread From: Benjamin LaHaise @ 2005-04-05 15:46 UTC (permalink / raw) To: Trond Myklebust Cc: Suparna Bhattacharya, Andrew Morton, linux-kernel, Linux Filesystem Development, linux-aio On Mon, Apr 04, 2005 at 01:56:35PM -0400, Trond Myklebust wrote: > IOW: the current semaphore implementations really all need to die, and > be replaced by a single generic version to which it is actually > practical to add new functionality. I can see that goal, but I don't think introducing iosems is the right way to acheive it. Instead (and I'll start tackling this), how about factoring out the existing semaphore implementations to use a common lib/semaphore.c, much like lib/rwsem.c? The iosems can be used as a basis for the implementation, but we can avoid having to do a giant s/semaphore/iosem/g over the kernel tree. > Failing that, it is _much_ easier to convert the generic code that needs > to support aio to use a new locking implementation and then test that. > It is not as if conversion to aio won't involve changes to the code in > the area surrounding those locks anyway. Quite true. There's a lot more work to do in this area, and these common primatives are needed to make progress. Someone at netapp sent me an email yesterday asking about aio support in NFS, so there is some demand out there. Cheers, -ben -- "Time is what keeps everything from happening all at once." -- John Wheeler -- To unsubscribe, send a message with 'unsubscribe linux-aio' in the body to majordomo@kvack.org. For more info on Linux AIO, see: http://www.kvack.org/aio/ Don't email: <a href=mailto:"aart@kvack.org">aart@kvack.org</a> ^ permalink raw reply [flat|nested] 31+ messages in thread
* Re: [RFC] Add support for semaphore-like structure with support for asynchronous I/O 2005-04-05 15:46 ` Benjamin LaHaise @ 2005-04-06 1:20 ` Trond Myklebust 2005-04-06 5:17 ` Bill Huey 2005-04-06 5:01 ` Suparna Bhattacharya 2005-04-07 11:43 ` Christoph Hellwig 2 siblings, 1 reply; 31+ messages in thread From: Trond Myklebust @ 2005-04-06 1:20 UTC (permalink / raw) To: Benjamin LaHaise Cc: Suparna Bhattacharya, Andrew Morton, linux-kernel, Linux Filesystem Development, linux-aio ty den 05.04.2005 Klokka 11:46 (-0400) skreiv Benjamin LaHaise: > I can see that goal, but I don't think introducing iosems is the right > way to acheive it. Instead (and I'll start tackling this), how about > factoring out the existing semaphore implementations to use a common > lib/semaphore.c, much like lib/rwsem.c? The iosems can be used as a > basis for the implementation, but we can avoid having to do a giant > s/semaphore/iosem/g over the kernel tree. If you're willing to take this on then you have my full support and I'd be happy to lend a hand. Cheers, Trond -- Trond Myklebust <trond.myklebust@fys.uio.no> ^ permalink raw reply [flat|nested] 31+ messages in thread
* Re: [RFC] Add support for semaphore-like structure with support for asynchronous I/O 2005-04-06 1:20 ` Trond Myklebust @ 2005-04-06 5:17 ` Bill Huey 0 siblings, 0 replies; 31+ messages in thread From: Bill Huey @ 2005-04-06 5:17 UTC (permalink / raw) To: Trond Myklebust Cc: Benjamin LaHaise, Suparna Bhattacharya, Andrew Morton, linux-kernel, Linux Filesystem Development, linux-aio On Tue, Apr 05, 2005 at 09:20:57PM -0400, Trond Myklebust wrote: > ty den 05.04.2005 Klokka 11:46 (-0400) skreiv Benjamin LaHaise: > > > I can see that goal, but I don't think introducing iosems is the right > > way to acheive it. Instead (and I'll start tackling this), how about > > factoring out the existing semaphore implementations to use a common > > lib/semaphore.c, much like lib/rwsem.c? The iosems can be used as a > > basis for the implementation, but we can avoid having to do a giant > > s/semaphore/iosem/g over the kernel tree. > > If you're willing to take this on then you have my full support and I'd > be happy to lend a hand. I would expect also that some RT subgroups would be highly interested in getting it to respect priority for reworking parts of softirq. bill ^ permalink raw reply [flat|nested] 31+ messages in thread
* Re: [RFC] Add support for semaphore-like structure with support for asynchronous I/O 2005-04-05 15:46 ` Benjamin LaHaise 2005-04-06 1:20 ` Trond Myklebust @ 2005-04-06 5:01 ` Suparna Bhattacharya 2005-04-07 11:43 ` Christoph Hellwig 2 siblings, 0 replies; 31+ messages in thread From: Suparna Bhattacharya @ 2005-04-06 5:01 UTC (permalink / raw) To: Benjamin LaHaise Cc: Trond Myklebust, Andrew Morton, linux-kernel, Linux Filesystem Development, linux-aio On Tue, Apr 05, 2005 at 11:46:41AM -0400, Benjamin LaHaise wrote: > On Mon, Apr 04, 2005 at 01:56:35PM -0400, Trond Myklebust wrote: > > IOW: the current semaphore implementations really all need to die, and > > be replaced by a single generic version to which it is actually > > practical to add new functionality. > > I can see that goal, but I don't think introducing iosems is the right > way to acheive it. Instead (and I'll start tackling this), how about > factoring out the existing semaphore implementations to use a common > lib/semaphore.c, much like lib/rwsem.c? The iosems can be used as a > basis for the implementation, but we can avoid having to do a giant > s/semaphore/iosem/g over the kernel tree. That would be really neat, if you can get to it. Regards Suparna > > > Failing that, it is _much_ easier to convert the generic code that needs > > to support aio to use a new locking implementation and then test that. > > It is not as if conversion to aio won't involve changes to the code in > > the area surrounding those locks anyway. > > Quite true. There's a lot more work to do in this area, and these common > primatives are needed to make progress. Someone at netapp sent me an > email yesterday asking about aio support in NFS, so there is some demand > out there. Cheers, > > -ben > -- > "Time is what keeps everything from happening all at once." -- John Wheeler > -- > To unsubscribe, send a message with 'unsubscribe linux-aio' in > the body to majordomo@kvack.org. For more info on Linux AIO, > see: http://www.kvack.org/aio/ > Don't email: <a href=mailto:"aart@kvack.org">aart@kvack.org</a> -- Suparna Bhattacharya (suparna@in.ibm.com) Linux Technology Center IBM Software Lab, India ^ permalink raw reply [flat|nested] 31+ messages in thread
* Re: [RFC] Add support for semaphore-like structure with support for asynchronous I/O 2005-04-05 15:46 ` Benjamin LaHaise 2005-04-06 1:20 ` Trond Myklebust 2005-04-06 5:01 ` Suparna Bhattacharya @ 2005-04-07 11:43 ` Christoph Hellwig 2005-04-08 22:39 ` Benjamin LaHaise 2005-04-15 16:13 ` David Howells 2 siblings, 2 replies; 31+ messages in thread From: Christoph Hellwig @ 2005-04-07 11:43 UTC (permalink / raw) To: Benjamin LaHaise Cc: Trond Myklebust, Suparna Bhattacharya, Andrew Morton, linux-kernel, Linux Filesystem Development, linux-aio On Tue, Apr 05, 2005 at 11:46:41AM -0400, Benjamin LaHaise wrote: > On Mon, Apr 04, 2005 at 01:56:35PM -0400, Trond Myklebust wrote: > > IOW: the current semaphore implementations really all need to die, and > > be replaced by a single generic version to which it is actually > > practical to add new functionality. > > I can see that goal, but I don't think introducing iosems is the right > way to acheive it. Instead (and I'll start tackling this), how about > factoring out the existing semaphore implementations to use a common > lib/semaphore.c, much like lib/rwsem.c? The iosems can be used as a > basis for the implementation, but we can avoid having to do a giant > s/semaphore/iosem/g over the kernel tree. Note that iosem is also a total misowner, it's not a counting semaphore but a sleeping mutex with some special features. Now if someone wants my two cent on how to resolve the two gazillion different implementations mess: - switch all current semaphore users that don't need counting semaphores over to use a mutex_t type. For now it can map to struct semaphore. - rip out all existing complicated struct semaphore implementations and replace it with a portable C implementation. There's not a lot of users anyway. Add a mutex_t implementation that allows sensible assembly hooks for architectures instead of reimplementing all of it - add more features to mutex_t where nessecary -- To unsubscribe, send a message with 'unsubscribe linux-aio' in the body to majordomo@kvack.org. For more info on Linux AIO, see: http://www.kvack.org/aio/ Don't email: <a href=mailto:"aart@kvack.org">aart@kvack.org</a> ^ permalink raw reply [flat|nested] 31+ messages in thread
* Re: [RFC] Add support for semaphore-like structure with support for asynchronous I/O 2005-04-07 11:43 ` Christoph Hellwig @ 2005-04-08 22:39 ` Benjamin LaHaise 2005-04-08 23:31 ` Trond Myklebust 2005-04-15 16:13 ` David Howells 1 sibling, 1 reply; 31+ messages in thread From: Benjamin LaHaise @ 2005-04-08 22:39 UTC (permalink / raw) To: Christoph Hellwig, Trond Myklebust, Suparna Bhattacharya, Andrew Morton, linux-kernel, Linux Filesystem Development, linux-aio On Thu, Apr 07, 2005 at 12:43:02PM +0100, Christoph Hellwig wrote: > - switch all current semaphore users that don't need counting semaphores > over to use a mutex_t type. For now it can map to struct semaphore. > - rip out all existing complicated struct semaphore implementations and > replace it with a portable C implementation. There's not a lot of users > anyway. Add a mutex_t implementation that allows sensible assembly hooks > for architectures instead of reimplementing all of it > - add more features to mutex_t where nessecary Oh dear, this is going to take a while. In any case, here is such a first step in creating such a sequence of patches. Located at http://www.kvack.org/~bcrl/patches/mutex-A0/ are the following patches: 00_mutex.diff - Introduces the basic mutex abstraction on top of the existing semaphore implementation. 01_i_sem.diff - Converts all users of i_sem to use the mutex abstraction. 10_new_mutex.diff - Replaces the semphore mutex with a new mutex derrived from Trond's iosem patch. Note that this fixes a serious bug in iosems: see the change in mutex_lock_wake_function that ignores the return value of default_wake_function, as on SMP a process might still be running while we actually made progress. sem-test.c - A basic stress tester for the mutex / semaphore. I'm still not convinced that introducing the mutex type is the best approach, especially given the history of the up()/down() implementation. On the aio side of things, I introduced the owner field in the mutex (as opposed to the flag in Trond's iosem) for the next patch in the series to enable something like the following api: int aio_lock_mutex(struct mutex *lock, struct iocb *iocb); ...generic_file_read.... { ret = mutex_lock_aio(&inode->i_sem, iocb); if (ret) return ret; /* aio_lock_mutex can return -EIOCBQUEUED */ ... mutex_unlock(&inode->i_sem); } mutex_lock_aio will attempt to take the lock if the iocb is not the owner, otherwise it returns immediately (ie ->owner == iocb). This will allow for code paths that support aio to follow a fairly similar coding style to the synchronous io path. More next week... -ben -- "Time is what keeps everything from happening all at once." -- John Wheeler -- To unsubscribe, send a message with 'unsubscribe linux-aio' in the body to majordomo@kvack.org. For more info on Linux AIO, see: http://www.kvack.org/aio/ Don't email: <a href=mailto:"aart@kvack.org">aart@kvack.org</a> ^ permalink raw reply [flat|nested] 31+ messages in thread
* Re: [RFC] Add support for semaphore-like structure with support for asynchronous I/O 2005-04-08 22:39 ` Benjamin LaHaise @ 2005-04-08 23:31 ` Trond Myklebust 2005-04-10 14:08 ` Suparna Bhattacharya 0 siblings, 1 reply; 31+ messages in thread From: Trond Myklebust @ 2005-04-08 23:31 UTC (permalink / raw) To: Benjamin LaHaise Cc: Christoph Hellwig, Suparna Bhattacharya, Andrew Morton, linux-kernel, Linux Filesystem Development, linux-aio fr den 08.04.2005 Klokka 18:39 (-0400) skreiv Benjamin LaHaise: > On the aio side of things, I introduced the owner field in the mutex (as > opposed to the flag in Trond's iosem) for the next patch in the series to > enable something like the following api: > > int aio_lock_mutex(struct mutex *lock, struct iocb *iocb); Any chance of a more generic interface too? iocbs are fairly high level objects, and so I do not see them helping to resolve low level filesystem problems such as the NFSv4 state cleanup. Cheers, Trond -- Trond Myklebust <trond.myklebust@fys.uio.no> ^ permalink raw reply [flat|nested] 31+ messages in thread
* Re: [RFC] Add support for semaphore-like structure with support for asynchronous I/O 2005-04-08 23:31 ` Trond Myklebust @ 2005-04-10 14:08 ` Suparna Bhattacharya 0 siblings, 0 replies; 31+ messages in thread From: Suparna Bhattacharya @ 2005-04-10 14:08 UTC (permalink / raw) To: Trond Myklebust Cc: Benjamin LaHaise, Christoph Hellwig, Andrew Morton, linux-kernel, Linux Filesystem Development, linux-aio On Fri, Apr 08, 2005 at 07:31:46PM -0400, Trond Myklebust wrote: > fr den 08.04.2005 Klokka 18:39 (-0400) skreiv Benjamin LaHaise: > > > On the aio side of things, I introduced the owner field in the mutex (as > > opposed to the flag in Trond's iosem) for the next patch in the series to > > enable something like the following api: > > > > int aio_lock_mutex(struct mutex *lock, struct iocb *iocb); > > Any chance of a more generic interface too? > > iocbs are fairly high level objects, and so I do not see them helping to > resolve low level filesystem problems such as the NFSv4 state cleanup. My preferred approach would be to make the wait queue element the primitive, rather than the iocb, precisely for this reason. Guess its time for me to repost my aio-wait-bit based patch set - it doesn't cover the async semaphores bit, but should indicate the general direction of thinking. I still need to look at Ben's patches though. Regards Suparna > > Cheers, > Trond > > -- > Trond Myklebust <trond.myklebust@fys.uio.no> > > -- > To unsubscribe, send a message with 'unsubscribe linux-aio' in > the body to majordomo@kvack.org. For more info on Linux AIO, > see: http://www.kvack.org/aio/ > Don't email: <a href=mailto:"aart@kvack.org">aart@kvack.org</a> -- Suparna Bhattacharya (suparna@in.ibm.com) Linux Technology Center IBM Software Lab, India ^ permalink raw reply [flat|nested] 31+ messages in thread
* Re: [RFC] Add support for semaphore-like structure with support for asynchronous I/O 2005-04-07 11:43 ` Christoph Hellwig 2005-04-08 22:39 ` Benjamin LaHaise @ 2005-04-15 16:13 ` David Howells 2005-04-15 22:42 ` Trond Myklebust 2005-04-16 11:06 ` David Howells 1 sibling, 2 replies; 31+ messages in thread From: David Howells @ 2005-04-15 16:13 UTC (permalink / raw) To: Benjamin LaHaise Cc: Christoph Hellwig, Trond Myklebust, Suparna Bhattacharya, Andrew Morton, linux-kernel, Linux Filesystem Development, linux-aio Benjamin LaHaise <bcrl@kvack.org> wrote: > Oh dear, this is going to take a while. In any case, here is such a > first step in creating such a sequence of patches. Located at > http://www.kvack.org/~bcrl/patches/mutex-A0/ are the following patches: > ... > 10_new_mutex.diff - Replaces the semphore mutex with a new mutex > derrived from Trond's iosem patch. Note that > this fixes a serious bug in iosems: see the > change in mutex_lock_wake_function that ignores > the return value of default_wake_function, as > on SMP a process might still be running while > we actually made progress. Can I suggest you don't use a wait_queue_t in your mutex? The way you've implemented your mutex you appear to have to take spinlocks and disable interrupts a lot more than is necessary. You might want to look at how I've implemented semaphores for the frv arch: include/asm-frv/semaphore.h arch/frv/kernel/semaphore.c On FRV disabling interrupts is quite expensive, so I've made my own simple semaphores that don't need to take spinlocks and disable interrupts in the down() path once the sleeping task has been queued[*]. These work in exactly the same way as rwsems. [*] except for the case where down_interruptible() is interrupted. The point being that since up() needs to take the semaphore's spinlock anyway so that it can access the list, up() does all the necessary dequeuing of tasks before waking them up. David -- To unsubscribe, send a message with 'unsubscribe linux-aio' in the body to majordomo@kvack.org. For more info on Linux AIO, see: http://www.kvack.org/aio/ Don't email: <a href=mailto:"aart@kvack.org">aart@kvack.org</a> ^ permalink raw reply [flat|nested] 31+ messages in thread
* Re: [RFC] Add support for semaphore-like structure with support for asynchronous I/O 2005-04-15 16:13 ` David Howells @ 2005-04-15 22:42 ` Trond Myklebust 2005-04-15 23:42 ` Benjamin LaHaise 2005-04-16 11:12 ` David Howells 2005-04-16 11:06 ` David Howells 1 sibling, 2 replies; 31+ messages in thread From: Trond Myklebust @ 2005-04-15 22:42 UTC (permalink / raw) To: David Howells Cc: Benjamin LaHaise, Christoph Hellwig, Suparna Bhattacharya, Andrew Morton, linux-kernel, Linux Filesystem Development, linux-aio fr den 15.04.2005 Klokka 17:13 (+0100) skreiv David Howells: > Can I suggest you don't use a wait_queue_t in your mutex? The way you've > implemented your mutex you appear to have to take spinlocks and disable > interrupts a lot more than is necessary. > You might want to look at how I've implemented semaphores for the frv arch: > > include/asm-frv/semaphore.h > arch/frv/kernel/semaphore.c > > On FRV disabling interrupts is quite expensive, so I've made my own simple > semaphores that don't need to take spinlocks and disable interrupts in the > down() path once the sleeping task has been queued[*]. These work in exactly > the same way as rwsems. > The point being that since up() needs to take the semaphore's spinlock > anyway > so that it can access the list, up() does all the necessary dequeuing > of tasks > before waking them up. I'm looking at the code, and I'm completely failing to see your point. Exactly how is that scheme incompatible with use of wait_queue_t? AFAICS You grab the wait_queue_t lock once in down()/__mutex_lock() order to try to take the lock (or queue the waiter if that fails), then once more in order to pass the mutex on to the next waiter on up()/mutex_unlock(). That is more or less the exact same thing I was doing with iosems using bog-standard waitqueues, and which Ben has adapted to his mutexes. What am I missing? One of the motivations for doing that as far as the iosems were concerned, BTW, was to avoid adding to this ecology of functionally identical structures that we have for semaphores, rwsems, and wait queues. Cheers, Trond -- Trond Myklebust <trond.myklebust@fys.uio.no> ^ permalink raw reply [flat|nested] 31+ messages in thread
* Re: [RFC] Add support for semaphore-like structure with support for asynchronous I/O 2005-04-15 22:42 ` Trond Myklebust @ 2005-04-15 23:42 ` Benjamin LaHaise 2005-04-16 11:12 ` David Howells 1 sibling, 0 replies; 31+ messages in thread From: Benjamin LaHaise @ 2005-04-15 23:42 UTC (permalink / raw) To: Trond Myklebust Cc: David Howells, Christoph Hellwig, Suparna Bhattacharya, Andrew Morton, linux-kernel, Linux Filesystem Development, linux-aio On Fri, Apr 15, 2005 at 03:42:54PM -0700, Trond Myklebust wrote: > AFAICS You grab the wait_queue_t lock once in down()/__mutex_lock() > order to try to take the lock (or queue the waiter if that fails), then > once more in order to pass the mutex on to the next waiter on > up()/mutex_unlock(). That is more or less the exact same thing I was > doing with iosems using bog-standard waitqueues, and which Ben has > adapted to his mutexes. What am I missing? I didn't quite see that either. What about the use of atomic operations on frv? Are they more lightweight than a semaphore, making for a better fastpath? -ben -- "Time is what keeps everything from happening all at once." -- John Wheeler -- To unsubscribe, send a message with 'unsubscribe linux-aio' in the body to majordomo@kvack.org. For more info on Linux AIO, see: http://www.kvack.org/aio/ Don't email: <a href=mailto:"aart@kvack.org">aart@kvack.org</a> ^ permalink raw reply [flat|nested] 31+ messages in thread
* Re: [RFC] Add support for semaphore-like structure with support for asynchronous I/O 2005-04-15 22:42 ` Trond Myklebust 2005-04-15 23:42 ` Benjamin LaHaise @ 2005-04-16 11:12 ` David Howells 1 sibling, 0 replies; 31+ messages in thread From: David Howells @ 2005-04-16 11:12 UTC (permalink / raw) To: Benjamin LaHaise Cc: Trond Myklebust, Christoph Hellwig, Suparna Bhattacharya, Andrew Morton, linux-kernel, Linux Filesystem Development, linux-aio Benjamin LaHaise <bcrl@kvack.org> wrote: > > What about the use of atomic operations on frv? Are they more lightweight > than a semaphore, making for a better fastpath? What do you mean? Atomic ops don't compare to semaphores. On FRV atomic ops don't disable interrupts; they reserve one of the eight conditional execution flags at compile time and that flag is cleared by entry to an exception, thus aborting the write instruction. See: Documentation/fujitsu/frv/atomic-ops.txt I could try and improve the fastpath on the semaphores for FRV - and perhaps should - but I implemented the semaphores before I'd thought of the clever way to do atomic ops. David ^ permalink raw reply [flat|nested] 31+ messages in thread
* Re: [RFC] Add support for semaphore-like structure with support for asynchronous I/O 2005-04-15 16:13 ` David Howells 2005-04-15 22:42 ` Trond Myklebust @ 2005-04-16 11:06 ` David Howells 1 sibling, 0 replies; 31+ messages in thread From: David Howells @ 2005-04-16 11:06 UTC (permalink / raw) To: Trond Myklebust Cc: Benjamin LaHaise, Christoph Hellwig, Suparna Bhattacharya, Andrew Morton, linux-kernel, Linux Filesystem Development, linux-aio Trond Myklebust <trond.myklebust@fys.uio.no> wrote: > > AFAICS You grab the wait_queue_t lock once in down()/__mutex_lock() > order to try to take the lock (or queue the waiter if that fails), then > once more in order to pass the mutex on to the next waiter on > up()/mutex_unlock(). That is more or less the exact same thing I was > doing with iosems using bog-standard waitqueues, and which Ben has > adapted to his mutexes. What am I missing? In Ben's patch it looks like the down() grabs the spinlock twice. Once to queue yourself and one to dequeue yourself. The up() grabs the spinlock once to wake you up, but it wasn't obvious that it actually dequeues you. David -- To unsubscribe, send a message with 'unsubscribe linux-aio' in the body to majordomo@kvack.org. For more info on Linux AIO, see: http://www.kvack.org/aio/ Don't email: <a href=mailto:"aart@kvack.org">aart@kvack.org</a> ^ permalink raw reply [flat|nested] 31+ messages in thread
* Re: [RFC] Add support for semaphore-like structure with support for asynchronous I/O 2005-04-04 15:52 ` Suparna Bhattacharya 2005-04-04 16:22 ` Benjamin LaHaise @ 2005-04-04 16:39 ` Trond Myklebust 1 sibling, 0 replies; 31+ messages in thread From: Trond Myklebust @ 2005-04-04 16:39 UTC (permalink / raw) To: suparna Cc: Andrew Morton, linux-kernel, Linux Filesystem Development, linux-aio må den 04.04.2005 Klokka 21:22 (+0530) skreiv Suparna Bhattacharya: > [cc'ing linux-aio] > > The way I did this for semaphores was to modify the down routines to accept > an explicit wait queue entry parameter, so the caller could set it up to > be a synchronous waiter or a callback as appropriate. Of course, in the > AIO case, the callback was setup to restart the iocb operation to continue > rest of its processing, which was when it acquired the lock. So, the > callback in itself didn't need to remember the lock. The only grey area > that I didn't resolve to satisfaction > > And oh yes, even though this was a really small change, having to > update and verify those umpteen different arch specific pieces was not > entirely an exciting prospect - I got as far as x86, x86_64 and ppc64 > ... but stopped after that. It so happened that i_sem contention > didn't show up as a major latency issue even without async semaphores > in the kinds of workloads we were working with then. Right. This is the main problem with using the ordinary semaphores: changing all the different arch-specific versions, and then (not least!) testing the resulting mess. > Your approach looks reasonable and simple enough. It'd be useful if I > could visualize the caller side of things as in the NFS client stream > as you mention - do you plan to post that soon ? > I'm tempted to think about the possibility of using your iosems > with retry-based AIO. Our immediate concern was with handling the cleanup of NFSv4 state in the case where a process was signalled. If you mount with "-ointr" using the code in 2.6.12-rc1, then it can currently deadlock rpciod. The iosems prevent the deadlock by allowing us to move the code that tears down state into a workqueue item. You can find the full series of NFS client patches against 2.6.12-rc1-bk4 and newer on http://client.linux-nfs.org/Linux-2.6.x/2.6.12-rc1-bk4/ The particular 3 patches that make use of the iosem code in order to do asynchronous cleanup of state are: http://client.linux-nfs.org/Linux-2.6.x/2.6.12-rc1-bk4/linux-2.6.12-40-nfs4_state_locks.dif http://client.linux-nfs.org/Linux-2.6.x/2.6.12-rc1-bk4/linux-2.6.12-41-async_close.dif http://client.linux-nfs.org/Linux-2.6.x/2.6.12-rc1-bk4/linux-2.6.12-42-async_locku.dif Cheers, Trond -- Trond Myklebust <trond.myklebust@fys.uio.no> -- To unsubscribe, send a message with 'unsubscribe linux-aio' in the body to majordomo@kvack.org. For more info on Linux AIO, see: http://www.kvack.org/aio/ Don't email: <a href=mailto:"aart@kvack.org">aart@kvack.org</a> ^ permalink raw reply [flat|nested] 31+ messages in thread
* Re: [RFC] Add support for semaphore-like structure with support for asynchronous I/O 2005-03-30 21:51 [RFC] Add support for semaphore-like structure with support for asynchronous I/O Trond Myklebust 2005-03-30 22:34 ` Andrew Morton @ 2005-03-31 8:02 ` Nikita Danilov 2005-03-31 12:31 ` Trond Myklebust 1 sibling, 1 reply; 31+ messages in thread From: Nikita Danilov @ 2005-03-31 8:02 UTC (permalink / raw) To: Trond Myklebust; +Cc: Linux Filesystem Development Trond Myklebust writes: > In NFSv4 we often want to serialize asynchronous RPC calls with ordinary [...] > + > +void fastcall iosem_lock(struct iosem *lk) > +{ > + struct iosem_wait waiter; > + > + might_sleep(); > + > + init_iosem_waiter(&waiter); > + waiter.wait.func = iosem_lock_wake_function; > + > + set_current_state(TASK_UNINTERRUPTIBLE); > + if (__iosem_lock(lk, &waiter)) > + schedule(); > + __set_current_state(TASK_RUNNING); > + > + BUG_ON(!list_empty(&waiter.wait.task_list)); > +} > +EXPORT_SYMBOL(iosem_lock); As I understand it, in the blocking path IOSEM_LOCK_EXCLUSIVE is set by iosem_lock_wake_function() called by the waker thread. But this is asking for convoy formation: iosem_unlock() transfers ownership of the lock to the thread that is currently sleeping. This means that all threads _running_ on another processors and bumping into that lock will go to sleep too (i.e., lock is owned but unused), thus forming a "convoy" that has a tendency to grow over time when there is at least smallest contention. This is known problem with all "early ownership transfer" locks designs (except maybe in your case contention is not supposed to happen). And as a nitpick: struct iosem is emphatically _not_ a semaphore, it even doesn't have a counter. :) Can it be named iomutex or iolock or async_lock or something? We have enough confusion going on with struct semaphore that is mostly used as mutex. [...] > + > +int fastcall iosem_lock_and_schedule_work(struct iosem *lk, struct iosem_work *wk) > +{ > + int ret; > + > + init_iosem_waiter(&wk->waiter); > + wk->waiter.wait.func = iosem_lock_and_schedule_function; > + ret = __iosem_lock(lk, &wk->waiter); > + if (ret == 0) > + ret = schedule_work(&wk->work); > + return ret; > +} This is actually trylock, right? If iosem_lock_and_schedule_work() returns -EINPROGRESS lock is not acquired on return and caller has to call schedule(). [...] > > -- > Trond Myklebust <trond.myklebust@fys.uio.no> > Nikita. ^ permalink raw reply [flat|nested] 31+ messages in thread
* Re: [RFC] Add support for semaphore-like structure with support for asynchronous I/O 2005-03-31 8:02 ` Nikita Danilov @ 2005-03-31 12:31 ` Trond Myklebust 2005-03-31 17:09 ` Nikita Danilov 0 siblings, 1 reply; 31+ messages in thread From: Trond Myklebust @ 2005-03-31 12:31 UTC (permalink / raw) To: Nikita Danilov; +Cc: Linux Filesystem Development to den 31.03.2005 Klokka 12:02 (+0400) skreiv Nikita Danilov: > As I understand it, in the blocking path IOSEM_LOCK_EXCLUSIVE is set by > iosem_lock_wake_function() called by the waker thread. But this is > asking for convoy formation: iosem_unlock() transfers ownership of the > lock to the thread that is currently sleeping. This means that all > threads _running_ on another processors and bumping into that lock will > go to sleep too (i.e., lock is owned but unused), thus forming a > "convoy" that has a tendency to grow over time when there is at least > smallest contention. This is known problem with all "early ownership > transfer" locks designs (except maybe in your case contention is not > supposed to happen). You are assuming that all the waiters on the queue are tasks that must sleep if they cannot take the lock. That is not the case here. Whereas some users will indeed fall in this category, I expect that most will rather want to use the non-blocking mode in which the caller is free to go off and do other useful work. > And as a nitpick: struct iosem is emphatically _not_ a semaphore, it > even doesn't have a counter. :) Can it be named iomutex or iolock or > async_lock or something? We have enough confusion going on with struct > semaphore that is mostly used as mutex. I'm planning on expanding it to do more than mutexes. You can also implement shared locks using the same structure (in which case you do need a counter for the readers - the plan is to simply use the lower bits in the iosem->state for that).. > [...] > > > + > > +int fastcall iosem_lock_and_schedule_work(struct iosem *lk, struct iosem_work *wk) > > +{ > > + int ret; > > + > > + init_iosem_waiter(&wk->waiter); > > + wk->waiter.wait.func = iosem_lock_and_schedule_function; > > + ret = __iosem_lock(lk, &wk->waiter); > > + if (ret == 0) > > + ret = schedule_work(&wk->work); > > + return ret; > > +} > > This is actually trylock, right? If iosem_lock_and_schedule_work() > returns -EINPROGRESS lock is not acquired on return and caller has to > call schedule(). This is definitely not the same as trylock. It is the support for asynchronously grabbing the lock. The function containing the critical code that needs to be executed with the lock held should be set up as a work queue item inside wk->work. No matter whether or not the lock could be immediately granted, the caller is never responsible for running that function directly. Rather, as soon as the lock is granted, the work queue item wk->work will be automatically scheduled using keventd. Cheers, Trond -- Trond Myklebust <trond.myklebust@fys.uio.no> ^ permalink raw reply [flat|nested] 31+ messages in thread
* Re: [RFC] Add support for semaphore-like structure with support for asynchronous I/O 2005-03-31 12:31 ` Trond Myklebust @ 2005-03-31 17:09 ` Nikita Danilov 2005-03-31 17:22 ` Trond Myklebust 0 siblings, 1 reply; 31+ messages in thread From: Nikita Danilov @ 2005-03-31 17:09 UTC (permalink / raw) To: Trond Myklebust; +Cc: Linux Filesystem Development Trond Myklebust writes: > to den 31.03.2005 Klokka 12:02 (+0400) skreiv Nikita Danilov: > > > As I understand it, in the blocking path IOSEM_LOCK_EXCLUSIVE is set by > > iosem_lock_wake_function() called by the waker thread. But this is > > asking for convoy formation: iosem_unlock() transfers ownership of the > > lock to the thread that is currently sleeping. This means that all > > threads _running_ on another processors and bumping into that lock will > > go to sleep too (i.e., lock is owned but unused), thus forming a > > "convoy" that has a tendency to grow over time when there is at least > > smallest contention. This is known problem with all "early ownership > > transfer" locks designs (except maybe in your case contention is not > > supposed to happen). > > You are assuming that all the waiters on the queue are tasks that must > sleep if they cannot take the lock. That is not the case here. Whereas > some users will indeed fall in this category, I expect that most will > rather want to use the non-blocking mode in which the caller is free to > go off and do other useful work. Ah, I see... But this doesn't look like semaphore _at_ _all_. Semaphores have no call-backs, and in iosem case it's callback (in the form of struct work_struct) that is central to the interface. I belive naming should reflect this, it's utterly confusing as it is. Maybe struct work_queue_token and schedule_work_{with,end}_token()? [...] > > Cheers, > Trond Nikita. ^ permalink raw reply [flat|nested] 31+ messages in thread
* Re: [RFC] Add support for semaphore-like structure with support for asynchronous I/O 2005-03-31 17:09 ` Nikita Danilov @ 2005-03-31 17:22 ` Trond Myklebust 2005-03-31 17:32 ` Trond Myklebust 0 siblings, 1 reply; 31+ messages in thread From: Trond Myklebust @ 2005-03-31 17:22 UTC (permalink / raw) To: Nikita Danilov; +Cc: Linux Filesystem Development to den 31.03.2005 Klokka 21:09 (+0400) skreiv Nikita Danilov: > > You are assuming that all the waiters on the queue are tasks that must > > sleep if they cannot take the lock. That is not the case here. Whereas > > some users will indeed fall in this category, I expect that most will > > rather want to use the non-blocking mode in which the caller is free to > > go off and do other useful work. > > Ah, I see... But this doesn't look like semaphore _at_ _all_. Semaphores > have no call-backs, and in iosem case it's callback (in the form of > struct work_struct) that is central to the interface. I belive naming should > reflect this, it's utterly confusing as it is. Maybe struct > work_queue_token and schedule_work_{with,end}_token()? That would be equally confusing. The lock exists to serialize _BOTH_ tasks and work queue items. For instance the NFSv4 OPEN takes the token synchronously, and it needs to be serialized w.r.t. the asynchronous CLOSE or OPEN_DOWNGRADE. Cheers, Trond -- Trond Myklebust <trond.myklebust@fys.uio.no> ^ permalink raw reply [flat|nested] 31+ messages in thread
* Re: [RFC] Add support for semaphore-like structure with support for asynchronous I/O 2005-03-31 17:22 ` Trond Myklebust @ 2005-03-31 17:32 ` Trond Myklebust 0 siblings, 0 replies; 31+ messages in thread From: Trond Myklebust @ 2005-03-31 17:32 UTC (permalink / raw) To: Nikita Danilov; +Cc: Linux Filesystem Development to den 31.03.2005 Klokka 12:22 (-0500) skreiv Trond Myklebust: > That would be equally confusing. The lock exists to serialize _BOTH_ > tasks and work queue items. > > For instance the NFSv4 OPEN takes the token synchronously, and it needs > to be serialized w.r.t. the asynchronous CLOSE or OPEN_DOWNGRADE. My point is that as far as ordinary tasks are concerned, the lock can acts exactly like a semaphore/mutex would do. As far as asynchronous tasks are concerned, the lock will serialize the execution of the work_struct w.r.t. the ordinary tasks and other work_structs without risk of deadlocking. Cheers, Trond -- Trond Myklebust <trond.myklebust@fys.uio.no> ^ permalink raw reply [flat|nested] 31+ messages in thread
end of thread, other threads:[~2005-04-16 11:12 UTC | newest] Thread overview: 31+ messages (download: mbox.gz follow: Atom feed -- links below jump to the message on this page -- 2005-03-30 21:51 [RFC] Add support for semaphore-like structure with support for asynchronous I/O Trond Myklebust 2005-03-30 22:34 ` Andrew Morton 2005-03-30 23:17 ` Trond Myklebust 2005-03-30 23:44 ` Andrew Morton 2005-03-31 0:02 ` Trond Myklebust 2005-03-31 22:53 ` Trond Myklebust 2005-04-01 0:13 ` Andrew Morton 2005-04-01 1:22 ` Trond Myklebust 2005-04-01 14:12 ` Suparna Bhattacharya 2005-04-04 15:52 ` Suparna Bhattacharya 2005-04-04 16:22 ` Benjamin LaHaise 2005-04-04 17:56 ` Trond Myklebust 2005-04-05 15:46 ` Benjamin LaHaise 2005-04-06 1:20 ` Trond Myklebust 2005-04-06 5:17 ` Bill Huey 2005-04-06 5:01 ` Suparna Bhattacharya 2005-04-07 11:43 ` Christoph Hellwig 2005-04-08 22:39 ` Benjamin LaHaise 2005-04-08 23:31 ` Trond Myklebust 2005-04-10 14:08 ` Suparna Bhattacharya 2005-04-15 16:13 ` David Howells 2005-04-15 22:42 ` Trond Myklebust 2005-04-15 23:42 ` Benjamin LaHaise 2005-04-16 11:12 ` David Howells 2005-04-16 11:06 ` David Howells 2005-04-04 16:39 ` Trond Myklebust 2005-03-31 8:02 ` Nikita Danilov 2005-03-31 12:31 ` Trond Myklebust 2005-03-31 17:09 ` Nikita Danilov 2005-03-31 17:22 ` Trond Myklebust 2005-03-31 17:32 ` Trond Myklebust
This is a public inbox, see mirroring instructions for how to clone and mirror all data and code used for this inbox; as well as URLs for NNTP newsgroup(s).