linux-fsdevel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [RFC] Add support for semaphore-like structure with support for asynchronous I/O
@ 2005-03-30 21:51 Trond Myklebust
  2005-03-30 22:34 ` Andrew Morton
  2005-03-31  8:02 ` Nikita Danilov
  0 siblings, 2 replies; 31+ messages in thread
From: Trond Myklebust @ 2005-03-30 21:51 UTC (permalink / raw)
  To: linux-kernel; +Cc: Linux Filesystem Development

In NFSv4 we often want to serialize asynchronous RPC calls with ordinary
RPC calls (OPEN and CLOSE for instance). On paper, semaphores would
appear to fit the bill; however, there is no support for asynchronous I/O
with semaphores.
<rant>What's more, trying to add that type of support is an exercise in
futility: there are currently 23 slightly different arch-dependent and
over-optimized versions of semaphores (not counting the different
versions of read/write semaphores).</rant>

Anyhow, the following is a simple implementation of semaphores designed
to satisfy the needs of those I/O subsystems that want to support
asynchronous behaviour too. Please comment.

Cheers,
  Trond

--------------------------------------
NFS: Add support for iosems.

 These act rather like semaphores, but also have support for asynchronous
 I/O, using the wait_queue_t callback features.

 Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 include/linux/iosem.h |   63 ++++++++++++++++++++++++++++++
 lib/Makefile          |    2 
 lib/iosem.c           |  103 ++++++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 167 insertions(+), 1 deletion(-)

Index: linux-2.6.12-rc1/include/linux/iosem.h
===================================================================
--- /dev/null
+++ linux-2.6.12-rc1/include/linux/iosem.h
@@ -0,0 +1,63 @@
+/*
+ * include/linux/iosem.h
+ *
+ * Copyright (C) 2005 Trond Myklebust <Trond.Myklebust@netapp.com>
+ *
+ * Definitions for iosems. These can act as mutexes, but unlike
+ * semaphores, their code is 100% arch-independent, and can therefore
+ * easily be expanded in order to provide for things like
+ * asynchronous I/O.
+ */
+
+#ifndef __LINUX_SEM_LOCK_H
+#define __LINUX_SEM_LOCK_H
+
+#ifdef __KERNEL__
+#include <linux/wait.h>
+#include <linux/workqueue.h>
+
+struct iosem {
+	unsigned long state;
+	wait_queue_head_t wait;
+};
+
+#define IOSEM_LOCK_EXCLUSIVE (24)
+/* #define IOSEM_LOCK_SHARED (25) */
+
+struct iosem_wait {
+	struct iosem *lock;
+	wait_queue_t wait;
+};
+
+struct iosem_work {
+	struct work_struct work;
+	struct iosem_wait waiter;
+};
+
+extern void FASTCALL(iosem_lock(struct iosem *lk));
+extern void FASTCALL(iosem_unlock(struct iosem *lk));
+extern int iosem_lock_wake_function(wait_queue_t *wait, unsigned mode, int sync, void *key);
+
+static inline void init_iosem(struct iosem *lk)
+{
+	lk->state = 0;
+	init_waitqueue_head(&lk->wait);
+}
+
+static inline void init_iosem_waiter(struct iosem_wait *waiter)
+{
+	waiter->lock = NULL;
+	init_waitqueue_entry(&waiter->wait, current);
+	INIT_LIST_HEAD(&waiter->wait.task_list);
+}
+
+static inline void init_iosem_work(struct iosem_work *wk, void (*func)(void *), void *data)
+{
+	INIT_WORK(&wk->work, func, data);
+}
+
+extern int FASTCALL(iosem_lock_and_schedule_work(struct iosem *lk, struct iosem_work *wk));
+extern int iosem_lock_and_schedule_function(wait_queue_t *wait, unsigned mode, int sync, void *key);
+
+#endif /* __KERNEL__ */
+#endif /* __LINUX_SEM_LOCK_H */
Index: linux-2.6.12-rc1/lib/iosem.c
===================================================================
--- /dev/null
+++ linux-2.6.12-rc1/lib/iosem.c
@@ -0,0 +1,103 @@
+/*
+ * lib/iosem.c
+ *
+ * Copyright (C) 2005 Trond Myklebust <Trond.Myklebust@netapp.com>
+ *
+ * A set of primitives for semaphore-like locks that also support notification
+ * callbacks for waiters.
+ */
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/sched.h>
+#include <linux/wait.h>
+#include <linux/iosem.h>
+
+static int fastcall __iosem_lock(struct iosem *lk, struct iosem_wait *waiter)
+{
+	int ret;
+
+	spin_lock(&lk->wait.lock);
+	if (lk->state != 0) {
+		waiter->lock = lk;
+		add_wait_queue_exclusive_locked(&lk->wait, &waiter->wait);
+		ret = -EINPROGRESS;
+	} else {
+		lk->state |= 1 << IOSEM_LOCK_EXCLUSIVE;
+		ret = 0;
+	}
+	spin_unlock(&lk->wait.lock);
+	return ret;
+}
+
+void fastcall iosem_unlock(struct iosem *lk)
+{
+	spin_lock(&lk->wait.lock);
+	lk->state &= ~(1 << IOSEM_LOCK_EXCLUSIVE);
+	wake_up_locked(&lk->wait);
+	spin_unlock(&lk->wait.lock);
+}
+EXPORT_SYMBOL(iosem_unlock);
+
+int iosem_lock_wake_function(wait_queue_t *wait, unsigned mode, int sync, void *key)
+{
+	struct iosem_wait *waiter = container_of(wait, struct iosem_wait, wait);
+	unsigned long *lk_state = &waiter->lock->state;
+	int ret = 0;
+
+	if (*lk_state == 0) {
+		ret = default_wake_function(wait, mode, sync, key);
+		if (ret) {
+			*lk_state |= 1 << IOSEM_LOCK_EXCLUSIVE;
+			list_del_init(&wait->task_list);
+		}
+	}
+	return ret;
+}
+
+void fastcall iosem_lock(struct iosem *lk)
+{
+	struct iosem_wait waiter;
+
+	might_sleep();
+
+	init_iosem_waiter(&waiter);
+	waiter.wait.func = iosem_lock_wake_function;
+
+	set_current_state(TASK_UNINTERRUPTIBLE);
+	if (__iosem_lock(lk, &waiter))
+		schedule();
+	__set_current_state(TASK_RUNNING);
+
+	BUG_ON(!list_empty(&waiter.wait.task_list));
+}
+EXPORT_SYMBOL(iosem_lock);
+
+int iosem_lock_and_schedule_function(wait_queue_t *wait, unsigned mode, int sync, void *key)
+{
+	struct iosem_wait *waiter = container_of(wait, struct iosem_wait, wait);
+	struct iosem_work *wk = container_of(waiter, struct iosem_work, waiter);
+	unsigned long *lk_state = &waiter->lock->state;
+	int ret = 0;
+
+	if (*lk_state == 0) {
+		ret = schedule_work(&wk->work);
+		if (ret) {
+			*lk_state |= 1 << IOSEM_LOCK_EXCLUSIVE;
+			list_del_init(&wait->task_list);
+		}
+	}
+	return ret;
+}
+
+int fastcall iosem_lock_and_schedule_work(struct iosem *lk, struct iosem_work *wk)
+{
+	int ret;
+
+	init_iosem_waiter(&wk->waiter);
+	wk->waiter.wait.func = iosem_lock_and_schedule_function;
+	ret = __iosem_lock(lk, &wk->waiter);
+	if (ret == 0)
+		ret = schedule_work(&wk->work);
+	return ret;
+}
+EXPORT_SYMBOL(iosem_lock_and_schedule_work);
Index: linux-2.6.12-rc1/lib/Makefile
===================================================================
--- linux-2.6.12-rc1.orig/lib/Makefile
+++ linux-2.6.12-rc1/lib/Makefile
@@ -8,7 +8,7 @@ lib-y := errno.o ctype.o string.o vsprin
 	 bitmap.o extable.o kobject_uevent.o prio_tree.o sha1.o \
 	 halfmd4.o
 
-obj-y += sort.o parser.o
+obj-y += sort.o parser.o iosem.o
 
 ifeq ($(CONFIG_DEBUG_KOBJECT),y)
 CFLAGS_kobject.o += -DDEBUG

-- 
Trond Myklebust <trond.myklebust@fys.uio.no>


^ permalink raw reply	[flat|nested] 31+ messages in thread

end of thread, other threads:[~2005-04-16 11:12 UTC | newest]

Thread overview: 31+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2005-03-30 21:51 [RFC] Add support for semaphore-like structure with support for asynchronous I/O Trond Myklebust
2005-03-30 22:34 ` Andrew Morton
2005-03-30 23:17   ` Trond Myklebust
2005-03-30 23:44     ` Andrew Morton
2005-03-31  0:02       ` Trond Myklebust
2005-03-31 22:53     ` Trond Myklebust
2005-04-01  0:13       ` Andrew Morton
2005-04-01  1:22         ` Trond Myklebust
2005-04-01 14:12           ` Suparna Bhattacharya
2005-04-04 15:52             ` Suparna Bhattacharya
2005-04-04 16:22               ` Benjamin LaHaise
2005-04-04 17:56                 ` Trond Myklebust
2005-04-05 15:46                   ` Benjamin LaHaise
2005-04-06  1:20                     ` Trond Myklebust
2005-04-06  5:17                       ` Bill Huey
2005-04-06  5:01                     ` Suparna Bhattacharya
2005-04-07 11:43                     ` Christoph Hellwig
2005-04-08 22:39                       ` Benjamin LaHaise
2005-04-08 23:31                         ` Trond Myklebust
2005-04-10 14:08                           ` Suparna Bhattacharya
2005-04-15 16:13                       ` David Howells
2005-04-15 22:42                         ` Trond Myklebust
2005-04-15 23:42                           ` Benjamin LaHaise
2005-04-16 11:12                           ` David Howells
2005-04-16 11:06                         ` David Howells
2005-04-04 16:39               ` Trond Myklebust
2005-03-31  8:02 ` Nikita Danilov
2005-03-31 12:31   ` Trond Myklebust
2005-03-31 17:09     ` Nikita Danilov
2005-03-31 17:22       ` Trond Myklebust
2005-03-31 17:32         ` Trond Myklebust

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).