public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
* Re: GFS
@ 2005-08-11  7:10 Pekka J Enberg
  2005-08-11 16:33 ` GFS Zach Brown
  0 siblings, 1 reply; 30+ messages in thread
From: Pekka J Enberg @ 2005-08-11  7:10 UTC (permalink / raw)
  To: Mark Fasheh
  Cc: Christoph Hellwig, Zach Brown, David Teigland, Pekka Enberg, akpm,
	linux-kernel, linux-cluster

Hi Mark,

On Thu, 11 Aug 2005, Pekka J Enberg wrote:
> Reading and writing from other filesystems to a GFS2 mmap'd file
> does not walk the vmas. Therefore, data consistency guarantees
> are different:

What I meant was that, if a filesystem requires vma walks, we need to do
them at the VFS level with something like the following patch. With this, your
filesystem would implement a_ops->iolock_acquire, which sorts the locks
and takes them all. In the case of GFS2, this would replace walk_vm().

Thoughts?

			Pekka

[PATCH] vfs: iolock

This patch introduces iolock which can be used by filesystems that require
special locking when accessing an mmap'd region.

Unfinished and untested.

Signed-off-by: Pekka Enberg <penberg@cs.helsinki.fi>
---

 fs/Makefile            |    2 -
 fs/iolock.c            |   88 +++++++++++++++++++++++++++++++++++++++++++++++++
 fs/read_write.c        |   15 ++++++++
 include/linux/fs.h     |    2 +
 include/linux/iolock.h |   11 ++++++
 5 files changed, 117 insertions(+), 1 deletion(-)

Index: 2.6-mm/fs/iolock.c
===================================================================
--- /dev/null
+++ 2.6-mm/fs/iolock.c
@@ -0,0 +1,88 @@
+/*
+ * fs/iolock.c
+ *
+ * Derived from GFS2.
+ */
+
+#include <linux/iolock.h>
+#include <linux/kernel.h>
+#include <linux/fs.h>
+#include <linux/mm.h>
+#include <linux/slab.h>
+
+/*
+ * An iolock records the files mmap'd over a user memory region, for
+ * filesystems that require special locks to access mmap'd memory.
+ * It is filled in by iolock_region() and freed by iolock_release().
+ */
+struct iolock {
+	struct address_space	*mapping;	/* mapping whose a_ops lock/unlock files[]; NULL if none */
+	unsigned long		nr_files;	/* number of entries used in files[] */
+	struct file		**files;	/* array allocated with mm->map_count slots */
+};
+
+/*
+ * Collect the vm_file of every vma in [buf, buf + size) whose a_ops has both
+ * iolock hooks, drop mmap_sem, then lock them via ->iolock_acquire. Returns
+ * an iolock (empty when no vma participates) or an ERR_PTR() on failure.
+ */
+struct iolock *iolock_region(const char __user *buf, size_t size)
+{
+	int err = -ENOMEM;
+	struct mm_struct *mm = current->mm;
+	struct vm_area_struct *vma;
+	unsigned long start = (unsigned long)buf;
+	unsigned long end = start + size;
+	struct iolock *ret;
+
+	ret = kcalloc(1, sizeof(*ret), GFP_KERNEL);
+	if (!ret)
+		return ERR_PTR(-ENOMEM);
+
+	down_read(&mm->mmap_sem);
+	ret->files = kcalloc(mm->map_count, sizeof(struct file*), GFP_KERNEL);
+	if (!ret->files)
+		goto error_unlock;
+
+	for (vma = find_vma(mm, start); vma && vma->vm_start < end;
+	     vma = vma->vm_next) {
+		struct file *file = vma->vm_file;
+		struct address_space *mapping;
+
+		if (!file)
+			continue;
+		mapping = file->f_mapping;
+		if (!mapping->a_ops->iolock_acquire ||
+		    !mapping->a_ops->iolock_release)
+			continue;
+
+		/* FIXME: only one address_space may participate in the iolock. */
+		ret->mapping = mapping;
+		ret->files[ret->nr_files++] = file;
+	}
+	up_read(&mm->mmap_sem);
+
+	if (ret->mapping) {	/* NULL: no vma participates, nothing to lock */
+		err = ret->mapping->a_ops->iolock_acquire(ret->files, ret->nr_files);
+		if (err)
+			goto error;
+	}
+	return ret;
+
+error_unlock:
+	up_read(&mm->mmap_sem);
+error:
+	/* Nothing is locked here: free directly, skip ->iolock_release. */
+	kfree(ret->files);
+	kfree(ret);
+	return ERR_PTR(err);
+}
+
+void iolock_release(struct iolock *iolock)	/* unlock via a_ops (if set), then free */
+{
+	struct address_space *mapping = iolock->mapping;	/* NULL if no file participates */
+	if (mapping && mapping->a_ops->iolock_release)
+		mapping->a_ops->iolock_release(iolock->files, iolock->nr_files);
+	kfree(iolock->files);	/* frees the pointer array only */
+	kfree(iolock);
+}
Index: 2.6-mm/fs/read_write.c
===================================================================
--- 2.6-mm.orig/fs/read_write.c
+++ 2.6-mm/fs/read_write.c
@@ -14,6 +14,7 @@
 #include <linux/security.h>
 #include <linux/module.h>
 #include <linux/syscalls.h>
+#include <linux/iolock.h>
 
 #include <asm/uaccess.h>
 #include <asm/unistd.h>
@@ -247,14 +248,21 @@ ssize_t vfs_read(struct file *file, char
 	if (!ret) {
 		ret = security_file_permission (file, MAY_READ);
 		if (!ret) {
+			struct iolock * iolock = iolock_region(buf, count);
+			if (IS_ERR(iolock)) {
+				ret = PTR_ERR(iolock);
+				goto out;
+			}
 			if (file->f_op->read)
 				ret = file->f_op->read(file, buf, count, pos);
 			else
 				ret = do_sync_read(file, buf, count, pos);
+			iolock_release(iolock);
 			if (ret > 0) {
 				fsnotify_access(file->f_dentry);
 				current->rchar += ret;
 			}
+  out:
 			current->syscr++;
 		}
 	}
@@ -298,14 +306,21 @@ ssize_t vfs_write(struct file *file, con
 	if (!ret) {
 		ret = security_file_permission (file, MAY_WRITE);
 		if (!ret) {
+			struct iolock * iolock = iolock_region(buf, count);
+			if (IS_ERR(iolock)) {
+				ret = PTR_ERR(iolock);
+				goto out;
+			}
 			if (file->f_op->write)
 				ret = file->f_op->write(file, buf, count, pos);
 			else
 				ret = do_sync_write(file, buf, count, pos);
+			iolock_release(iolock);
 			if (ret > 0) {
 				fsnotify_modify(file->f_dentry);
 				current->wchar += ret;
 			}
+  out:
 			current->syscw++;
 		}
 	}
Index: 2.6-mm/include/linux/iolock.h
===================================================================
--- /dev/null
+++ 2.6-mm/include/linux/iolock.h
@@ -0,0 +1,11 @@
+#ifndef __LINUX_IOLOCK_H
+#define __LINUX_IOLOCK_H
+
+#include <linux/kernel.h>
+
+struct iolock;
+
+struct iolock *iolock_region(const char __user *buf, size_t count);
+void iolock_release(struct iolock *lock);
+
+#endif
Index: 2.6-mm/fs/Makefile
===================================================================
--- 2.6-mm.orig/fs/Makefile
+++ 2.6-mm/fs/Makefile
@@ -10,7 +10,7 @@ obj-y :=	open.o read_write.o file_table.
 		ioctl.o readdir.o select.o fifo.o locks.o dcache.o inode.o \
 		attr.o bad_inode.o file.o filesystems.o namespace.o aio.o \
 		seq_file.o xattr.o libfs.o fs-writeback.o mpage.o direct-io.o \
-		ioprio.o
+		ioprio.o iolock.o
 
 obj-$(CONFIG_INOTIFY)		+= inotify.o
 obj-$(CONFIG_EPOLL)		+= eventpoll.o
Index: 2.6-mm/include/linux/fs.h
===================================================================
--- 2.6-mm.orig/include/linux/fs.h
+++ 2.6-mm/include/linux/fs.h
@@ -334,6 +334,8 @@ struct address_space_operations {
 			loff_t offset, unsigned long nr_segs);
 	struct page* (*get_xip_page)(struct address_space *, sector_t,
 			int);
+	int (*iolock_acquire)(struct file **, unsigned long);
+	void (*iolock_release)(struct file **, unsigned long);
 };
 
 struct backing_dev_info;

^ permalink raw reply	[flat|nested] 30+ messages in thread
* [PATCH 00/14] GFS
@ 2005-08-02  7:18 David Teigland
  2005-08-02 10:16 ` Pekka Enberg
  2005-08-03  6:44 ` [PATCH 00/14] GFS Pekka Enberg
  0 siblings, 2 replies; 30+ messages in thread
From: David Teigland @ 2005-08-02  7:18 UTC (permalink / raw)
  To: akpm; +Cc: linux-kernel, linux-cluster

Hi, GFS (Global File System) is a cluster file system that we'd like to
see added to the kernel.  The 14 patches total about 900K so I won't send
them to the list unless that's requested.  Comments and suggestions are
welcome.  Thanks

http://redhat.com/~teigland/gfs2/20050801/gfs2-full.patch
http://redhat.com/~teigland/gfs2/20050801/broken-out/

Dave


^ permalink raw reply	[flat|nested] 30+ messages in thread

end of thread, other threads:[~2005-08-11 16:45 UTC | newest]

Thread overview: 30+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2005-08-11  7:10 GFS Pekka J Enberg
2005-08-11 16:33 ` GFS Zach Brown
2005-08-11 16:35   ` GFS Christoph Hellwig
2005-08-11 16:39     ` GFS Zach Brown
2005-08-11 16:44   ` GFS Pekka Enberg
  -- strict thread matches above, loose matches on Subject: below --
2005-08-02  7:18 [PATCH 00/14] GFS David Teigland
2005-08-02 10:16 ` Pekka Enberg
2005-08-03  6:36   ` David Teigland
2005-08-08 14:14     ` GFS Pekka J Enberg
2005-08-08 18:32       ` GFS Zach Brown
2005-08-09 14:49         ` GFS Pekka Enberg
2005-08-09 17:17           ` GFS Zach Brown
2005-08-09 18:35             ` GFS Pekka J Enberg
2005-08-10  4:48             ` GFS Pekka J Enberg
2005-08-10  7:21           ` GFS Christoph Hellwig
2005-08-10  7:31             ` GFS Pekka J Enberg
2005-08-10 16:26               ` GFS Mark Fasheh
2005-08-10 16:57                 ` GFS Pekka J Enberg
2005-08-10 18:21                   ` GFS Mark Fasheh
2005-08-10 20:18                     ` GFS Pekka J Enberg
2005-08-10 22:07                       ` GFS Mark Fasheh
2005-08-11  4:41                         ` GFS Pekka J Enberg
2005-08-10  5:59       ` GFS David Teigland
2005-08-10  6:06         ` GFS Pekka J Enberg
2005-08-03  6:44 ` [PATCH 00/14] GFS Pekka Enberg
2005-08-08  9:57   ` David Teigland
2005-08-08 10:00     ` GFS Pekka J Enberg
2005-08-08 10:18     ` GFS Pekka J Enberg
2005-08-08 10:56       ` GFS David Teigland
2005-08-08 10:57         ` GFS Pekka J Enberg
2005-08-08 11:39           ` GFS David Teigland
2005-08-08 10:34     ` GFS Pekka J Enberg
2005-08-09 14:55     ` GFS Pekka J Enberg
2005-08-10  7:40     ` GFS Pekka J Enberg
2005-08-10  7:43       ` GFS Christoph Hellwig

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox