From: Ingo Molnar <mingo@elte.hu>
To: Linus Torvalds <torvalds@osdl.org>
Cc: Tridge <tridge@samba.org>,
Al Viro <viro@parcelfarce.linux.theplanet.co.uk>,
Jamie Lokier <jamie@shareable.org>,
"H. Peter Anvin" <hpa@zytor.com>,
Kernel Mailing List <linux-kernel@vger.kernel.org>,
Jamie Lokier <jamie@shareable.org>
Subject: [patch] explicit dcache <-> user-space cache coherency, sys_mark_dir_clean(), O_CLEAN
Date: Fri, 20 Feb 2004 14:23:52 +0100 [thread overview]
Message-ID: <20040220132352.GA11618@elte.hu> (raw)
In-Reply-To: <20040220120417.GA4010@elte.hu>
[-- Attachment #1: Type: text/plain, Size: 1262 bytes --]
> What Samba needs is a way to tell between two points in time whether the
> directory contents have changed in any way - nothing more. Only one new
> syscall is used to maintain the Samba dcache:
>
> long sys_mark_dir_clean(dirfd);
> this is how Samba could create a file atomically:
>
> sys_create(name, mode | O_CLEAN);
i've attached a quick patch (against 2.6.3) that implements the new
sys_mark_dir_clean() syscall and O_CLEAN support in all open() variants,
just to give a rough idea of what it looks like. (It's incomplete -
e.g. there's no explicit way to do an atomic unlink or rename.)
i've also attached dir-cache.c, a simple test program for the new
functionality. It marks the current directory clean and tries to open
the "./1" file via O_CLEAN with a 1-second delay. Start this in one shell
and do VFS-namespace-modifying ops in another window (e.g. "rm -f 2;
touch 2") and see the dir-cache code react to it - the 'clean' bit is
lost, and the file open-create does not succeed if the directory is not
clean.
there's a new dentry flag that is maintained under the directory's i_sem
semaphore. (It would be simpler to have the flag on the inode level,
that way the invalidation could be done as a simple filter to the
dnotify function.)
Ingo
[-- Attachment #2: dir-mark-clean-2.6.3-A3 --]
[-- Type: text/plain, Size: 4310 bytes --]
--- linux/arch/i386/kernel/entry.S.orig
+++ linux/arch/i386/kernel/entry.S
@@ -882,5 +882,6 @@ ENTRY(sys_call_table)
.long sys_utimes
.long sys_fadvise64_64
.long sys_ni_syscall /* sys_vserver */
+ .long sys_mark_dir_clean
syscall_table_size=(.-sys_call_table)
--- linux/include/linux/dcache.h.orig
+++ linux/include/linux/dcache.h
@@ -153,9 +153,25 @@ d_iput: no no yes
#define DCACHE_REFERENCED 0x0008 /* Recently used, don't discard. */
#define DCACHE_UNHASHED 0x0010
+#define DCACHE_USER_CLEAN 0x0020 /* userspace cache coherent */
extern spinlock_t dcache_lock;
+static inline void d_user_flush(struct dentry *dentry)
+{
+ dentry->d_vfs_flags &= ~DCACHE_USER_CLEAN;
+}
+
+static inline void d_user_mark_clean(struct dentry *dentry)
+{
+ dentry->d_vfs_flags |= DCACHE_USER_CLEAN;
+}
+
+static inline long d_user_valid(struct dentry *dentry)
+{
+ return (dentry->d_vfs_flags & DCACHE_USER_CLEAN) != 0;
+}
+
/**
* d_drop - drop a dentry
* @dentry: dentry to drop
--- linux/include/asm-generic/errno.h.orig
+++ linux/include/asm-generic/errno.h
@@ -96,5 +96,6 @@
#define ENOMEDIUM 123 /* No medium found */
#define EMEDIUMTYPE 124 /* Wrong medium type */
+#define EFLUSH 125 /* cache not valid */
#endif
--- linux/include/asm-i386/fcntl.h.orig
+++ linux/include/asm-i386/fcntl.h
@@ -20,6 +20,7 @@
#define O_LARGEFILE 0100000
#define O_DIRECTORY 0200000 /* must be a directory */
#define O_NOFOLLOW 0400000 /* don't follow links */
+#define O_CLEAN 01000000 /* parent dir must be clean */
#define F_DUPFD 0 /* dup */
#define F_GETFD 1 /* get close_on_exec */
--- linux/fs/open.c.orig
+++ linux/fs/open.c
@@ -747,6 +747,7 @@ struct file *filp_open(const char * file
namei_flags++;
if (namei_flags & O_TRUNC)
namei_flags |= 2;
+ namei_flags |= flags & O_CLEAN;
error = open_namei(filename, namei_flags, mode, &nd);
if (!error)
@@ -1029,6 +1030,26 @@ out_unlock:
EXPORT_SYMBOL(sys_close);
+asmlinkage long sys_mark_dir_clean(unsigned int fd)
+{
+ struct file *filp;
+ long ret = -EBADF;
+
+ filp = fget(fd);
+ if (!filp)
+ return ret;
+
+ down(&filp->f_dentry->d_inode->i_sem);
+ ret = d_user_valid(filp->f_dentry);
+ d_user_mark_clean(filp->f_dentry);
+ up(&filp->f_dentry->d_inode->i_sem);
+
+ fput(filp);
+
+ return ret;
+}
+
+
/*
* This routine simulates a hangup on the tty, to arrange that users
* are given clean terminals at login time.
--- linux/fs/namei.c.orig
+++ linux/fs/namei.c
@@ -1295,11 +1295,23 @@ do_last:
goto exit;
}
+ /*
+ * Did user-space require the parent directory to be clean
+ * but it was invalid?:
+ */
+ error = -EFLUSH;
+ if ((flag & O_CLEAN) && !d_user_valid(dir)) {
+ up(&dir->d_inode->i_sem);
+ goto exit;
+ }
+
/* Negative dentry, just create the file */
if (!dentry->d_inode) {
if (!IS_POSIXACL(dir->d_inode))
mode &= ~current->fs->umask;
error = vfs_create(dir->d_inode, dentry, mode, nd);
+ if (!error)
+ d_user_flush(dir);
up(&dir->d_inode->i_sem);
dput(nd->dentry);
nd->dentry = dentry;
@@ -1493,6 +1505,8 @@ asmlinkage long sys_mknod(const char __u
}
dput(dentry);
}
+ if (!error)
+ d_user_flush(nd.dentry);
up(&nd.dentry->d_inode->i_sem);
path_release(&nd);
out:
@@ -1545,6 +1559,8 @@ asmlinkage long sys_mkdir(const char __u
if (!IS_POSIXACL(nd.dentry->d_inode))
mode &= ~current->fs->umask;
error = vfs_mkdir(nd.dentry->d_inode, dentry, mode);
+ if (!error)
+ d_user_flush(nd.dentry);
dput(dentry);
}
up(&nd.dentry->d_inode->i_sem);
@@ -1653,6 +1669,8 @@ asmlinkage long sys_rmdir(const char __u
error = PTR_ERR(dentry);
if (!IS_ERR(dentry)) {
error = vfs_rmdir(nd.dentry->d_inode, dentry);
+ if (!error)
+ d_user_flush(nd.dentry);
dput(dentry);
}
up(&nd.dentry->d_inode->i_sem);
@@ -1728,6 +1746,8 @@ asmlinkage long sys_unlink(const char __
if (inode)
atomic_inc(&inode->i_count);
error = vfs_unlink(nd.dentry->d_inode, dentry);
+ if (!error)
+ d_user_flush(nd.dentry);
exit2:
dput(dentry);
}
@@ -2099,6 +2119,10 @@ static inline int do_rename(const char *
error = vfs_rename(old_dir->d_inode, old_dentry,
new_dir->d_inode, new_dentry);
+ if (!error) {
+ d_user_flush(old_dir);
+ d_user_flush(new_dir);
+ }
exit5:
dput(new_dentry);
exit4:
[-- Attachment #3: dir-cache.c --]
[-- Type: text/plain, Size: 881 bytes --]
/*
* Copyright (C) Ingo Molnar, 2002
*/
#include <stdio.h>
#include <unistd.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <fcntl.h>
#include <errno.h>
#include <stdlib.h>
#include <sys/times.h>
#include <sys/wait.h>
#include <sys/ioctl.h>
#include <linux/unistd.h>
#define __NR_sys_mark_dir_clean 274
_syscall1(int, sys_mark_dir_clean, int, fd);
#define O_DIRECTORY 0200000 /* must be a directory */
#define O_CLEAN 01000000 /* parent dir must be clean */
/*
 * Endless test loop for sys_mark_dir_clean() and O_CLEAN.
 *
 * Opens the current directory, then once per iteration:
 *   1. calls sys_mark_dir_clean() on it and prints the returned value,
 *   2. sleeps one second,
 *   3. tries to open-create "./1" with O_CLEAN and prints the resulting
 *      descriptor (negative when the open was refused),
 *   4. sleeps another second.
 *
 * Exits with status -1 if the current directory cannot be opened;
 * otherwise it runs until it is killed.
 */
int main(int argc, char **argv)
{
	int fd, fd2, clean;

	fd = open(".", O_RDONLY | O_DIRECTORY);
	/* 0 is a legal descriptor; open() reports failure only with -1. */
	if (fd < 0) {
		perror("fd:");
		exit(-1);
	}
	for (;;) {
		clean = sys_mark_dir_clean(fd);
		printf("clean:%d ", clean); fflush(stdout);
		sleep(1);
		fd2 = open("./1", O_CREAT|O_TRUNC|O_CLEAN, 0777);
		/* Only close a descriptor we actually got; the open is
		 * expected to fail whenever the directory is not clean. */
		if (fd2 >= 0)
			close(fd2);
		printf("fd:%d\n", fd2);
		sleep(1);
	}
	return 0;
}
next prev parent reply other threads:[~2004-02-20 13:27 UTC|newest]
Thread overview: 123+ messages / expand[flat|nested] mbox.gz Atom feed top
2004-02-17 4:12 UTF-8 and case-insensitivity tridge
2004-02-17 5:11 ` Linus Torvalds
2004-02-17 6:54 ` tridge
2004-02-17 8:33 ` Neil Brown
2004-02-17 22:48 ` tridge
2004-02-18 0:06 ` Neil Brown
2004-02-18 9:47 ` Helge Hafting
2004-02-17 15:13 ` Linus Torvalds
2004-02-17 16:57 ` Linus Torvalds
2004-02-17 19:44 ` viro
2004-02-17 20:10 ` Linus Torvalds
2004-02-17 20:17 ` viro
2004-02-17 20:23 ` Linus Torvalds
2004-02-17 21:08 ` Robin Rosenberg
2004-02-17 21:17 ` Linus Torvalds
2004-02-17 22:27 ` Robin Rosenberg
2004-02-18 3:02 ` tridge
2004-02-17 23:57 ` tridge
2004-02-17 23:20 ` tridge
2004-02-17 23:43 ` Linus Torvalds
2004-02-18 3:26 ` tridge
2004-02-18 5:33 ` H. Peter Anvin
2004-02-18 7:54 ` Marc Lehmann
2004-02-18 2:37 ` H. Peter Anvin
2004-02-18 3:03 ` Linus Torvalds
2004-02-18 3:14 ` H. Peter Anvin
2004-02-18 3:27 ` Linus Torvalds
2004-02-18 21:31 ` tridge
2004-02-18 22:23 ` Linus Torvalds
2004-02-18 22:28 ` Linus Torvalds
2004-02-18 22:50 ` tridge
2004-02-18 22:59 ` Linus Torvalds
2004-02-18 23:09 ` tridge
2004-02-18 23:16 ` Linus Torvalds
2004-02-19 8:10 ` Jamie Lokier
2004-02-19 16:09 ` Linus Torvalds
2004-02-19 16:38 ` Jamie Lokier
2004-02-19 16:54 ` Linus Torvalds
2004-02-19 18:29 ` Jamie Lokier
2004-02-19 19:48 ` Eureka! (was Re: UTF-8 and case-insensitivity) Linus Torvalds
2004-02-19 19:51 ` Linus Torvalds
2004-02-19 19:48 ` H. Peter Anvin
2004-02-19 20:04 ` Linus Torvalds
2004-02-19 20:05 ` viro
2004-02-19 20:23 ` Linus Torvalds
2004-02-19 20:32 ` Linus Torvalds
2004-02-19 20:45 ` viro
2004-02-19 21:26 ` Linus Torvalds
2004-02-19 21:38 ` Linus Torvalds
2004-02-19 21:45 ` Linus Torvalds
2004-02-19 21:43 ` viro
2004-02-19 21:53 ` Linus Torvalds
2004-02-19 22:21 ` David Lang
2004-02-19 20:48 ` Jamie Lokier
2004-02-19 21:30 ` Linus Torvalds
2004-02-20 0:00 ` Jamie Lokier
2004-02-20 0:17 ` Linus Torvalds
2004-02-20 0:24 ` Linus Torvalds
2004-02-20 0:30 ` Trond Myklebust
2004-02-20 0:54 ` Jamie Lokier
2004-02-20 0:57 ` tridge
2004-02-20 1:07 ` Paul Wagland
2004-02-20 13:31 ` Chris Wedgwood
2004-02-20 0:46 ` Jamie Lokier
2004-02-23 10:13 ` Tim Connors
2004-02-20 1:39 ` Junio C Hamano
2004-02-20 12:54 ` Jamie Lokier
2004-02-19 23:37 ` tridge
2004-02-20 0:02 ` Linus Torvalds
2004-02-20 0:16 ` tridge
2004-02-20 0:37 ` Linus Torvalds
2004-02-20 1:26 ` tridge
2004-02-20 1:07 ` H. Peter Anvin
2004-02-20 2:30 ` Theodore Ts'o
2004-02-20 12:04 ` explicit dcache <-> user-space cache coherency, sys_mark_dir_clean(), O_CLEAN Ingo Molnar
2004-02-20 13:19 ` Jamie Lokier
2004-02-20 13:37 ` Ingo Molnar
2004-02-20 14:00 ` Ingo Molnar
2004-02-20 16:31 ` Jamie Lokier
2004-02-20 13:23 ` Ingo Molnar [this message]
2004-02-20 18:00 ` [patch] " viro
2004-02-20 15:41 ` Linus Torvalds
2004-02-20 17:04 ` Ingo Molnar
2004-02-20 17:19 ` Linus Torvalds
2004-02-20 18:48 ` Ingo Molnar
2004-02-21 1:44 ` Jamie Lokier
2004-02-21 7:58 ` Ingo Molnar
2004-02-21 8:04 ` viro
2004-02-21 17:46 ` Ingo Molnar
2004-02-21 18:15 ` Linus Torvalds
2004-02-21 8:26 ` Keith Owens
2004-02-23 10:59 ` Pavel Machek
2004-02-23 13:55 ` Jamie Lokier
2004-02-23 16:45 ` Ingo Molnar
2004-02-23 17:32 ` Jamie Lokier
2004-02-20 23:00 ` tridge
2004-02-20 17:33 ` Jamie Lokier
2004-02-20 18:22 ` Linus Torvalds
2004-02-21 0:38 ` Jamie Lokier
2004-02-21 1:10 ` Linus Torvalds
2004-02-21 3:01 ` Jamie Lokier
2004-02-20 17:47 ` Jamie Lokier
2004-02-20 20:38 ` Christer Weinigel
2004-02-22 15:07 ` Jamie Lokier
2004-02-22 16:55 ` Miquel van Smoorenburg
2004-02-19 19:08 ` UTF-8 and case-insensitivity Helge Hafting
2004-02-18 4:08 ` tridge
2004-02-18 10:05 ` Robin Rosenberg
2004-02-18 11:43 ` tridge
2004-02-18 12:31 ` Robin Rosenberg
2004-02-18 16:48 ` H. Peter Anvin
2004-02-18 20:00 ` H. Peter Anvin
2004-02-19 2:53 ` Daniel Newby
2004-02-17 5:25 ` Tim Connors
2004-02-17 7:43 ` H. Peter Anvin
2004-02-17 8:05 ` H. Peter Anvin
2004-02-17 14:25 ` Dave Kleikamp
2004-02-18 0:16 ` Robert White
2004-02-18 0:20 ` Linus Torvalds
2004-02-18 1:03 ` Robert White
2004-02-18 21:48 ` Ville Herva
2004-02-18 2:48 ` tridge
2004-02-18 20:56 ` Robert White
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20040220132352.GA11618@elte.hu \
--to=mingo@elte.hu \
--cc=hpa@zytor.com \
--cc=jamie@shareable.org \
--cc=linux-kernel@vger.kernel.org \
--cc=torvalds@osdl.org \
--cc=tridge@samba.org \
--cc=viro@parcelfarce.linux.theplanet.co.uk \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.