From: Ingo Molnar <mingo@elte.hu>
To: Linus Torvalds <torvalds@osdl.org>
Cc: Tridge <tridge@samba.org>,
Al Viro <viro@parcelfarce.linux.theplanet.co.uk>,
Jamie Lokier <jamie@shareable.org>,
"H. Peter Anvin" <hpa@zytor.com>,
Kernel Mailing List <linux-kernel@vger.kernel.org>,
Jamie Lokier <jamie@shareable.org>
Subject: [patch] explicit dcache <-> user-space cache coherency, sys_mark_dir_clean(), O_CLEAN
Date: Fri, 20 Feb 2004 14:23:52 +0100 [thread overview]
Message-ID: <20040220132352.GA11618@elte.hu> (raw)
In-Reply-To: <20040220120417.GA4010@elte.hu>
[-- Attachment #1: Type: text/plain, Size: 1262 bytes --]
> What Samba needs is a way to tell between two points in time whether the
> directory contents have changed in any way - nothing more. Only one new
> syscall is used to maintain the Samba dcache:
>
> long sys_mark_dir_clean(dirfd);
> this is how Samba could create a file atomically:
>
> sys_create(name, mode | O_CLEAN);
i've attached a quick patch (against 2.6.3) that implements the new
sys_mark_dir_clean() syscall and O_CLEAN support in all open() variants,
just to give a rough idea of what it looks like. (It's incomplete -
e.g. there's no explicit way to do an atomic unlink or rename.)
i've also attached dir-cache.c, a simple test program for the new
functionality. It marks the current directory clean and tries to open
the "./1" file via O_CLEAN with a 1 second delay. Start this in one shell
and do VFS-namespace modifying ops in another window (eg. "rm -f 2;
touch 2") and see the dir-cache code react to it - the 'clean' bit is
lost, and the file open-create does not succeed if the directory is not
clean.
there's a new dentry flag that is maintained under the directory's i_sem
semaphore. (It would be simpler to have the flag on the inode level,
that way the invalidation could be done as a simple filter to the
dnotify function.)
Ingo
[-- Attachment #2: dir-mark-clean-2.6.3-A3 --]
[-- Type: text/plain, Size: 4310 bytes --]
--- linux/arch/i386/kernel/entry.S.orig
+++ linux/arch/i386/kernel/entry.S
@@ -882,5 +882,6 @@ ENTRY(sys_call_table)
.long sys_utimes
.long sys_fadvise64_64
.long sys_ni_syscall /* sys_vserver */
+ .long sys_mark_dir_clean
syscall_table_size=(.-sys_call_table)
--- linux/include/linux/dcache.h.orig
+++ linux/include/linux/dcache.h
@@ -153,9 +153,25 @@ d_iput: no no yes
#define DCACHE_REFERENCED 0x0008 /* Recently used, don't discard. */
#define DCACHE_UNHASHED 0x0010
+#define DCACHE_USER_CLEAN 0x0020 /* userspace cache coherent */
extern spinlock_t dcache_lock;
+static inline void d_user_flush(struct dentry *dentry) /* drop the user-space "clean" mark from @dentry */
+{
+ dentry->d_vfs_flags &= ~DCACHE_USER_CLEAN; /* plain read-modify-write -- NOTE(review): assumes the caller holds the directory's i_sem; confirm */
+}
+
+static inline void d_user_mark_clean(struct dentry *dentry) /* set the user-space "clean" mark on @dentry */
+{
+ dentry->d_vfs_flags |= DCACHE_USER_CLEAN; /* plain read-modify-write -- NOTE(review): assumes the caller holds the directory's i_sem; confirm */
+}
+
+static inline long d_user_valid(struct dentry *dentry) /* report whether @dentry still carries the "clean" mark */
+{
+ return (dentry->d_vfs_flags & DCACHE_USER_CLEAN) != 0; /* normalised to exactly 0 or 1 */
+}
+
/**
* d_drop - drop a dentry
* @dentry: dentry to drop
--- linux/include/asm-generic/errno.h.orig
+++ linux/include/asm-generic/errno.h
@@ -96,5 +96,6 @@
#define ENOMEDIUM 123 /* No medium found */
#define EMEDIUMTYPE 124 /* Wrong medium type */
+#define EFLUSH 125 /* cache not valid */
#endif
--- linux/include/asm-i386/fcntl.h.orig
+++ linux/include/asm-i386/fcntl.h
@@ -20,6 +20,7 @@
#define O_LARGEFILE 0100000
#define O_DIRECTORY 0200000 /* must be a directory */
#define O_NOFOLLOW 0400000 /* don't follow links */
+#define O_CLEAN 01000000 /* parent dir must be clean */
#define F_DUPFD 0 /* dup */
#define F_GETFD 1 /* get close_on_exec */
--- linux/fs/open.c.orig
+++ linux/fs/open.c
@@ -747,6 +747,7 @@ struct file *filp_open(const char * file
namei_flags++;
if (namei_flags & O_TRUNC)
namei_flags |= 2;
+ namei_flags |= flags & O_CLEAN;
error = open_namei(filename, namei_flags, mode, &nd);
if (!error)
@@ -1029,6 +1030,26 @@ out_unlock:
EXPORT_SYMBOL(sys_close);
+/*
+ * sys_mark_dir_clean - mark an open directory "clean" for user-space caches.
+ * @fd: descriptor of the directory to mark
+ *
+ * Reads the current DCACHE_USER_CLEAN state of the dentry behind @fd and
+ * then sets the flag, all while holding the inode's i_sem - the same
+ * semaphore the namespace-modifying paths in this patch hold when they
+ * clear the flag - so the test-and-set is not interleaved with them.
+ *
+ * Returns the previous state (1 if the directory was already clean, 0 if
+ * it was not), -EBADF if @fd is not an open descriptor, or -ENOTDIR if
+ * @fd does not refer to a directory.
+ */
+asmlinkage long sys_mark_dir_clean(unsigned int fd)
+{
+	struct file *filp;
+	struct dentry *dentry;
+	long ret = -EBADF;
+
+	filp = fget(fd);
+	if (!filp)
+		goto out;
+
+	dentry = filp->f_dentry;
+
+	/*
+	 * Only directories are ever flushed by the namespace operations,
+	 * so a "clean" mark on anything else could never be invalidated.
+	 */
+	ret = -ENOTDIR;
+	if (!S_ISDIR(dentry->d_inode->i_mode))
+		goto out_fput;
+
+	down(&dentry->d_inode->i_sem);
+	ret = d_user_valid(dentry);
+	d_user_mark_clean(dentry);
+	up(&dentry->d_inode->i_sem);
+
+out_fput:
+	fput(filp);
+out:
+	return ret;
+}
+
+
/*
* This routine simulates a hangup on the tty, to arrange that users
* are given clean terminals at login time.
--- linux/fs/namei.c.orig
+++ linux/fs/namei.c
@@ -1295,11 +1295,23 @@ do_last:
goto exit;
}
+ /*
+ * Did user-space require the parent directory to be clean
+ * but it was invalid?:
+ */
+ error = -EFLUSH;
+ if ((flag & O_CLEAN) && !d_user_valid(dir)) {
+ up(&dir->d_inode->i_sem);
+ goto exit;
+ }
+
/* Negative dentry, just create the file */
if (!dentry->d_inode) {
if (!IS_POSIXACL(dir->d_inode))
mode &= ~current->fs->umask;
error = vfs_create(dir->d_inode, dentry, mode, nd);
+ if (!error)
+ d_user_flush(dir);
up(&dir->d_inode->i_sem);
dput(nd->dentry);
nd->dentry = dentry;
@@ -1493,6 +1505,8 @@ asmlinkage long sys_mknod(const char __u
}
dput(dentry);
}
+ if (!error)
+ d_user_flush(nd.dentry);
up(&nd.dentry->d_inode->i_sem);
path_release(&nd);
out:
@@ -1545,6 +1559,8 @@ asmlinkage long sys_mkdir(const char __u
if (!IS_POSIXACL(nd.dentry->d_inode))
mode &= ~current->fs->umask;
error = vfs_mkdir(nd.dentry->d_inode, dentry, mode);
+ if (!error)
+ d_user_flush(nd.dentry);
dput(dentry);
}
up(&nd.dentry->d_inode->i_sem);
@@ -1653,6 +1669,8 @@ asmlinkage long sys_rmdir(const char __u
error = PTR_ERR(dentry);
if (!IS_ERR(dentry)) {
error = vfs_rmdir(nd.dentry->d_inode, dentry);
+ if (!error)
+ d_user_flush(nd.dentry);
dput(dentry);
}
up(&nd.dentry->d_inode->i_sem);
@@ -1728,6 +1746,8 @@ asmlinkage long sys_unlink(const char __
if (inode)
atomic_inc(&inode->i_count);
error = vfs_unlink(nd.dentry->d_inode, dentry);
+ if (!error)
+ d_user_flush(nd.dentry);
exit2:
dput(dentry);
}
@@ -2099,6 +2119,10 @@ static inline int do_rename(const char *
error = vfs_rename(old_dir->d_inode, old_dentry,
new_dir->d_inode, new_dentry);
+ if (!error) {
+ d_user_flush(old_dir);
+ d_user_flush(new_dir);
+ }
exit5:
dput(new_dentry);
exit4:
[-- Attachment #3: dir-cache.c --]
[-- Type: text/plain, Size: 881 bytes --]
/*
* Copyright (C) Ingo Molnar, 2002
*/
#include <stdio.h>
#include <unistd.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <fcntl.h>
#include <errno.h>
#include <stdlib.h>
#include <sys/times.h>
#include <sys/wait.h>
#include <sys/ioctl.h>
#include <linux/unistd.h>
/*
 * Syscall number of sys_mark_dir_clean(). It has to equal the index of the
 * slot the kernel patch appends to sys_call_table.
 * NOTE(review): 274 is presumably that index on i386 2.6.3 - confirm.
 */
#define __NR_sys_mark_dir_clean 274
/* Provides the sys_mark_dir_clean(int fd) stub that main() calls. */
_syscall1(int, sys_mark_dir_clean, int, fd);
/* Local copies of the open() flags; the values mirror asm-i386/fcntl.h as changed by the patch. */
#define O_DIRECTORY 0200000 /* must be a directory */
#define O_CLEAN 01000000 /* parent dir must be clean */
/*
 * Test driver for sys_mark_dir_clean() and O_CLEAN.
 *
 * Opens the current directory, then loops forever: mark the directory
 * clean (printing the state it had before), wait a second, try to
 * create/truncate "./1" with O_CLEAN (printing the resulting descriptor,
 * or -1 if the open failed), wait another second.
 *
 * Exits with a failure status if the directory cannot be opened or the
 * sys_mark_dir_clean() call itself fails.
 */
int main(int argc, char **argv)
{
	int fd, fd2, clean;

	fd = open(".", O_RDONLY | O_DIRECTORY);
	/* 0 is a valid descriptor (e.g. when stdin is closed); only < 0 is an error. */
	if (fd < 0) {
		perror("fd");
		exit(-1);
	}
	for (;;) {
		clean = sys_mark_dir_clean(fd);
		/* Without this the test would spin forever printing -1, e.g. on an unpatched kernel. */
		if (clean < 0) {
			perror("sys_mark_dir_clean");
			exit(-1);
		}
		printf("clean:%d ", clean); fflush(stdout);
		sleep(1);
		/* O_TRUNC needs a write access mode; with the implicit O_RDONLY the result is unspecified. */
		fd2 = open("./1", O_WRONLY|O_CREAT|O_TRUNC|O_CLEAN, 0777);
		/* Do not close() a descriptor that was never opened. */
		if (fd2 >= 0)
			close(fd2);
		printf("fd:%d\n", fd2);
		sleep(1);
	}
	return 0;
}
next prev parent reply other threads:[~2004-02-20 13:27 UTC|newest]
Thread overview: 123+ messages / expand[flat|nested] mbox.gz Atom feed top
2004-02-17 4:12 UTF-8 and case-insensitivity tridge
2004-02-17 5:11 ` Linus Torvalds
2004-02-17 6:54 ` tridge
2004-02-17 8:33 ` Neil Brown
2004-02-17 22:48 ` tridge
2004-02-18 0:06 ` Neil Brown
2004-02-18 9:47 ` Helge Hafting
2004-02-17 15:13 ` Linus Torvalds
2004-02-17 16:57 ` Linus Torvalds
2004-02-17 19:44 ` viro
2004-02-17 20:10 ` Linus Torvalds
2004-02-17 20:17 ` viro
2004-02-17 20:23 ` Linus Torvalds
2004-02-17 21:08 ` Robin Rosenberg
2004-02-17 21:17 ` Linus Torvalds
2004-02-17 22:27 ` Robin Rosenberg
2004-02-18 3:02 ` tridge
2004-02-17 23:57 ` tridge
2004-02-17 23:20 ` tridge
2004-02-17 23:43 ` Linus Torvalds
2004-02-18 3:26 ` tridge
2004-02-18 5:33 ` H. Peter Anvin
2004-02-18 7:54 ` Marc Lehmann
2004-02-18 2:37 ` H. Peter Anvin
2004-02-18 3:03 ` Linus Torvalds
2004-02-18 3:14 ` H. Peter Anvin
2004-02-18 3:27 ` Linus Torvalds
2004-02-18 21:31 ` tridge
2004-02-18 22:23 ` Linus Torvalds
2004-02-18 22:28 ` Linus Torvalds
2004-02-18 22:50 ` tridge
2004-02-18 22:59 ` Linus Torvalds
2004-02-18 23:09 ` tridge
2004-02-18 23:16 ` Linus Torvalds
2004-02-19 8:10 ` Jamie Lokier
2004-02-19 16:09 ` Linus Torvalds
2004-02-19 16:38 ` Jamie Lokier
2004-02-19 16:54 ` Linus Torvalds
2004-02-19 18:29 ` Jamie Lokier
2004-02-19 19:48 ` Eureka! (was Re: UTF-8 and case-insensitivity) Linus Torvalds
2004-02-19 19:51 ` Linus Torvalds
2004-02-19 19:48 ` H. Peter Anvin
2004-02-19 20:04 ` Linus Torvalds
2004-02-19 20:05 ` viro
2004-02-19 20:23 ` Linus Torvalds
2004-02-19 20:32 ` Linus Torvalds
2004-02-19 20:45 ` viro
2004-02-19 21:26 ` Linus Torvalds
2004-02-19 21:38 ` Linus Torvalds
2004-02-19 21:45 ` Linus Torvalds
2004-02-19 21:43 ` viro
2004-02-19 21:53 ` Linus Torvalds
2004-02-19 22:21 ` David Lang
2004-02-19 20:48 ` Jamie Lokier
2004-02-19 21:30 ` Linus Torvalds
2004-02-20 0:00 ` Jamie Lokier
2004-02-20 0:17 ` Linus Torvalds
2004-02-20 0:24 ` Linus Torvalds
2004-02-20 0:30 ` Trond Myklebust
2004-02-20 0:54 ` Jamie Lokier
2004-02-20 0:57 ` tridge
2004-02-20 1:07 ` Paul Wagland
2004-02-20 13:31 ` Chris Wedgwood
2004-02-20 0:46 ` Jamie Lokier
2004-02-23 10:13 ` Tim Connors
2004-02-20 1:39 ` Junio C Hamano
2004-02-20 12:54 ` Jamie Lokier
2004-02-19 23:37 ` tridge
2004-02-20 0:02 ` Linus Torvalds
2004-02-20 0:16 ` tridge
2004-02-20 0:37 ` Linus Torvalds
2004-02-20 1:26 ` tridge
2004-02-20 1:07 ` H. Peter Anvin
2004-02-20 2:30 ` Theodore Ts'o
2004-02-20 12:04 ` explicit dcache <-> user-space cache coherency, sys_mark_dir_clean(), O_CLEAN Ingo Molnar
2004-02-20 13:19 ` Jamie Lokier
2004-02-20 13:37 ` Ingo Molnar
2004-02-20 14:00 ` Ingo Molnar
2004-02-20 16:31 ` Jamie Lokier
2004-02-20 13:23 ` Ingo Molnar [this message]
2004-02-20 18:00 ` [patch] " viro
2004-02-20 15:41 ` Linus Torvalds
2004-02-20 17:04 ` Ingo Molnar
2004-02-20 17:19 ` Linus Torvalds
2004-02-20 18:48 ` Ingo Molnar
2004-02-21 1:44 ` Jamie Lokier
2004-02-21 7:58 ` Ingo Molnar
2004-02-21 8:04 ` viro
2004-02-21 17:46 ` Ingo Molnar
2004-02-21 18:15 ` Linus Torvalds
2004-02-21 8:26 ` Keith Owens
2004-02-23 10:59 ` Pavel Machek
2004-02-23 13:55 ` Jamie Lokier
2004-02-23 16:45 ` Ingo Molnar
2004-02-23 17:32 ` Jamie Lokier
2004-02-20 23:00 ` tridge
2004-02-20 17:33 ` Jamie Lokier
2004-02-20 18:22 ` Linus Torvalds
2004-02-21 0:38 ` Jamie Lokier
2004-02-21 1:10 ` Linus Torvalds
2004-02-21 3:01 ` Jamie Lokier
2004-02-20 17:47 ` Jamie Lokier
2004-02-20 20:38 ` Christer Weinigel
2004-02-22 15:07 ` Jamie Lokier
2004-02-22 16:55 ` Miquel van Smoorenburg
2004-02-19 19:08 ` UTF-8 and case-insensitivity Helge Hafting
2004-02-18 4:08 ` tridge
2004-02-18 10:05 ` Robin Rosenberg
2004-02-18 11:43 ` tridge
2004-02-18 12:31 ` Robin Rosenberg
2004-02-18 16:48 ` H. Peter Anvin
2004-02-18 20:00 ` H. Peter Anvin
2004-02-19 2:53 ` Daniel Newby
2004-02-17 5:25 ` Tim Connors
2004-02-17 7:43 ` H. Peter Anvin
2004-02-17 8:05 ` H. Peter Anvin
2004-02-17 14:25 ` Dave Kleikamp
2004-02-18 0:16 ` Robert White
2004-02-18 0:20 ` Linus Torvalds
2004-02-18 1:03 ` Robert White
2004-02-18 21:48 ` Ville Herva
2004-02-18 2:48 ` tridge
2004-02-18 20:56 ` Robert White
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20040220132352.GA11618@elte.hu \
--to=mingo@elte.hu \
--cc=hpa@zytor.com \
--cc=jamie@shareable.org \
--cc=linux-kernel@vger.kernel.org \
--cc=torvalds@osdl.org \
--cc=tridge@samba.org \
--cc=viro@parcelfarce.linux.theplanet.co.uk \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox