public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
From: Ingo Molnar <mingo@elte.hu>
To: Linus Torvalds <torvalds@osdl.org>
Cc: Tridge <tridge@samba.org>,
	Al Viro <viro@parcelfarce.linux.theplanet.co.uk>,
	Jamie Lokier <jamie@shareable.org>,
	"H. Peter Anvin" <hpa@zytor.com>,
	Kernel Mailing List <linux-kernel@vger.kernel.org>,
	Jamie Lokier <jamie@shareable.org>
Subject: [patch] explicit dcache <-> user-space cache coherency, sys_mark_dir_clean(), O_CLEAN
Date: Fri, 20 Feb 2004 14:23:52 +0100	[thread overview]
Message-ID: <20040220132352.GA11618@elte.hu> (raw)
In-Reply-To: <20040220120417.GA4010@elte.hu>

[-- Attachment #1: Type: text/plain, Size: 1262 bytes --]


> What Samba needs is a way to tell between two points in time whether the
> directory contents have changed in any way - nothing more. Only one new
> syscall is used to maintain the Samba dcache:
> 
> 	long sys_mark_dir_clean(dirfd);

> this is how Samba could create a file atomically:
> 
> 	sys_create(name, mode | O_CLEAN);

i've attached a quick patch (against 2.6.3) that implements the new
sys_mark_dir_clean() syscall and O_CLEAN support in all open() variants,
just to give a rough idea of what it looks like. (It's incomplete -
e.g. there's no explicit way to do an atomic unlink or rename.)

i've also attached dir-cache.c, a simple test program for the new
functionality. It marks the current directory clean and tries to open
the "./1" file via O_CLEAN with a 1 second delay. Start this in one shell
and do VFS-namespace modifying ops in another window (eg. "rm -f 2;
touch 2") and see the dir-cache code react to it - the 'clean' bit is
lost, and the file open-create does not succeed if the directory is not
clean.

there's a new dentry flag that is maintained under the directory's i_sem
semaphore. (It would be simpler to have the flag on the inode level,
that way the invalidation could be done as a simple filter to the
dnotify function.)

	Ingo

[-- Attachment #2: dir-mark-clean-2.6.3-A3 --]
[-- Type: text/plain, Size: 4310 bytes --]

--- linux/arch/i386/kernel/entry.S.orig	
+++ linux/arch/i386/kernel/entry.S	
@@ -882,5 +882,6 @@ ENTRY(sys_call_table)
 	.long sys_utimes
  	.long sys_fadvise64_64
 	.long sys_ni_syscall	/* sys_vserver */
+ 	.long sys_mark_dir_clean
 
 syscall_table_size=(.-sys_call_table)
--- linux/include/linux/dcache.h.orig	
+++ linux/include/linux/dcache.h	
@@ -153,9 +153,25 @@ d_iput:		no		no		yes
 
 #define DCACHE_REFERENCED	0x0008  /* Recently used, don't discard. */
 #define DCACHE_UNHASHED		0x0010	
+#define DCACHE_USER_CLEAN	0x0020	/* userspace cache coherent */
 
 extern spinlock_t dcache_lock;
 
+static inline void d_user_flush(struct dentry *dentry)
+{
+	dentry->d_vfs_flags &= ~DCACHE_USER_CLEAN;
+}
+
+static inline void d_user_mark_clean(struct dentry *dentry)
+{
+	dentry->d_vfs_flags |= DCACHE_USER_CLEAN;
+}
+
+static inline long d_user_valid(struct dentry *dentry)
+{
+	return (dentry->d_vfs_flags & DCACHE_USER_CLEAN) != 0;
+}
+
 /**
  * d_drop - drop a dentry
  * @dentry: dentry to drop
--- linux/include/asm-generic/errno.h.orig	
+++ linux/include/asm-generic/errno.h	
@@ -96,5 +96,6 @@
 
 #define	ENOMEDIUM	123	/* No medium found */
 #define	EMEDIUMTYPE	124	/* Wrong medium type */
+#define	EFLUSH		125	/* cache not valid */
 
 #endif
--- linux/include/asm-i386/fcntl.h.orig	
+++ linux/include/asm-i386/fcntl.h	
@@ -20,6 +20,7 @@
 #define O_LARGEFILE	0100000
 #define O_DIRECTORY	0200000	/* must be a directory */
 #define O_NOFOLLOW	0400000 /* don't follow links */
+#define O_CLEAN	       01000000 /* parent dir must be clean */
 
 #define F_DUPFD		0	/* dup */
 #define F_GETFD		1	/* get close_on_exec */
--- linux/fs/open.c.orig	
+++ linux/fs/open.c	
@@ -747,6 +747,7 @@ struct file *filp_open(const char * file
 		namei_flags++;
 	if (namei_flags & O_TRUNC)
 		namei_flags |= 2;
+	namei_flags |= flags & O_CLEAN;
 
 	error = open_namei(filename, namei_flags, mode, &nd);
 	if (!error)
@@ -1029,6 +1030,26 @@ out_unlock:
 
 EXPORT_SYMBOL(sys_close);
 
+asmlinkage long sys_mark_dir_clean(unsigned int fd)
+{
+	struct file *filp;
+	long ret = -EBADF;
+
+	filp = fget(fd);
+	if (!filp)
+		return ret;
+
+	down(&filp->f_dentry->d_inode->i_sem);
+	ret = d_user_valid(filp->f_dentry);
+	d_user_mark_clean(filp->f_dentry);
+	up(&filp->f_dentry->d_inode->i_sem);
+
+	fput(filp);
+
+	return ret;
+}
+
+
 /*
  * This routine simulates a hangup on the tty, to arrange that users
  * are given clean terminals at login time.
--- linux/fs/namei.c.orig	
+++ linux/fs/namei.c	
@@ -1295,11 +1295,23 @@ do_last:
 		goto exit;
 	}
 
+	/*
+	 * Did user-space require the parent directory to be clean
+	 * but it was invalid?:
+	 */
+	error = -EFLUSH;
+	if ((flag & O_CLEAN) && !d_user_valid(dir)) {
+		up(&dir->d_inode->i_sem);
+		goto exit;
+	}
+
 	/* Negative dentry, just create the file */
 	if (!dentry->d_inode) {
 		if (!IS_POSIXACL(dir->d_inode))
 			mode &= ~current->fs->umask;
 		error = vfs_create(dir->d_inode, dentry, mode, nd);
+		if (!error)
+			d_user_flush(dir);
 		up(&dir->d_inode->i_sem);
 		dput(nd->dentry);
 		nd->dentry = dentry;
@@ -1493,6 +1505,8 @@ asmlinkage long sys_mknod(const char __u
 		}
 		dput(dentry);
 	}
+	if (!error)
+		d_user_flush(nd.dentry);
 	up(&nd.dentry->d_inode->i_sem);
 	path_release(&nd);
 out:
@@ -1545,6 +1559,8 @@ asmlinkage long sys_mkdir(const char __u
 			if (!IS_POSIXACL(nd.dentry->d_inode))
 				mode &= ~current->fs->umask;
 			error = vfs_mkdir(nd.dentry->d_inode, dentry, mode);
+			if (!error)
+				d_user_flush(nd.dentry);
 			dput(dentry);
 		}
 		up(&nd.dentry->d_inode->i_sem);
@@ -1653,6 +1669,8 @@ asmlinkage long sys_rmdir(const char __u
 	error = PTR_ERR(dentry);
 	if (!IS_ERR(dentry)) {
 		error = vfs_rmdir(nd.dentry->d_inode, dentry);
+		if (!error)
+			d_user_flush(nd.dentry);
 		dput(dentry);
 	}
 	up(&nd.dentry->d_inode->i_sem);
@@ -1728,6 +1746,8 @@ asmlinkage long sys_unlink(const char __
 		if (inode)
 			atomic_inc(&inode->i_count);
 		error = vfs_unlink(nd.dentry->d_inode, dentry);
+		if (!error)
+			d_user_flush(nd.dentry);
 	exit2:
 		dput(dentry);
 	}
@@ -2099,6 +2119,10 @@ static inline int do_rename(const char *
 
 	error = vfs_rename(old_dir->d_inode, old_dentry,
 				   new_dir->d_inode, new_dentry);
+	if (!error) {
+		d_user_flush(old_dir);
+		d_user_flush(new_dir);
+	}
 exit5:
 	dput(new_dentry);
 exit4:

[-- Attachment #3: dir-cache.c --]
[-- Type: text/plain, Size: 881 bytes --]

/*
 * Copyright (C) Ingo Molnar, 2002
 */
#include <stdio.h>
#include <unistd.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <fcntl.h>
#include <errno.h>
#include <stdlib.h>
#include <sys/times.h>
#include <sys/wait.h>
#include <sys/ioctl.h>
#include <linux/unistd.h>

/*
 * Syscall number used by the stub below.
 * NOTE(review): presumably the i386 slot that the accompanying kernel
 * patch appends to sys_call_table right after sys_vserver - confirm
 * against the patched kernel before running.
 */
#define __NR_sys_mark_dir_clean 274
/*
 * Declares the user-space entry point sys_mark_dir_clean(int fd) returning
 * int. NOTE(review): _syscall1 is assumed to come from <linux/unistd.h>,
 * included above - verify it is available with the headers in use.
 */
_syscall1(int, sys_mark_dir_clean, int, fd);

/*
 * Local definitions of the open() flags used by this test. The values are
 * the same as the i386 fcntl.h values shown in the accompanying patch
 * (O_DIRECTORY already exists there; O_CLEAN is the newly added flag).
 */
#define O_DIRECTORY        0200000 /* must be a directory */

#define O_CLEAN        01000000 /* parent dir must be clean */

/*
 * Test driver for sys_mark_dir_clean() and O_CLEAN.
 *
 * Opens the current directory, then loops forever:
 *   1. marks the directory clean and prints the value returned by
 *      sys_mark_dir_clean() as "clean:<n> ",
 *   2. sleeps one second,
 *   3. tries to open-create "./1" with O_CLEAN and prints the resulting
 *      descriptor (or -1 on failure) as "fd:<n>",
 *   4. sleeps one more second.
 *
 * argc/argv are unused. The loop never terminates on its own; the program
 * calls exit(-1) if "." cannot be opened.
 */
int main(int argc, char **argv)
{
	int fd, fd2, clean;

	fd = open(".", O_RDONLY | O_DIRECTORY);
	/* open() signals failure with -1 only; descriptor 0 is a valid result. */
	if (fd < 0) {
		perror("fd:");
		exit(-1);
	}

	for (;;) {
		clean = sys_mark_dir_clean(fd);
		printf("clean:%d ", clean); fflush(stdout);
		sleep(1);
		fd2 = open("./1", O_CREAT|O_TRUNC|O_CLEAN, 0777);
		/* Only close a descriptor that was actually obtained. */
		if (fd2 >= 0)
			close(fd2);
		printf("fd:%d\n", fd2);
		sleep(1);
	}

	return 0;
}


  parent reply	other threads:[~2004-02-20 13:27 UTC|newest]

Thread overview: 123+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2004-02-17  4:12 UTF-8 and case-insensitivity tridge
2004-02-17  5:11 ` Linus Torvalds
2004-02-17  6:54   ` tridge
2004-02-17  8:33     ` Neil Brown
2004-02-17 22:48       ` tridge
2004-02-18  0:06         ` Neil Brown
2004-02-18  9:47           ` Helge Hafting
2004-02-17 15:13     ` Linus Torvalds
2004-02-17 16:57       ` Linus Torvalds
2004-02-17 19:44         ` viro
2004-02-17 20:10           ` Linus Torvalds
2004-02-17 20:17             ` viro
2004-02-17 20:23               ` Linus Torvalds
2004-02-17 21:08         ` Robin Rosenberg
2004-02-17 21:17           ` Linus Torvalds
2004-02-17 22:27             ` Robin Rosenberg
2004-02-18  3:02               ` tridge
2004-02-17 23:57         ` tridge
2004-02-17 23:20       ` tridge
2004-02-17 23:43         ` Linus Torvalds
2004-02-18  3:26           ` tridge
2004-02-18  5:33             ` H. Peter Anvin
2004-02-18  7:54             ` Marc Lehmann
2004-02-18  2:37         ` H. Peter Anvin
2004-02-18  3:03           ` Linus Torvalds
2004-02-18  3:14             ` H. Peter Anvin
2004-02-18  3:27               ` Linus Torvalds
2004-02-18 21:31                 ` tridge
2004-02-18 22:23                   ` Linus Torvalds
2004-02-18 22:28                     ` Linus Torvalds
2004-02-18 22:50                       ` tridge
2004-02-18 22:59                         ` Linus Torvalds
2004-02-18 23:09                           ` tridge
2004-02-18 23:16                             ` Linus Torvalds
2004-02-19  8:10                               ` Jamie Lokier
2004-02-19 16:09                                 ` Linus Torvalds
2004-02-19 16:38                                   ` Jamie Lokier
2004-02-19 16:54                                     ` Linus Torvalds
2004-02-19 18:29                                       ` Jamie Lokier
2004-02-19 19:48                                         ` Eureka! (was Re: UTF-8 and case-insensitivity) Linus Torvalds
2004-02-19 19:51                                           ` Linus Torvalds
2004-02-19 19:48                                             ` H. Peter Anvin
2004-02-19 20:04                                               ` Linus Torvalds
2004-02-19 20:05                                           ` viro
2004-02-19 20:23                                             ` Linus Torvalds
2004-02-19 20:32                                               ` Linus Torvalds
2004-02-19 20:45                                                 ` viro
2004-02-19 21:26                                                   ` Linus Torvalds
2004-02-19 21:38                                                     ` Linus Torvalds
2004-02-19 21:45                                                     ` Linus Torvalds
2004-02-19 21:43                                                       ` viro
2004-02-19 21:53                                                         ` Linus Torvalds
2004-02-19 22:21                                                           ` David Lang
2004-02-19 20:48                                                 ` Jamie Lokier
2004-02-19 21:30                                                   ` Linus Torvalds
2004-02-20  0:00                                                     ` Jamie Lokier
2004-02-20  0:17                                                       ` Linus Torvalds
2004-02-20  0:24                                                         ` Linus Torvalds
2004-02-20  0:30                                                           ` Trond Myklebust
2004-02-20  0:54                                                           ` Jamie Lokier
2004-02-20  0:57                                                           ` tridge
2004-02-20  1:07                                                           ` Paul Wagland
2004-02-20 13:31                                                           ` Chris Wedgwood
2004-02-20  0:46                                                         ` Jamie Lokier
2004-02-23 10:13                                                           ` Tim Connors
2004-02-20  1:39                                                     ` Junio C Hamano
2004-02-20 12:54                                                       ` Jamie Lokier
2004-02-19 23:37                                           ` tridge
2004-02-20  0:02                                             ` Linus Torvalds
2004-02-20  0:16                                               ` tridge
2004-02-20  0:37                                                 ` Linus Torvalds
2004-02-20  1:26                                                   ` tridge
2004-02-20  1:07                                               ` H. Peter Anvin
2004-02-20  2:30                                           ` Theodore Ts'o
2004-02-20 12:04                                           ` explicit dcache <-> user-space cache coherency, sys_mark_dir_clean(), O_CLEAN Ingo Molnar
2004-02-20 13:19                                             ` Jamie Lokier
2004-02-20 13:37                                               ` Ingo Molnar
2004-02-20 14:00                                                 ` Ingo Molnar
2004-02-20 16:31                                                 ` Jamie Lokier
2004-02-20 13:23                                             ` Ingo Molnar [this message]
2004-02-20 18:00                                               ` [patch] " viro
2004-02-20 15:41                                             ` Linus Torvalds
2004-02-20 17:04                                               ` Ingo Molnar
2004-02-20 17:19                                                 ` Linus Torvalds
2004-02-20 18:48                                                   ` Ingo Molnar
2004-02-21  1:44                                                     ` Jamie Lokier
2004-02-21  7:58                                                     ` Ingo Molnar
2004-02-21  8:04                                                       ` viro
2004-02-21 17:46                                                         ` Ingo Molnar
2004-02-21 18:15                                                         ` Linus Torvalds
2004-02-21  8:26                                                       ` Keith Owens
2004-02-23 10:59                                                       ` Pavel Machek
2004-02-23 13:55                                                         ` Jamie Lokier
2004-02-23 16:45                                                           ` Ingo Molnar
2004-02-23 17:32                                                             ` Jamie Lokier
2004-02-20 23:00                                                   ` tridge
2004-02-20 17:33                                               ` Jamie Lokier
2004-02-20 18:22                                                 ` Linus Torvalds
2004-02-21  0:38                                                   ` Jamie Lokier
2004-02-21  1:10                                                     ` Linus Torvalds
2004-02-21  3:01                                                       ` Jamie Lokier
2004-02-20 17:47                                               ` Jamie Lokier
2004-02-20 20:38                                             ` Christer Weinigel
2004-02-22 15:07                                               ` Jamie Lokier
2004-02-22 16:55                                                 ` Miquel van Smoorenburg
2004-02-19 19:08                                       ` UTF-8 and case-insensitivity Helge Hafting
2004-02-18  4:08           ` tridge
2004-02-18 10:05             ` Robin Rosenberg
2004-02-18 11:43               ` tridge
2004-02-18 12:31                 ` Robin Rosenberg
2004-02-18 16:48                   ` H. Peter Anvin
2004-02-18 20:00                     ` H. Peter Anvin
2004-02-19  2:53   ` Daniel Newby
2004-02-17  5:25 ` Tim Connors
2004-02-17  7:43 ` H. Peter Anvin
2004-02-17  8:05   ` H. Peter Anvin
2004-02-17 14:25 ` Dave Kleikamp
2004-02-18  0:16 ` Robert White
2004-02-18  0:20   ` Linus Torvalds
2004-02-18  1:03     ` Robert White
2004-02-18 21:48     ` Ville Herva
2004-02-18  2:48   ` tridge
2004-02-18 20:56     ` Robert White

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20040220132352.GA11618@elte.hu \
    --to=mingo@elte.hu \
    --cc=hpa@zytor.com \
    --cc=jamie@shareable.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=torvalds@osdl.org \
    --cc=tridge@samba.org \
    --cc=viro@parcelfarce.linux.theplanet.co.uk \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox