All of lore.kernel.org
 help / color / mirror / Atom feed
From: Ingo Molnar <mingo@elte.hu>
To: Linus Torvalds <torvalds@osdl.org>
Cc: Tridge <tridge@samba.org>,
	Al Viro <viro@parcelfarce.linux.theplanet.co.uk>,
	Jamie Lokier <jamie@shareable.org>,
	"H. Peter Anvin" <hpa@zytor.com>,
	Kernel Mailing List <linux-kernel@vger.kernel.org>,
	Jamie Lokier <jamie@shareable.org>
Subject: [patch] explicit dcache <-> user-space cache coherency, sys_mark_dir_clean(), O_CLEAN
Date: Fri, 20 Feb 2004 14:23:52 +0100	[thread overview]
Message-ID: <20040220132352.GA11618@elte.hu> (raw)
In-Reply-To: <20040220120417.GA4010@elte.hu>

[-- Attachment #1: Type: text/plain, Size: 1262 bytes --]


> What Samba needs is a way to tell between two points in time whether the
> directory contents have changed in any way - nothing more. Only one new
> syscall is used to maintain the Samba dcache:
> 
> 	long sys_mark_dir_clean(dirfd);

> this is how Samba could create a file atomically:
> 
> 	sys_create(name, mode | O_CLEAN);

i've attached a quick patch (against 2.6.3) that implements the new
sys_mark_dir_clean() syscall and O_CLEAN support in all open() variants,
just to give a rough idea of what it looks like. (It's incomplete -
e.g. there's no explicit way to do an atomic unlink or rename.)

i've also attached dir-cache.c, a simple test program for the new
functionality. It marks the current directory clean and tries to open
the "./1" file via O_CLEAN with a 1 second delay. Start this in one shell
and do VFS-namespace modifying ops in another window (eg. "rm -f 2;
touch 2") and see the dir-cache code react to it - the 'clean' bit is
lost, and the file open-create does not succeed if the directory is not
clean.

there's a new dentry flag that is maintained under the directory's i_sem
semaphore. (It would be simpler to have the flag on the inode level,
that way the invalidation could be done as a simple filter to the
dnotify function.)

	Ingo

[-- Attachment #2: dir-mark-clean-2.6.3-A3 --]
[-- Type: text/plain, Size: 4310 bytes --]

--- linux/arch/i386/kernel/entry.S.orig	
+++ linux/arch/i386/kernel/entry.S	
@@ -882,5 +882,6 @@ ENTRY(sys_call_table)
 	.long sys_utimes
  	.long sys_fadvise64_64
 	.long sys_ni_syscall	/* sys_vserver */
+ 	.long sys_mark_dir_clean
 
 syscall_table_size=(.-sys_call_table)
--- linux/include/linux/dcache.h.orig	
+++ linux/include/linux/dcache.h	
@@ -153,9 +153,25 @@ d_iput:		no		no		yes
 
 #define DCACHE_REFERENCED	0x0008  /* Recently used, don't discard. */
 #define DCACHE_UNHASHED		0x0010	
+#define DCACHE_USER_CLEAN	0x0020	/* userspace cache coherent */
 
 extern spinlock_t dcache_lock;
 
+static inline void d_user_flush(struct dentry *dentry)
+{
+	dentry->d_vfs_flags &= ~DCACHE_USER_CLEAN;
+}
+
+static inline void d_user_mark_clean(struct dentry *dentry)
+{
+	dentry->d_vfs_flags |= DCACHE_USER_CLEAN;
+}
+
+static inline long d_user_valid(struct dentry *dentry)
+{
+	return (dentry->d_vfs_flags & DCACHE_USER_CLEAN) != 0;
+}
+
 /**
  * d_drop - drop a dentry
  * @dentry: dentry to drop
--- linux/include/asm-generic/errno.h.orig	
+++ linux/include/asm-generic/errno.h	
@@ -96,5 +96,6 @@
 
 #define	ENOMEDIUM	123	/* No medium found */
 #define	EMEDIUMTYPE	124	/* Wrong medium type */
+#define	EFLUSH		125	/* cache not valid */
 
 #endif
--- linux/include/asm-i386/fcntl.h.orig	
+++ linux/include/asm-i386/fcntl.h	
@@ -20,6 +20,7 @@
 #define O_LARGEFILE	0100000
 #define O_DIRECTORY	0200000	/* must be a directory */
 #define O_NOFOLLOW	0400000 /* don't follow links */
+#define O_CLEAN	       01000000 /* parent dir must be clean */
 
 #define F_DUPFD		0	/* dup */
 #define F_GETFD		1	/* get close_on_exec */
--- linux/fs/open.c.orig	
+++ linux/fs/open.c	
@@ -747,6 +747,7 @@ struct file *filp_open(const char * file
 		namei_flags++;
 	if (namei_flags & O_TRUNC)
 		namei_flags |= 2;
+	namei_flags |= flags & O_CLEAN;
 
 	error = open_namei(filename, namei_flags, mode, &nd);
 	if (!error)
@@ -1029,6 +1030,26 @@ out_unlock:
 
 EXPORT_SYMBOL(sys_close);
 
+asmlinkage long sys_mark_dir_clean(unsigned int fd)
+{
+	struct file *filp;
+	long ret = -EBADF;
+
+	filp = fget(fd);
+	if (!filp)
+		return ret;
+
+	down(&filp->f_dentry->d_inode->i_sem);
+	ret = d_user_valid(filp->f_dentry);
+	d_user_mark_clean(filp->f_dentry);
+	up(&filp->f_dentry->d_inode->i_sem);
+
+	fput(filp);
+
+	return ret;
+}
+
+
 /*
  * This routine simulates a hangup on the tty, to arrange that users
  * are given clean terminals at login time.
--- linux/fs/namei.c.orig	
+++ linux/fs/namei.c	
@@ -1295,11 +1295,23 @@ do_last:
 		goto exit;
 	}
 
+	/*
+	 * Did user-space require the parent directory to be clean
+	 * but it was invalid?:
+	 */
+	error = -EFLUSH;
+	if ((flag & O_CLEAN) && !d_user_valid(dir)) {
+		up(&dir->d_inode->i_sem);
+		goto exit;
+	}
+
 	/* Negative dentry, just create the file */
 	if (!dentry->d_inode) {
 		if (!IS_POSIXACL(dir->d_inode))
 			mode &= ~current->fs->umask;
 		error = vfs_create(dir->d_inode, dentry, mode, nd);
+		if (!error)
+			d_user_flush(dir);
 		up(&dir->d_inode->i_sem);
 		dput(nd->dentry);
 		nd->dentry = dentry;
@@ -1493,6 +1505,8 @@ asmlinkage long sys_mknod(const char __u
 		}
 		dput(dentry);
 	}
+	if (!error)
+		d_user_flush(nd.dentry);
 	up(&nd.dentry->d_inode->i_sem);
 	path_release(&nd);
 out:
@@ -1545,6 +1559,8 @@ asmlinkage long sys_mkdir(const char __u
 			if (!IS_POSIXACL(nd.dentry->d_inode))
 				mode &= ~current->fs->umask;
 			error = vfs_mkdir(nd.dentry->d_inode, dentry, mode);
+			if (!error)
+				d_user_flush(nd.dentry);
 			dput(dentry);
 		}
 		up(&nd.dentry->d_inode->i_sem);
@@ -1653,6 +1669,8 @@ asmlinkage long sys_rmdir(const char __u
 	error = PTR_ERR(dentry);
 	if (!IS_ERR(dentry)) {
 		error = vfs_rmdir(nd.dentry->d_inode, dentry);
+		if (!error)
+			d_user_flush(nd.dentry);
 		dput(dentry);
 	}
 	up(&nd.dentry->d_inode->i_sem);
@@ -1728,6 +1746,8 @@ asmlinkage long sys_unlink(const char __
 		if (inode)
 			atomic_inc(&inode->i_count);
 		error = vfs_unlink(nd.dentry->d_inode, dentry);
+		if (!error)
+			d_user_flush(nd.dentry);
 	exit2:
 		dput(dentry);
 	}
@@ -2099,6 +2119,10 @@ static inline int do_rename(const char *
 
 	error = vfs_rename(old_dir->d_inode, old_dentry,
 				   new_dir->d_inode, new_dentry);
+	if (!error) {
+		d_user_flush(old_dir);
+		d_user_flush(new_dir);
+	}
 exit5:
 	dput(new_dentry);
 exit4:

[-- Attachment #3: dir-cache.c --]
[-- Type: text/plain, Size: 881 bytes --]

/*
 * Copyright (C) Ingo Molnar, 2002
 */
#include <stdio.h>
#include <unistd.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <fcntl.h>
#include <errno.h>
#include <stdlib.h>
#include <sys/times.h>
#include <sys/wait.h>
#include <sys/ioctl.h>
#include <linux/unistd.h>

/*
 * Syscall number used by the stub below.
 * NOTE(review): this must match the slot sys_mark_dir_clean occupies in the
 * sys_call_table of the kernel under test - confirm before running.
 */
#define __NR_sys_mark_dir_clean 274
/*
 * Presumably expands to a userspace wrapper
 *	int sys_mark_dir_clean(int fd);
 * via the _syscall1 macro from <linux/unistd.h> - verify on the target libc.
 */
_syscall1(int, sys_mark_dir_clean, int, fd);

/* Local definition of the open() flag value used when opening "." below. */
#define O_DIRECTORY        0200000 /* must be a directory */

/*
 * Local definition of the new open() flag; libc headers do not know it.
 * NOTE(review): the value must agree with the kernel's O_CLEAN definition.
 */
#define O_CLEAN        01000000 /* parent dir must be clean */

/*
 * Test driver for sys_mark_dir_clean() and O_CLEAN.
 *
 * Opens the current directory, then loops forever:
 *   1. marks the directory clean and prints the value the syscall returned,
 *   2. sleeps one second,
 *   3. tries to open-create "./1" with O_CLEAN and prints the resulting
 *      descriptor (-1 when the open failed),
 *   4. sleeps one second.
 *
 * argc/argv are unused. The loop never terminates; the process exits with
 * status -1 only if the current directory cannot be opened.
 */
int main(int argc, char **argv)
{
	int fd, fd2, clean;

	fd = open(".", O_RDONLY | O_DIRECTORY);
	/*
	 * open() signals failure with a negative value only; 0 is a valid
	 * descriptor (returned when stdin is closed) and must not be
	 * treated as an error.
	 */
	if (fd < 0) {
		perror("fd:");
		exit(-1);
	}

	for (;;) {
		clean = sys_mark_dir_clean(fd);
		printf("clean:%d ", clean); fflush(stdout);
		sleep(1);
		fd2 = open("./1", O_CREAT|O_TRUNC|O_CLEAN, 0777);
		/* Only close a descriptor we actually got; the open may fail. */
		if (fd2 >= 0)
			close(fd2);
		printf("fd:%d\n", fd2);
		sleep(1);
	}

	return 0;
}


  parent reply	other threads:[~2004-02-20 13:27 UTC|newest]

Thread overview: 123+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2004-02-17  4:12 UTF-8 and case-insensitivity tridge
2004-02-17  5:11 ` Linus Torvalds
2004-02-17  6:54   ` tridge
2004-02-17  8:33     ` Neil Brown
2004-02-17 22:48       ` tridge
2004-02-18  0:06         ` Neil Brown
2004-02-18  9:47           ` Helge Hafting
2004-02-17 15:13     ` Linus Torvalds
2004-02-17 16:57       ` Linus Torvalds
2004-02-17 19:44         ` viro
2004-02-17 20:10           ` Linus Torvalds
2004-02-17 20:17             ` viro
2004-02-17 20:23               ` Linus Torvalds
2004-02-17 21:08         ` Robin Rosenberg
2004-02-17 21:17           ` Linus Torvalds
2004-02-17 22:27             ` Robin Rosenberg
2004-02-18  3:02               ` tridge
2004-02-17 23:57         ` tridge
2004-02-17 23:20       ` tridge
2004-02-17 23:43         ` Linus Torvalds
2004-02-18  3:26           ` tridge
2004-02-18  5:33             ` H. Peter Anvin
2004-02-18  7:54             ` Marc Lehmann
2004-02-18  2:37         ` H. Peter Anvin
2004-02-18  3:03           ` Linus Torvalds
2004-02-18  3:14             ` H. Peter Anvin
2004-02-18  3:27               ` Linus Torvalds
2004-02-18 21:31                 ` tridge
2004-02-18 22:23                   ` Linus Torvalds
2004-02-18 22:28                     ` Linus Torvalds
2004-02-18 22:50                       ` tridge
2004-02-18 22:59                         ` Linus Torvalds
2004-02-18 23:09                           ` tridge
2004-02-18 23:16                             ` Linus Torvalds
2004-02-19  8:10                               ` Jamie Lokier
2004-02-19 16:09                                 ` Linus Torvalds
2004-02-19 16:38                                   ` Jamie Lokier
2004-02-19 16:54                                     ` Linus Torvalds
2004-02-19 18:29                                       ` Jamie Lokier
2004-02-19 19:48                                         ` Eureka! (was Re: UTF-8 and case-insensitivity) Linus Torvalds
2004-02-19 19:51                                           ` Linus Torvalds
2004-02-19 19:48                                             ` H. Peter Anvin
2004-02-19 20:04                                               ` Linus Torvalds
2004-02-19 20:05                                           ` viro
2004-02-19 20:23                                             ` Linus Torvalds
2004-02-19 20:32                                               ` Linus Torvalds
2004-02-19 20:45                                                 ` viro
2004-02-19 21:26                                                   ` Linus Torvalds
2004-02-19 21:38                                                     ` Linus Torvalds
2004-02-19 21:45                                                     ` Linus Torvalds
2004-02-19 21:43                                                       ` viro
2004-02-19 21:53                                                         ` Linus Torvalds
2004-02-19 22:21                                                           ` David Lang
2004-02-19 20:48                                                 ` Jamie Lokier
2004-02-19 21:30                                                   ` Linus Torvalds
2004-02-20  0:00                                                     ` Jamie Lokier
2004-02-20  0:17                                                       ` Linus Torvalds
2004-02-20  0:24                                                         ` Linus Torvalds
2004-02-20  0:30                                                           ` Trond Myklebust
2004-02-20  0:54                                                           ` Jamie Lokier
2004-02-20  0:57                                                           ` tridge
2004-02-20  1:07                                                           ` Paul Wagland
2004-02-20 13:31                                                           ` Chris Wedgwood
2004-02-20  0:46                                                         ` Jamie Lokier
2004-02-23 10:13                                                           ` Tim Connors
2004-02-20  1:39                                                     ` Junio C Hamano
2004-02-20 12:54                                                       ` Jamie Lokier
2004-02-19 23:37                                           ` tridge
2004-02-20  0:02                                             ` Linus Torvalds
2004-02-20  0:16                                               ` tridge
2004-02-20  0:37                                                 ` Linus Torvalds
2004-02-20  1:26                                                   ` tridge
2004-02-20  1:07                                               ` H. Peter Anvin
2004-02-20  2:30                                           ` Theodore Ts'o
2004-02-20 12:04                                           ` explicit dcache <-> user-space cache coherency, sys_mark_dir_clean(), O_CLEAN Ingo Molnar
2004-02-20 13:19                                             ` Jamie Lokier
2004-02-20 13:37                                               ` Ingo Molnar
2004-02-20 14:00                                                 ` Ingo Molnar
2004-02-20 16:31                                                 ` Jamie Lokier
2004-02-20 13:23                                             ` Ingo Molnar [this message]
2004-02-20 18:00                                               ` [patch] " viro
2004-02-20 15:41                                             ` Linus Torvalds
2004-02-20 17:04                                               ` Ingo Molnar
2004-02-20 17:19                                                 ` Linus Torvalds
2004-02-20 18:48                                                   ` Ingo Molnar
2004-02-21  1:44                                                     ` Jamie Lokier
2004-02-21  7:58                                                     ` Ingo Molnar
2004-02-21  8:04                                                       ` viro
2004-02-21 17:46                                                         ` Ingo Molnar
2004-02-21 18:15                                                         ` Linus Torvalds
2004-02-21  8:26                                                       ` Keith Owens
2004-02-23 10:59                                                       ` Pavel Machek
2004-02-23 13:55                                                         ` Jamie Lokier
2004-02-23 16:45                                                           ` Ingo Molnar
2004-02-23 17:32                                                             ` Jamie Lokier
2004-02-20 23:00                                                   ` tridge
2004-02-20 17:33                                               ` Jamie Lokier
2004-02-20 18:22                                                 ` Linus Torvalds
2004-02-21  0:38                                                   ` Jamie Lokier
2004-02-21  1:10                                                     ` Linus Torvalds
2004-02-21  3:01                                                       ` Jamie Lokier
2004-02-20 17:47                                               ` Jamie Lokier
2004-02-20 20:38                                             ` Christer Weinigel
2004-02-22 15:07                                               ` Jamie Lokier
2004-02-22 16:55                                                 ` Miquel van Smoorenburg
2004-02-19 19:08                                       ` UTF-8 and case-insensitivity Helge Hafting
2004-02-18  4:08           ` tridge
2004-02-18 10:05             ` Robin Rosenberg
2004-02-18 11:43               ` tridge
2004-02-18 12:31                 ` Robin Rosenberg
2004-02-18 16:48                   ` H. Peter Anvin
2004-02-18 20:00                     ` H. Peter Anvin
2004-02-19  2:53   ` Daniel Newby
2004-02-17  5:25 ` Tim Connors
2004-02-17  7:43 ` H. Peter Anvin
2004-02-17  8:05   ` H. Peter Anvin
2004-02-17 14:25 ` Dave Kleikamp
2004-02-18  0:16 ` Robert White
2004-02-18  0:20   ` Linus Torvalds
2004-02-18  1:03     ` Robert White
2004-02-18 21:48     ` Ville Herva
2004-02-18  2:48   ` tridge
2004-02-18 20:56     ` Robert White

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20040220132352.GA11618@elte.hu \
    --to=mingo@elte.hu \
    --cc=hpa@zytor.com \
    --cc=jamie@shareable.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=torvalds@osdl.org \
    --cc=tridge@samba.org \
    --cc=viro@parcelfarce.linux.theplanet.co.uk \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.