linux-fsdevel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* Attempt at "stat light" implementation
@ 2009-04-07  6:23 Oleg Drokin
  2009-04-07 10:23 ` Andreas Dilger
                   ` (3 more replies)
  0 siblings, 4 replies; 25+ messages in thread
From: Oleg Drokin @ 2009-04-07  6:23 UTC (permalink / raw)
  To: linux-fsdevel

[-- Attachment #1: Type: text/plain, Size: 1151 bytes --]

Hello!

  Just as we discussed earlier today, here is a simple version that should do everything we need.
  I have separated stat fields into arbitrary categories based on my idea of how this needs to be done,
  that could of course be refined in other ways if desired.
  Only x86* syscall tables updated for now and no 64bit syscalls (probably we don't need them anyway, just
  need to always return 64 bit structure).

  I quickly realized that perhaps we can use just one extra syscall for fstat, and stat/lstat could be
  implemented as statat with just current cwd, though I do not know how desirable that is.

  Also I though that it would be kind of useful to allow the bitmask to be compatible with existing statat
  flags usage so that we do not need a new statat.

  Also perhaps the ->getattr just needs a prototype change to accept the flags argument and if the FS cares
  about it - to use it, but I did not want to do this huge patch touching every FS right now if only to save
  my time should this approach to be determined as undesirable for one reason or another, so
  for now I add a ->getattr_light

  Any comments?

Bye,
    Oleg

[-- Attachment #2: stat_light.diff --]
[-- Type: text/plain, Size: 11379 bytes --]

--- ./arch/x86/ia32/ia32entry.S.orig	2009-04-07 01:49:27.000000000 -0400
+++ ./arch/x86/ia32/ia32entry.S	2009-04-07 01:52:44.000000000 -0400
@@ -828,4 +828,7 @@
 	.quad sys_dup3			/* 330 */
 	.quad sys_pipe2
 	.quad sys_inotify_init1
+	.quad sys_fstatlight
+	.quad sys_lstatlight
+	.quad sys_statlight
 ia32_syscall_end:
--- ./arch/x86/include/asm/unistd_32.h.orig	2009-04-07 01:26:02.000000000 -0400
+++ ./arch/x86/include/asm/unistd_32.h	2009-04-07 01:38:39.000000000 -0400
@@ -338,6 +338,9 @@
 #define __NR_dup3		330
 #define __NR_pipe2		331
 #define __NR_inotify_init1	332
+#define __NR_fstat_light	333
+#define __NR_lstat_light	334
+#define __NR_stat_light		335
 
 #ifdef __KERNEL__
 
--- ./arch/x86/include/asm/unistd_64.h.orig	2009-04-07 01:26:12.000000000 -0400
+++ ./arch/x86/include/asm/unistd_64.h	2009-04-07 01:53:40.000000000 -0400
@@ -653,7 +653,12 @@
 __SYSCALL(__NR_pipe2, sys_pipe2)
 #define __NR_inotify_init1			294
 __SYSCALL(__NR_inotify_init1, sys_inotify_init1)
-
+#define __NR_fstat_light			295
+__SYSCALL(__NR_fstat_light, sys_fstatlight)
+#define __NR_lstat_light			296
+__SYSCALL(__NR_lstat_light, sys_lstatlight)
+#define __NR_stat_light				297
+__SYSCALL(__NR_inotify_init1, sys_statlight)
 
 #ifndef __NO_STUBS
 #define __ARCH_WANT_OLD_READDIR
--- ./fs/stat.c.orig	2009-04-07 00:23:22.000000000 -0400
+++ ./fs/stat.c	2009-04-07 01:51:49.000000000 -0400
@@ -37,7 +37,8 @@
 
 EXPORT_SYMBOL(generic_fillattr);
 
-int vfs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat)
+int vfs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat,
+		unsigned int flags)
 {
 	struct inode *inode = dentry->d_inode;
 	int retval;
@@ -46,6 +47,9 @@
 	if (retval)
 		return retval;
 
+	if (inode->i_op->getattr_light)
+		return inode->i_op->getattr_light(mnt, dentry, stat, flags);
+
 	if (inode->i_op->getattr)
 		return inode->i_op->getattr(mnt, dentry, stat);
 
@@ -55,53 +59,56 @@
 
 EXPORT_SYMBOL(vfs_getattr);
 
-int vfs_stat_fd(int dfd, char __user *name, struct kstat *stat)
+int vfs_stat_fd(int dfd, char __user *name, struct kstat *stat,
+		unsigned int flags)
 {
 	struct path path;
 	int error;
 
 	error = user_path_at(dfd, name, LOOKUP_FOLLOW, &path);
 	if (!error) {
-		error = vfs_getattr(path.mnt, path.dentry, stat);
+		error = vfs_getattr(path.mnt, path.dentry, stat, flags);
 		path_put(&path);
 	}
 	return error;
 }
 
-int vfs_stat(char __user *name, struct kstat *stat)
+int vfs_stat(char __user *name, struct kstat *stat, unsigned int flags)
 {
-	return vfs_stat_fd(AT_FDCWD, name, stat);
+	return vfs_stat_fd(AT_FDCWD, name, stat, flags);
 }
 
 EXPORT_SYMBOL(vfs_stat);
 
-int vfs_lstat_fd(int dfd, char __user *name, struct kstat *stat)
+int vfs_lstat_fd(int dfd, char __user *name, struct kstat *stat,
+		 unsigned int flags)
 {
 	struct path path;
 	int error;
 
 	error = user_path_at(dfd, name, 0, &path);
 	if (!error) {
-		error = vfs_getattr(path.mnt, path.dentry, stat);
+		error = vfs_getattr(path.mnt, path.dentry, stat, flags);
 		path_put(&path);
 	}
 	return error;
 }
 
-int vfs_lstat(char __user *name, struct kstat *stat)
+int vfs_lstat(char __user *name, struct kstat *stat, unsigned int flags)
 {
-	return vfs_lstat_fd(AT_FDCWD, name, stat);
+	return vfs_lstat_fd(AT_FDCWD, name, stat, flags);
 }
 
 EXPORT_SYMBOL(vfs_lstat);
 
-int vfs_fstat(unsigned int fd, struct kstat *stat)
+int vfs_fstat(unsigned int fd, struct kstat *stat, unsigned int flags)
 {
 	struct file *f = fget(fd);
 	int error = -EBADF;
 
 	if (f) {
-		error = vfs_getattr(f->f_path.mnt, f->f_path.dentry, stat);
+		error = vfs_getattr(f->f_path.mnt, f->f_path.dentry, stat,
+				    flags);
 		fput(f);
 	}
 	return error;
@@ -155,7 +162,7 @@
 SYSCALL_DEFINE2(stat, char __user *, filename, struct __old_kernel_stat __user *, statbuf)
 {
 	struct kstat stat;
-	int error = vfs_stat_fd(AT_FDCWD, filename, &stat);
+	int error = vfs_stat_fd(AT_FDCWD, filename, &stat, STAT_EVERYTHING);
 
 	if (!error)
 		error = cp_old_stat(&stat, statbuf);
@@ -166,7 +173,7 @@
 SYSCALL_DEFINE2(lstat, char __user *, filename, struct __old_kernel_stat __user *, statbuf)
 {
 	struct kstat stat;
-	int error = vfs_lstat_fd(AT_FDCWD, filename, &stat);
+	int error = vfs_lstat_fd(AT_FDCWD, filename, &stat, STAT_EVERYTHING);
 
 	if (!error)
 		error = cp_old_stat(&stat, statbuf);
@@ -177,7 +184,7 @@
 SYSCALL_DEFINE2(fstat, unsigned int, fd, struct __old_kernel_stat __user *, statbuf)
 {
 	struct kstat stat;
-	int error = vfs_fstat(fd, &stat);
+	int error = vfs_fstat(fd, &stat, STAT_EVERYTHING);
 
 	if (!error)
 		error = cp_old_stat(&stat, statbuf);
@@ -240,7 +247,7 @@
 SYSCALL_DEFINE2(newstat, char __user *, filename, struct stat __user *, statbuf)
 {
 	struct kstat stat;
-	int error = vfs_stat_fd(AT_FDCWD, filename, &stat);
+	int error = vfs_stat_fd(AT_FDCWD, filename, &stat, STAT_EVERYTHING);
 
 	if (!error)
 		error = cp_new_stat(&stat, statbuf);
@@ -251,7 +258,7 @@
 SYSCALL_DEFINE2(newlstat, char __user *, filename, struct stat __user *, statbuf)
 {
 	struct kstat stat;
-	int error = vfs_lstat_fd(AT_FDCWD, filename, &stat);
+	int error = vfs_lstat_fd(AT_FDCWD, filename, &stat, STAT_EVERYTHING);
 
 	if (!error)
 		error = cp_new_stat(&stat, statbuf);
@@ -270,9 +277,9 @@
 		goto out;
 
 	if (flag & AT_SYMLINK_NOFOLLOW)
-		error = vfs_lstat_fd(dfd, filename, &stat);
+		error = vfs_lstat_fd(dfd, filename, &stat, STAT_EVERYTHING);
 	else
-		error = vfs_stat_fd(dfd, filename, &stat);
+		error = vfs_stat_fd(dfd, filename, &stat, STAT_EVERYTHING);
 
 	if (!error)
 		error = cp_new_stat(&stat, statbuf);
@@ -285,7 +292,43 @@
 SYSCALL_DEFINE2(newfstat, unsigned int, fd, struct stat __user *, statbuf)
 {
 	struct kstat stat;
-	int error = vfs_fstat(fd, &stat);
+	int error = vfs_fstat(fd, &stat, STAT_EVERYTHING);
+
+	if (!error)
+		error = cp_new_stat(&stat, statbuf);
+
+	return error;
+}
+
+SYSCALL_DEFINE3(statlight, char __user *, filename,
+	        struct stat __user *, statbuf, unsigned int, flags)
+{
+	struct kstat stat;
+	int error = vfs_stat_fd(AT_FDCWD, filename, &stat, flags);
+
+	if (!error)
+		error = cp_new_stat(&stat, statbuf);
+
+	return error;
+}
+
+SYSCALL_DEFINE3(lstatlight, char __user *, filename,
+		struct stat __user *, statbuf, unsigned int, flags)
+{
+	struct kstat stat;
+	int error = vfs_lstat_fd(AT_FDCWD, filename, &stat, flags);
+
+	if (!error)
+		error = cp_new_stat(&stat, statbuf);
+
+	return error;
+}
+
+SYSCALL_DEFINE3(fstatlight, unsigned int, fd, struct stat __user *, statbuf,
+		unsigned int, flags)
+{
+	struct kstat stat;
+	int error = vfs_fstat(fd, &stat, flags);
 
 	if (!error)
 		error = cp_new_stat(&stat, statbuf);
@@ -370,7 +413,7 @@
 SYSCALL_DEFINE2(stat64, char __user *, filename, struct stat64 __user *, statbuf)
 {
 	struct kstat stat;
-	int error = vfs_stat(filename, &stat);
+	int error = vfs_stat(filename, &stat, STAT_EVERYTHING);
 
 	if (!error)
 		error = cp_new_stat64(&stat, statbuf);
@@ -381,7 +424,7 @@
 SYSCALL_DEFINE2(lstat64, char __user *, filename, struct stat64 __user *, statbuf)
 {
 	struct kstat stat;
-	int error = vfs_lstat(filename, &stat);
+	int error = vfs_lstat(filename, &stat, STAT_EVERYTHING);
 
 	if (!error)
 		error = cp_new_stat64(&stat, statbuf);
@@ -392,7 +435,7 @@
 SYSCALL_DEFINE2(fstat64, unsigned long, fd, struct stat64 __user *, statbuf)
 {
 	struct kstat stat;
-	int error = vfs_fstat(fd, &stat);
+	int error = vfs_fstat(fd, &stat, STAT_EVERYTHING);
 
 	if (!error)
 		error = cp_new_stat64(&stat, statbuf);
@@ -410,9 +453,9 @@
 		goto out;
 
 	if (flag & AT_SYMLINK_NOFOLLOW)
-		error = vfs_lstat_fd(dfd, filename, &stat);
+		error = vfs_lstat_fd(dfd, filename, &stat, STAT_EVERYTHING);
 	else
-		error = vfs_stat_fd(dfd, filename, &stat);
+		error = vfs_stat_fd(dfd, filename, &stat, STAT_EVERYTHING);
 
 	if (!error)
 		error = cp_new_stat64(&stat, statbuf);
--- ./include/linux/fcntl.h.orig	2009-04-07 00:31:36.000000000 -0400
+++ ./include/linux/fcntl.h	2009-04-07 00:44:41.000000000 -0400
@@ -40,6 +40,14 @@
                                            unlinking file.  */
 #define AT_SYMLINK_FOLLOW	0x400   /* Follow symbolic links.  */
 
+/* Bits to define what fields the "stat light" syscall must fill in struct stat,
+ * should not intersect with abovr AT_* bits */
+#define STAT_NLINK	0x01000		/* nlink */
+#define STAT_TIMES	0x02000		/* atime, mtime, ctime */
+#define STAT_SIZE	0x04000		/* File size and blocks */
+#define STAT_ACCESS	0x08000		/* mode, user, group */
+#define STAT_EVERYTHING 0x0f000		/* Just a normal stat */
+
 #ifdef __KERNEL__
 
 #ifndef force_o_largefile
--- ./include/linux/fs.h.orig	2009-04-07 00:30:52.000000000 -0400
+++ ./include/linux/fs.h	2009-04-07 01:03:58.000000000 -0400
@@ -1354,6 +1354,8 @@
 	int (*permission) (struct inode *, int);
 	int (*setattr) (struct dentry *, struct iattr *);
 	int (*getattr) (struct vfsmount *mnt, struct dentry *, struct kstat *);
+	int (*getattr_light) (struct vfsmount *mnt, struct dentry *,
+			      struct kstat *, unsigned int);
 	int (*setxattr) (struct dentry *, const char *,const void *,size_t,int);
 	ssize_t (*getxattr) (struct dentry *, const char *, void *, size_t);
 	ssize_t (*listxattr) (struct dentry *, char *, size_t);
@@ -2062,7 +2064,8 @@
 extern const struct inode_operations page_symlink_inode_operations;
 extern int generic_readlink(struct dentry *, char __user *, int);
 extern void generic_fillattr(struct inode *, struct kstat *);
-extern int vfs_getattr(struct vfsmount *, struct dentry *, struct kstat *);
+extern int vfs_getattr(struct vfsmount *, struct dentry *, struct kstat *,
+		       unsigned int);
 void inode_add_bytes(struct inode *inode, loff_t bytes);
 void inode_sub_bytes(struct inode *inode, loff_t bytes);
 loff_t inode_get_bytes(struct inode *inode);
@@ -2070,11 +2073,11 @@
 
 extern int vfs_readdir(struct file *, filldir_t, void *);
 
-extern int vfs_stat(char __user *, struct kstat *);
-extern int vfs_lstat(char __user *, struct kstat *);
-extern int vfs_stat_fd(int dfd, char __user *, struct kstat *);
-extern int vfs_lstat_fd(int dfd, char __user *, struct kstat *);
-extern int vfs_fstat(unsigned int, struct kstat *);
+extern int vfs_stat(char __user *, struct kstat *, unsigned int);
+extern int vfs_lstat(char __user *, struct kstat *, unsigned int);
+extern int vfs_stat_fd(int dfd, char __user *, struct kstat *, unsigned int);
+extern int vfs_lstat_fd(int dfd, char __user *, struct kstat *, unsigned int);
+extern int vfs_fstat(unsigned int, struct kstat *, unsigned int);
 
 extern int do_vfs_ioctl(struct file *filp, unsigned int fd, unsigned int cmd,
 		    unsigned long arg);
--- ./include/linux/syscalls.h.orig	2009-04-07 01:18:06.000000000 -0400
+++ ./include/linux/syscalls.h	2009-04-07 01:57:34.000000000 -0400
@@ -304,6 +304,13 @@
 				struct stat __user *statbuf);
 asmlinkage long sys_newfstat(unsigned int fd, struct stat __user *statbuf);
 asmlinkage long sys_ustat(unsigned dev, struct ustat __user *ubuf);
+asmlinkage long sys_fstatlight(unsigned int fd,
+			       struct stat __user *statbuf,
+			       unsigned int);
+asmlinkage long sys_statlight(char __user *filename,
+			      struct stat __user *statbuf, unsigned int);
+asmlinkage long sys_lstatlight(char __user *filename,
+			       struct stat __user *statbuf, unsigned int);
 #if BITS_PER_LONG == 32
 asmlinkage long sys_stat64(char __user *filename,
 				struct stat64 __user *statbuf);

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: Attempt at "stat light" implementation
  2009-04-07  6:23 Attempt at "stat light" implementation Oleg Drokin
@ 2009-04-07 10:23 ` Andreas Dilger
  2009-04-07 14:54   ` Oleg Drokin
  2009-04-07 15:22   ` jim owens
  2009-04-07 17:32 ` Jamie Lokier
                   ` (2 subsequent siblings)
  3 siblings, 2 replies; 25+ messages in thread
From: Andreas Dilger @ 2009-04-07 10:23 UTC (permalink / raw)
  To: Oleg Drokin; +Cc: linux-fsdevel

On Apr 07, 2009  10:23 +0400, Oleg Drokin wrote:
>   Also perhaps the ->getattr just needs a prototype change to accept the
>   flags argument and if the FS cares about it - to use it, but I did not
>   want to do this huge patch touching every FS right now if only to save
>   my time should this approach to be determined as undesirable for one
>   reason or another, so for now I add a ->getattr_light

You mean like open(path, flags, mode), where the @mode argument is
optional?  I think that has caused a lot of headaches in the past.

> +/* Bits to define what fields the "stat light" syscall must fill in
> + * struct stat, should not intersect with above AT_* bits */
> +#define STAT_NLINK	0x01000		/* nlink */
> +#define STAT_TIMES	0x02000		/* atime, mtime, ctime */
> +#define STAT_SIZE	0x04000		/* File size and blocks */
> +#define STAT_ACCESS	0x08000		/* mode, user, group */
> +#define STAT_EVERYTHING 0x0f000	/* Just a normal stat */

The very most common fields that will need to be returned are type and
mode, because "ls --color" (the default) does this.  These should get
their own flag at minimum, and should definitely be separate from uid/gid.  

One issue I recall with uid/gid is that in some environments the UID/GID
may be remapped for remote UID mapping (maybe with an LDAP or Kerberos
query for each one), and forcing the filesystem to do this when "ls"
is just going to throw them away is pointless.

Even the inode numbers cost a non-zero amount of work in some
filesystems that have to expose a persistent 32-bit mapping.

I'd prefer to have more fine grained control than this, since the
application knows what it wants, and it is impossible to predicy
what the "slow" operation is for a given filesystem/workload.

In the end it DOES seem that fstatat(2) is a sufficient interface for
this:

       int fstatat(int dirfd, const char *pathname, struct stat *buf,
		   int flags);

since it is working on the directory and passing in the filenames
for each file.  There is even a AT_SYMLINK_NOFOLLOW to avoid the
need to have a separate "l" version.

Cheers, Andreas
--
Andreas Dilger
Sr. Staff Engineer, Lustre Group
Sun Microsystems of Canada, Inc.


^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: Attempt at "stat light" implementation
  2009-04-07 10:23 ` Andreas Dilger
@ 2009-04-07 14:54   ` Oleg Drokin
  2009-04-07 17:52     ` Christoph Hellwig
  2009-04-07 15:22   ` jim owens
  1 sibling, 1 reply; 25+ messages in thread
From: Oleg Drokin @ 2009-04-07 14:54 UTC (permalink / raw)
  To: Andreas Dilger; +Cc: linux-fsdevel

Hello!

On Apr 7, 2009, at 6:23 AM, Andreas Dilger wrote:

> On Apr 07, 2009  10:23 +0400, Oleg Drokin wrote:
>>  Also perhaps the ->getattr just needs a prototype change to accept  
>> the
>>  flags argument and if the FS cares about it - to use it, but I did  
>> not
>>  want to do this huge patch touching every FS right now if only to  
>> save
>>  my time should this approach to be determined as undesirable for one
>>  reason or another, so for now I add a ->getattr_light
> You mean like open(path, flags, mode), where the @mode argument is
> optional?  I think that has caused a lot of headaches in the past.

No, I mean, it would not be optional, just not every filesystem would  
care.
Say for a most local filesystems it is supposedly equal amount of work
to get just some inode field as it is to get entire struct stat, since
they need to just read the inode from disk (if not cached) anyway.

>> +/* Bits to define what fields the "stat light" syscall must fill in
>> + * struct stat, should not intersect with above AT_* bits */
>> +#define STAT_NLINK	0x01000		/* nlink */
>> +#define STAT_TIMES	0x02000		/* atime, mtime, ctime */
>> +#define STAT_SIZE	0x04000		/* File size and blocks */
>> +#define STAT_ACCESS	0x08000		/* mode, user, group */
>> +#define STAT_EVERYTHING 0x0f000	/* Just a normal stat */
> The very most common fields that will need to be returned are type and
> mode, because "ls --color" (the default) does this.  These should get
> their own flag at minimum, and should definitely be separate from  
> uid/gid.

Sure, we can do this. We can even have a flag per every field in
struct stat + more generic masks for commonly requested together
fields.

> I'd prefer to have more fine grained control than this, since the
> application knows what it wants, and it is impossible to predicy
> what the "slow" operation is for a given filesystem/workload.

Sure.

> In the end it DOES seem that fstatat(2) is a sufficient interface for
> this:
>
>       int fstatat(int dirfd, const char *pathname, struct stat *buf,
> 		   int flags);
> since it is working on the directory and passing in the filenames
> for each file.  There is even a AT_SYMLINK_NOFOLLOW to avoid the
> need to have a separate "l" version.

I thought about it, but unless we would allow userspace to pass in
the magic AT_FDCWD, they would still would need to open something
even if only once per run (and probably once per dir, to avoid
extra lookup overhead), and that is probably not very desirable.

Bye,
     Oleg

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: Attempt at "stat light" implementation
  2009-04-07 10:23 ` Andreas Dilger
  2009-04-07 14:54   ` Oleg Drokin
@ 2009-04-07 15:22   ` jim owens
  2009-04-07 15:38     ` Oleg Drokin
  1 sibling, 1 reply; 25+ messages in thread
From: jim owens @ 2009-04-07 15:22 UTC (permalink / raw)
  To: Andreas Dilger; +Cc: Oleg Drokin, linux-fsdevel

Sorry our travel budget didn't allow me to be there
for the spirited discussion ;)

I don't have anything against a selective stat function
except that it adds extra conditional code that my
experience says won't be much benefit.

We have a selective getattr in Tru64 for clusters with
the same idea that it would save a lot of expensive
operations in the cluster.

It did *not*.

The problems are:

   - applications are not smart (except maybe on luster).
   - applications find it easier and usually faster to get
     all the stat info and sort it themselves, particularly
     when they are tested exclusively on local systems.
   - who is going to fix (and keep fixed) those gui file managers
     (and stop users from "ls -l" or viewing properties)?
   - even internal kernel getattrs would often make a "light"
     call, then the next operation needed the "heavy" attrs,
     so we sent 2 RPCs instead of 1 and had to do a lot more
     complicated local attr cache management of what fields
     were valid in cache and what were not.

And I spent a lot of time fixing attr cache and cluster
locking problems in the cluster FS and NFS code.

So by trying to save expensive operations, it made it worse.

One practical suggestion: for fields that were not requested,
we sent back the "illegal/ignore" (usually -1) value that
could be used in a subsequent setattr.

jim

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: Attempt at "stat light" implementation
  2009-04-07 15:22   ` jim owens
@ 2009-04-07 15:38     ` Oleg Drokin
  2009-04-07 16:20       ` jim owens
  0 siblings, 1 reply; 25+ messages in thread
From: Oleg Drokin @ 2009-04-07 15:38 UTC (permalink / raw)
  To: jim owens; +Cc: Andreas Dilger, linux-fsdevel

Hello!

On Apr 7, 2009, at 11:22 AM, jim owens wrote:

>  - applications are not smart (except maybe on luster).
>  - applications find it easier and usually faster to get
>    all the stat info and sort it themselves, particularly
>    when they are tested exclusively on local systems.

No argument about these.

>  - who is going to fix (and keep fixed) those gui file managers
>    (and stop users from "ls -l" or viewing properties)?

The idea is once this gets upstream, ls (and then possibly
filemanagers) will start to use this call.
Then it will eventualy propagate to users the natural way.

>  - even internal kernel getattrs would often make a "light"
>    call, then the next operation needed the "heavy" attrs,
>    so we sent 2 RPCs instead of 1 and had to do a lot more
>    complicated local attr cache management of what fields
>    were valid in cache and what were not.

The thing is, e.g. in Lustre there are different kind of locks you
would get for different inode attributes and these 2 RPCs to
get them separately 5 minutes apart from each other would actually
save us some RPCs, since there would not be lock ping pong to
invalidate the locks held by a client protecting e.g. directory
nlink count (that the client does not care about) when another dir
is created in that dir.

> And I spent a lot of time fixing attr cache and cluster
> locking problems in the cluster FS and NFS code.

Well, since internally we represent only partial view of attributes
anyway, this is a not an issue for us.

Bye,
     Oleg

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: Attempt at "stat light" implementation
  2009-04-07 15:38     ` Oleg Drokin
@ 2009-04-07 16:20       ` jim owens
  0 siblings, 0 replies; 25+ messages in thread
From: jim owens @ 2009-04-07 16:20 UTC (permalink / raw)
  To: Oleg Drokin; +Cc: Andreas Dilger, linux-fsdevel

Oleg Drokin wrote:
> On Apr 7, 2009, at 11:22 AM, jim owens wrote:
> 
>>  - who is going to fix (and keep fixed) those gui file managers
>>    (and stop users from "ls -l" or viewing properties)?
> 
> The idea is once this gets upstream, ls (and then possibly
> filemanagers) will start to use this call.
> Then it will eventualy propagate to users the natural way.

you can lead them to water, but can't keep them from peeing in it :)

> The thing is, e.g. in Lustre there are different kind of locks you
> would get for different inode attributes and these 2 RPCs to
> get them separately 5 minutes apart from each other would actually
> save us some RPCs, since there would not be lock ping pong to
> invalidate the locks held by a client protecting e.g. directory
> nlink count (that the client does not care about) when another dir
> is created in that dir.

I would have been happy with more RPCs if we even got 10 seconds
of extra time on our shared lock before doing the invalidate and
switch to the exclusive heavy attr lock.  It was all the heavy
calls at subsecond intervals after the light ones that really
hurt.  But if you start seeing that kind of traffic I guess you
can always code it to change light requests to heavy ones and
cache them.

jim

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: Attempt at "stat light" implementation
  2009-04-07  6:23 Attempt at "stat light" implementation Oleg Drokin
  2009-04-07 10:23 ` Andreas Dilger
@ 2009-04-07 17:32 ` Jamie Lokier
  2009-04-07 17:38   ` Jeff Garzik
  2009-04-07 17:41   ` Oleg Drokin
  2009-04-07 17:49 ` Christoph Hellwig
  2009-04-07 20:18 ` Evgeniy Polyakov
  3 siblings, 2 replies; 25+ messages in thread
From: Jamie Lokier @ 2009-04-07 17:32 UTC (permalink / raw)
  To: Oleg Drokin; +Cc: linux-fsdevel

Oleg Drokin wrote:
>   I have separated stat fields into arbitrary categories based on my idea of how this needs to be done,
>   that could of course be refined in other ways if desired.

Type should get its own category - which is finer grained than mode.
Type (dir, file, link, etc.) is sometimes a lot cheaper than the other
attributes to get, because on some filesystems you don't need to read
the inode to get the type.  See readdir() and d_type.

atime should get its own category, because on at least one filesystem
- Tux3 - atime is kept in a separate part of the storage.  Not reading
the atime if you don't care about it would be good.  Most programs
don't look at atime.

Separate categories for mtime and ctime make sense, although I don't
know of an implementation where it would make a performance difference
at present.

Once you have fine-grained selection of stat fields - it's natural to
ask why not allow _additional_ stat fields in an future-extensible
fashion?  A few things would be handy sometimes, such as inode
generation number, modification generation number (to detect changes
across reboots), and extra flags indicating COW or other properties.

If an "extensible" attribute is not asked for, the kernel must not
fill in that field of the structure as the app may be not have
allocated space for it.  Or it must use a tag-length-value sort of
structure for the result, similar to network CMSGs.

-- Jamie

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: Attempt at "stat light" implementation
  2009-04-07 17:32 ` Jamie Lokier
@ 2009-04-07 17:38   ` Jeff Garzik
  2009-04-07 17:56     ` Jamie Lokier
  2009-04-07 17:41   ` Oleg Drokin
  1 sibling, 1 reply; 25+ messages in thread
From: Jeff Garzik @ 2009-04-07 17:38 UTC (permalink / raw)
  To: Jamie Lokier; +Cc: Oleg Drokin, linux-fsdevel, LKML

Jamie Lokier wrote:
> Once you have fine-grained selection of stat fields - it's natural to
> ask why not allow _additional_ stat fields in an future-extensible
> fashion?  A few things would be handy sometimes, such as inode
> generation number, modification generation number (to detect changes
> across reboots), and extra flags indicating COW or other properties.

That's quite an interesting thought...  until you run out of flags, the 
stat structure becomes a bit more flexible.

	Jeff




^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: Attempt at "stat light" implementation
  2009-04-07 17:32 ` Jamie Lokier
  2009-04-07 17:38   ` Jeff Garzik
@ 2009-04-07 17:41   ` Oleg Drokin
  2009-04-07 17:54     ` Jamie Lokier
                       ` (2 more replies)
  1 sibling, 3 replies; 25+ messages in thread
From: Oleg Drokin @ 2009-04-07 17:41 UTC (permalink / raw)
  To: Jamie Lokier; +Cc: linux-fsdevel

Hello!

On Apr 7, 2009, at 1:32 PM, Jamie Lokier wrote:

> Oleg Drokin wrote:
>>  I have separated stat fields into arbitrary categories based on my  
>> idea of how this needs to be done,
>>  that could of course be refined in other ways if desired.
> Type should get its own category - which is finer grained than mode.
> Type (dir, file, link, etc.) is sometimes a lot cheaper than the other
> attributes to get, because on some filesystems you don't need to read
> the inode to get the type.  See readdir() and d_type.

Sure. In fact I not believe a flag per field would be the way.

> Once you have fine-grained selection of stat fields - it's natural to
> ask why not allow _additional_ stat fields in an future-extensible
> fashion?  A few things would be handy sometimes, such as inode
> generation number, modification generation number (to detect changes
> across reboots), and extra flags indicating COW or other properties.

Sure. When they append to sturct stat, they would add new bits.
We have 32 bits in the flags, then 3 bits are already used for AT*  
flags.
That leaves us with 29 bits for STAT_* (or perhaps AT_STAT) flags, and
we already use ~15 fields in the struct stat, do we have enough bits,
or should we increase the flags width right now while we are at it?

> If an "extensible" attribute is not asked for, the kernel must not
> fill in that field of the structure as the app may be not have
> allocated space for it.  Or it must use a tag-length-value sort of
> structure for the result, similar to network CMSGs.

The app allocates struct stat, whatever it is, I presume. There is a  
huge
benefit to actually reuse existing struct stat, I believe.

Thanks.

Bye,
     Oleg

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: Attempt at "stat light" implementation
  2009-04-07  6:23 Attempt at "stat light" implementation Oleg Drokin
  2009-04-07 10:23 ` Andreas Dilger
  2009-04-07 17:32 ` Jamie Lokier
@ 2009-04-07 17:49 ` Christoph Hellwig
  2009-04-07 17:56   ` Oleg Drokin
  2009-04-07 19:47   ` Jeff Garzik
  2009-04-07 20:18 ` Evgeniy Polyakov
  3 siblings, 2 replies; 25+ messages in thread
From: Christoph Hellwig @ 2009-04-07 17:49 UTC (permalink / raw)
  To: Oleg Drokin; +Cc: linux-fsdevel

On Tue, Apr 07, 2009 at 10:23:56AM +0400, Oleg Drokin wrote:
> Hello!
> 
>   I quickly realized that perhaps we can use just one extra syscall for fstat, and stat/lstat could be
>   implemented as statat with just current cwd, though I do not know how desirable that is.
> 
>   Also I though that it would be kind of useful to allow the bitmask to be compatible with existing statat
>   flags usage so that we do not need a new statat.

I think the best way to do it is to just define additional flags for the
*statat family, that if present only request partial stat information.

>   Also perhaps the ->getattr just needs a prototype change to accept the flags argument and if the FS cares
>   about it - to use it, but I did not want to do this huge patch touching every FS right now if only to save
>   my time should this approach to be determined as undesirable for one reason or another, so
>   for now I add a ->getattr_light

Yes, no additional method please.  Adding the mask argument to getattr
is a pretty trivial search and replace.


^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: Attempt at "stat light" implementation
  2009-04-07 14:54   ` Oleg Drokin
@ 2009-04-07 17:52     ` Christoph Hellwig
  0 siblings, 0 replies; 25+ messages in thread
From: Christoph Hellwig @ 2009-04-07 17:52 UTC (permalink / raw)
  To: Oleg Drokin; +Cc: Andreas Dilger, linux-fsdevel

On Tue, Apr 07, 2009 at 10:54:31AM -0400, Oleg Drokin wrote:
>> In the end it DOES seem that fstatat(2) is a sufficient interface for
>> this:
>>
>>       int fstatat(int dirfd, const char *pathname, struct stat *buf,
>> 		   int flags);
>> since it is working on the directory and passing in the filenames
>> for each file.  There is even a AT_SYMLINK_NOFOLLOW to avoid the
>> need to have a separate "l" version.
>
> I thought about it, but unless we would allow userspace to pass in
> the magic AT_FDCWD, they would still would need to open something
> even if only once per run (and probably once per dir, to avoid
> extra lookup overhead), and that is probably not very desirable.

AT_FDCWD is part of the Posix-specified *at interface and can be passed
in from userspace since day1.  Apropos Posix, you should probably talk
to Uli Drepper about exposing these things in glibc and possibly
standardizing it through Austin group (or whatever replaced it)

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: Attempt at "stat light" implementation
  2009-04-07 17:41   ` Oleg Drokin
@ 2009-04-07 17:54     ` Jamie Lokier
  2009-04-07 18:00       ` Oleg Drokin
  2009-04-07 18:18     ` Andreas Dilger
  2009-04-07 18:31     ` Nicholas Miell
  2 siblings, 1 reply; 25+ messages in thread
From: Jamie Lokier @ 2009-04-07 17:54 UTC (permalink / raw)
  To: Oleg Drokin; +Cc: linux-fsdevel

Oleg Drokin wrote:
> Hello!
> Sure. When they append to sturct stat, they would add new bits.
> We have 32 bits in the flags, then 3 bits are already used for AT*  
> flags.
> That leaves us with 29 bits for STAT_* (or perhaps AT_STAT) flags, and
> we already use ~15 fields in the struct stat, do we have enough bits,
> or should we increase the flags width right now while we are at it?

I don't see all 31 bits being used up in the foreseeable future.  So
it's fine as long as you (in future) make the last one
__AT_STAT_MORE_FLAGS causing an additional syscall argument to be
interpreted for more flags.  Glibc would wrap it up.

> >Once you have fine-grained selection of stat fields - it's natural to
> >ask why not allow _additional_ stat fields in an future-extensible
> >fashion?  A few things would be handy sometimes, such as inode
> >generation number, modification generation number (to detect changes
> >across reboots), and extra flags indicating COW or other properties.
> 
> >If an "extensible" attribute is not asked for, the kernel must not
> >fill in that field of the structure as the app may be not have
> >allocated space for it.  Or it must use a tag-length-value sort of
> >structure for the result, similar to network CMSGs.
> 
> The app allocates struct stat, whatever it is, I presume. There is a
> huge benefit to actually reuse existing struct stat, I believe.

I agree, but you can't fill in unwanted _additional_ fields if you add
any to the kernel's "struct stat" (or "struct stat_extended" which is
binary compatible but has additional fields).  If you do that, _old_
apps will allocate a smaller structure than the kernel is using.

-- Jamie

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: Attempt at "stat light" implementation
  2009-04-07 17:49 ` Christoph Hellwig
@ 2009-04-07 17:56   ` Oleg Drokin
  2009-04-07 18:28     ` Andreas Dilger
  2009-04-07 19:47   ` Jeff Garzik
  1 sibling, 1 reply; 25+ messages in thread
From: Oleg Drokin @ 2009-04-07 17:56 UTC (permalink / raw)
  To: Christoph Hellwig; +Cc: linux-fsdevel

Hello!

On Apr 7, 2009, at 1:49 PM, Christoph Hellwig wrote:
>>  I quickly realized that perhaps we can use just one extra syscall  
>> for fstat, and stat/lstat could be
>>  implemented as statat with just current cwd, though I do not know  
>> how desirable that is.
>>  Also I though that it would be kind of useful to allow the bitmask  
>> to be compatible with existing statat
>>  flags usage so that we do not need a new statat.
> I think the best way to do it is to just define additional flags for  
> the
> *statat family, that if present only request partial stat information.

But having to open every dir (or take an extra intermediate lookup hit)
might be a bad thing at times unless we officially allow to use a  
"magic"
"AT_FDCWD". What's your opinion on this?
Also we still need an fstat light version anyway, don't we?

Bye,
     Oleg

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: Attempt at "stat light" implementation
  2009-04-07 17:38   ` Jeff Garzik
@ 2009-04-07 17:56     ` Jamie Lokier
  2009-04-07 18:21       ` Andreas Dilger
  0 siblings, 1 reply; 25+ messages in thread
From: Jamie Lokier @ 2009-04-07 17:56 UTC (permalink / raw)
  To: Jeff Garzik; +Cc: Oleg Drokin, linux-fsdevel, LKML

Jeff Garzik wrote:
> Jamie Lokier wrote:
> >Once you have fine-grained selection of stat fields - it's natural to
> >ask why not allow _additional_ stat fields in an future-extensible
> >fashion?  A few things would be handy sometimes, such as inode
> >generation number, modification generation number (to detect changes
> >across reboots), and extra flags indicating COW or other properties.
> 
> That's quite an interesting thought...  until you run out of flags, the 
> stat structure becomes a bit more flexible.

I suspect at least one other unix or unix-like OS has done this
already, and it might be worth copying the interface.  But none come to
mind.

(I'm not counting DJGPP on MS-DOS ;-)

-- Jamie

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: Attempt at "stat light" implementation
  2009-04-07 17:54     ` Jamie Lokier
@ 2009-04-07 18:00       ` Oleg Drokin
  0 siblings, 0 replies; 25+ messages in thread
From: Oleg Drokin @ 2009-04-07 18:00 UTC (permalink / raw)
  To: Jamie Lokier; +Cc: linux-fsdevel

Hello!

On Apr 7, 2009, at 1:54 PM, Jamie Lokier wrote:
>>> If an "extensible" attribute is not asked for, the kernel must not
>>> fill in that field of the structure as the app may be not have
>>> allocated space for it.  Or it must use a tag-length-value sort of
>>> structure for the result, similar to network CMSGs.
>> The app allocates struct stat, whatever it is, I presume. There is a
>> huge benefit to actually reuse existing struct stat, I believe.
> I agree, but you can't fill in unwanted _additional_ fields if you add
> any to the kernel's "struct stat" (or "struct stat_extended" which is
> binary compatible but has additional fields).  If you do that, _old_
> apps will allocate a smaller structure than the kernel is using.

If the actual struct stat is extended, then it is extended and we
would create additional syscalls to fill the new one anyway.
Of course it would be possible to just extend the *stat_light interface
with caring about what size of the stat_struct version to fill in
the future, but since there are no such fields yet, there is nothing
to code at the moment.

Bye,
     Oleg

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: Attempt at "stat light" implementation
  2009-04-07 17:41   ` Oleg Drokin
  2009-04-07 17:54     ` Jamie Lokier
@ 2009-04-07 18:18     ` Andreas Dilger
  2009-04-07 18:31     ` Nicholas Miell
  2 siblings, 0 replies; 25+ messages in thread
From: Andreas Dilger @ 2009-04-07 18:18 UTC (permalink / raw)
  To: Oleg Drokin; +Cc: Jamie Lokier, linux-fsdevel

On Apr 07, 2009  13:41 -0400, Oleg Drokin wrote:
> On Apr 7, 2009, at 1:32 PM, Jamie Lokier wrote:
>> If an "extensible" attribute is not asked for, the kernel must not
>> fill in that field of the structure as the app may be not have
>> allocated space for it.  Or it must use a tag-length-value sort of
>> structure for the result, similar to network CMSGs.
>
> The app allocates struct stat, whatever it is, I presume. There is a  
> huge benefit to actually reuse existing struct stat, I believe.

The point is that if the application is asking for specific fields,
it will also have allocated a stat struct that will have room for
those fields (sizeof(struct stat) would of course have to match
the AT_STAT_* flags that are available to the application).  New fields
(as opposed to using the reserved fields) would always be added at
the end.  If some flags are not set then it doesn't matter whether
the user struct is large enough because the fields will not be touched.

Cheers, Andreas
--
Andreas Dilger
Sr. Staff Engineer, Lustre Group
Sun Microsystems of Canada, Inc.


^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: Attempt at "stat light" implementation
  2009-04-07 17:56     ` Jamie Lokier
@ 2009-04-07 18:21       ` Andreas Dilger
  2009-04-07 19:30         ` Ulrich Drepper
  0 siblings, 1 reply; 25+ messages in thread
From: Andreas Dilger @ 2009-04-07 18:21 UTC (permalink / raw)
  To: Jamie Lokier; +Cc: Jeff Garzik, Oleg Drokin, linux-fsdevel, LKML

On Apr 07, 2009  18:56 +0100, Jamie Lokier wrote:
> Jeff Garzik wrote:
> > Jamie Lokier wrote:
> > >Once you have fine-grained selection of stat fields - it's natural to
> > >ask why not allow _additional_ stat fields in an future-extensible
> > >fashion?  A few things would be handy sometimes, such as inode
> > >generation number, modification generation number (to detect changes
> > >across reboots), and extra flags indicating COW or other properties.
> > 
> > That's quite an interesting thought...  until you run out of flags, the 
> > stat structure becomes a bit more flexible.
> 
> I suspect at least one other unix or unix-like OS has done this
> already, and it might be worth copying the interface.  But none come to
> mind.

There is one in OS/X called getattrlist, but it is quite complex and
would require attribute handling wildly different than what the kernel
and *nix applications are doing:

http://www.manpagez.com/man/2/getattrlist/

Cheers, Andreas
--
Andreas Dilger
Sr. Staff Engineer, Lustre Group
Sun Microsystems of Canada, Inc.


^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: Attempt at "stat light" implementation
  2009-04-07 17:56   ` Oleg Drokin
@ 2009-04-07 18:28     ` Andreas Dilger
  2009-04-07 18:50       ` Jamie Lokier
  2009-04-07 19:48       ` Jeff Garzik
  0 siblings, 2 replies; 25+ messages in thread
From: Andreas Dilger @ 2009-04-07 18:28 UTC (permalink / raw)
  To: Oleg Drokin; +Cc: Christoph Hellwig, linux-fsdevel

On Apr 07, 2009  13:56 -0400, Oleg Drokin wrote:
> On Apr 7, 2009, at 1:49 PM, Christoph Hellwig wrote:
>> I think the best way to do it is to just define additional flags for  
>> *statat family, that if present only request partial stat information.
>
> But having to open every dir (or take an extra intermediate lookup hit)
> might be a bad thing at times unless we officially allow to use a  
> "magic" > "AT_FDCWD". What's your opinion on this?

Generally, any application doing tree traversal will have the parent
directory open doing readdir() or getdents() so I don't think that is
a huge problem.

> Also we still need an fstat light version anyway, don't we?

Possibly, yes, though it isn't a strict requirement.  The man page
reports that fstatat() is only for directories, but possibly it
could also be used on regular files?

Cheers, Andreas
--
Andreas Dilger
Sr. Staff Engineer, Lustre Group
Sun Microsystems of Canada, Inc.


^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: Attempt at "stat light" implementation
  2009-04-07 17:41   ` Oleg Drokin
  2009-04-07 17:54     ` Jamie Lokier
  2009-04-07 18:18     ` Andreas Dilger
@ 2009-04-07 18:31     ` Nicholas Miell
  2 siblings, 0 replies; 25+ messages in thread
From: Nicholas Miell @ 2009-04-07 18:31 UTC (permalink / raw)
  To: Oleg Drokin; +Cc: Jamie Lokier, linux-fsdevel

On Tue, 2009-04-07 at 13:41 -0400, Oleg Drokin wrote:
> Hello!
> 
> On Apr 7, 2009, at 1:32 PM, Jamie Lokier wrote:
> 
> > Oleg Drokin wrote:
> >>  I have separated stat fields into arbitrary categories based on my  
> >> idea of how this needs to be done,
> >>  that could of course be refined in other ways if desired.
> > Type should get its own category - which is finer grained than mode.
> > Type (dir, file, link, etc.) is sometimes a lot cheaper than the other
> > attributes to get, because on some filesystems you don't need to read
> > the inode to get the type.  See readdir() and d_type.
> 
> Sure. In fact I not believe a flag per field would be the way.
> 
> > Once you have fine-grained selection of stat fields - it's natural to
> > ask why not allow _additional_ stat fields in an future-extensible
> > fashion?  A few things would be handy sometimes, such as inode
> > generation number, modification generation number (to detect changes
> > across reboots), and extra flags indicating COW or other properties.
> 
> Sure. When they append to sturct stat, they would add new bits.
> We have 32 bits in the flags, then 3 bits are already used for AT*  
> flags.
> That leaves us with 29 bits for STAT_* (or perhaps AT_STAT) flags, and
> we already use ~15 fields in the struct stat, do we have enough bits,
> or should we increase the flags width right now while we are at it?
> 

You could introduce one xattr for each field and a getxattrvec() syscall
which returns multiple xattrs.

e.g.

struct attrvec
{
	const char *	name;	/* attribute name */
	void *		value;	/* attribute value buffer */
	size_t		size;	/* size of value buffer */
	error_t		error;	/* error -- 0, ENOATTR, ERANGE, etc. */	
};

int getxattrvec(const char *path, struct attrvec *attrs, size_t avcount);

time_t mtime;
mode_t mode;
off_t size;

struct attrvec av[] = {
	{ "system.st_mtime", &mtime, sizeof (mtime), 0 },
	{ "system.st_mode", &mode, sizeof (mode), 0 },
	{ "system.st_size", &size, sizeof (size), 0 }
}

getxattrvec("somefile", av, ARRAY_SIZE(av));

Although, I suspect coalescing multiple xattr requests into a single fs
RPC may get messy.

-- 
Nicholas Miell <nmiell@comcast.net>


^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: Attempt at "stat light" implementation
  2009-04-07 18:28     ` Andreas Dilger
@ 2009-04-07 18:50       ` Jamie Lokier
  2009-04-07 19:48       ` Jeff Garzik
  1 sibling, 0 replies; 25+ messages in thread
From: Jamie Lokier @ 2009-04-07 18:50 UTC (permalink / raw)
  To: Andreas Dilger; +Cc: Oleg Drokin, Christoph Hellwig, linux-fsdevel

Andreas Dilger wrote:
> Generally, any application doing tree traversal will have the parent
> directory open doing readdir() or getdents() so I don't think that is
> a huge problem.

I can't think of any reason to disallow AT_FDCWD though.  It's already
there - are we proposing to take it away? :-)

> > Also we still need an fstat light version anyway, don't we?
> 
> Possibly, yes, though it isn't a strict requirement.  The man page
> reports that fstatat() is only for directories, but possibly it
> could also be used on regular files?

Suggestion: Passing AT_SELF (NULL) as the pathname means get the
attributes of the fd argument itself, instead of doing a name lookup.

-- Jamie

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: Attempt at "stat light" implementation
  2009-04-07 18:21       ` Andreas Dilger
@ 2009-04-07 19:30         ` Ulrich Drepper
  2009-04-12 20:55           ` Jamie Lokier
  0 siblings, 1 reply; 25+ messages in thread
From: Ulrich Drepper @ 2009-04-07 19:30 UTC (permalink / raw)
  To: Andreas Dilger
  Cc: Jamie Lokier, Jeff Garzik, Oleg Drokin, linux-fsdevel, LKML

On Tue, Apr 7, 2009 at 11:21 AM, Andreas Dilger <adilger@sun.com> wrote:
> There is one in OS/X called getattrlist, but it is quite complex and
> would require attribute handling wildly different than what the kernel
> and *nix applications are doing:
>
> http://www.manpagez.com/man/2/getattrlist/

I think this is overkill.  There are two different tasks to do:

- speed up stat by providing a subset of the information

- provide a mechanism to get arbitrary attribute support and efficient
ways to get to it


Trying to force both of these tasks into one interface will partially
defeat the purpose of the first, creating a fast stat replacement.  I
think these should be different interfaces (if the second type is
needed at all).

As for making the interface extendable.  In all the years there hasn't
really been much need to extend the stat structure (ignoring extended
attributes).  So, I'd not be worried about using a simple 32-bit
bitmask to select the fields to include in the fast stat.

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: Attempt at "stat light" implementation
  2009-04-07 17:49 ` Christoph Hellwig
  2009-04-07 17:56   ` Oleg Drokin
@ 2009-04-07 19:47   ` Jeff Garzik
  1 sibling, 0 replies; 25+ messages in thread
From: Jeff Garzik @ 2009-04-07 19:47 UTC (permalink / raw)
  To: Christoph Hellwig; +Cc: Oleg Drokin, linux-fsdevel

Christoph Hellwig wrote:
> Yes, no additional method please.  Adding the mask argument to getattr
> is a pretty trivial search and replace.

Agreed.  That was my reaction to the patch (Oleg's or Mark's, I forget) 
that added a new method.

	Jeff



^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: Attempt at "stat light" implementation
  2009-04-07 18:28     ` Andreas Dilger
  2009-04-07 18:50       ` Jamie Lokier
@ 2009-04-07 19:48       ` Jeff Garzik
  1 sibling, 0 replies; 25+ messages in thread
From: Jeff Garzik @ 2009-04-07 19:48 UTC (permalink / raw)
  To: Andreas Dilger; +Cc: Oleg Drokin, Christoph Hellwig, linux-fsdevel

Andreas Dilger wrote:
> On Apr 07, 2009  13:56 -0400, Oleg Drokin wrote:
>> On Apr 7, 2009, at 1:49 PM, Christoph Hellwig wrote:
>>> I think the best way to do it is to just define additional flags for  
>>> *statat family, that if present only request partial stat information.
>> But having to open every dir (or take an extra intermediate lookup hit)
>> might be a bad thing at times unless we officially allow to use a  
>> "magic" > "AT_FDCWD". What's your opinion on this?
> 
> Generally, any application doing tree traversal will have the parent
> directory open doing readdir() or getdents() so I don't think that is
> a huge problem.

And if that application does not have a directory open, it is vulnerable 
to races...

	Jeff





^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: Attempt at "stat light" implementation
  2009-04-07  6:23 Attempt at "stat light" implementation Oleg Drokin
                   ` (2 preceding siblings ...)
  2009-04-07 17:49 ` Christoph Hellwig
@ 2009-04-07 20:18 ` Evgeniy Polyakov
  3 siblings, 0 replies; 25+ messages in thread
From: Evgeniy Polyakov @ 2009-04-07 20:18 UTC (permalink / raw)
  To: Oleg Drokin; +Cc: linux-fsdevel

Hi Oleg.

On Tue, Apr 07, 2009 at 10:23:56AM +0400, Oleg Drokin (green@linuxhacker.ru) wrote:
>   I quickly realized that perhaps we can use just one extra syscall for fstat, and stat/lstat could be
>   implemented as statat with just current cwd, though I do not know how desirable that is.
> 
>   Also I though that it would be kind of useful to allow the bitmask to be compatible with existing statat
>   flags usage so that we do not need a new statat.

What if instead you add single additional fsatat() flag and check
provided stat structure for the data user requested to get?
I.e. if i_size is not zero, than user wants to get it, the same applies
to all other fields.

-- 
	Evgeniy Polyakov

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: Attempt at "stat light" implementation
  2009-04-07 19:30         ` Ulrich Drepper
@ 2009-04-12 20:55           ` Jamie Lokier
  0 siblings, 0 replies; 25+ messages in thread
From: Jamie Lokier @ 2009-04-12 20:55 UTC (permalink / raw)
  To: Ulrich Drepper
  Cc: Andreas Dilger, Jeff Garzik, Oleg Drokin, linux-fsdevel, LKML

Ulrich Drepper wrote:
> As for making the interface extendable.  In all the years there hasn't
> really been much need to extend the stat structure (ignoring extended
> attributes).  So, I'd not be worried about using a simple 32-bit
> bitmask to select the fields to include in the fast stat.

I agree about 32 bits being enough.

I think there has been some demand for new fields, but it's not been
strong enough to justify all the breakage from changing the normal
stat structure.

That's why we have generic filesystem ioctls: FS_IOC_GETFLAGS,
FS_IOC_GETVERSION, which return things which would naturally go with
stat if it were free to do so (but it isn't).

-- Jamie

^ permalink raw reply	[flat|nested] 25+ messages in thread

end of thread, other threads:[~2009-04-12 20:55 UTC | newest]

Thread overview: 25+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2009-04-07  6:23 Attempt at "stat light" implementation Oleg Drokin
2009-04-07 10:23 ` Andreas Dilger
2009-04-07 14:54   ` Oleg Drokin
2009-04-07 17:52     ` Christoph Hellwig
2009-04-07 15:22   ` jim owens
2009-04-07 15:38     ` Oleg Drokin
2009-04-07 16:20       ` jim owens
2009-04-07 17:32 ` Jamie Lokier
2009-04-07 17:38   ` Jeff Garzik
2009-04-07 17:56     ` Jamie Lokier
2009-04-07 18:21       ` Andreas Dilger
2009-04-07 19:30         ` Ulrich Drepper
2009-04-12 20:55           ` Jamie Lokier
2009-04-07 17:41   ` Oleg Drokin
2009-04-07 17:54     ` Jamie Lokier
2009-04-07 18:00       ` Oleg Drokin
2009-04-07 18:18     ` Andreas Dilger
2009-04-07 18:31     ` Nicholas Miell
2009-04-07 17:49 ` Christoph Hellwig
2009-04-07 17:56   ` Oleg Drokin
2009-04-07 18:28     ` Andreas Dilger
2009-04-07 18:50       ` Jamie Lokier
2009-04-07 19:48       ` Jeff Garzik
2009-04-07 19:47   ` Jeff Garzik
2009-04-07 20:18 ` Evgeniy Polyakov

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).