public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
* [patch 13/13] lseek speedup
@ 2002-07-17  5:31 Andrew Morton
  2002-07-17  9:45 ` Anton Altaparmakov
  2002-07-17 10:54 ` Anton Altaparmakov
  0 siblings, 2 replies; 10+ messages in thread
From: Andrew Morton @ 2002-07-17  5:31 UTC (permalink / raw)
  To: Linus Torvalds; +Cc: lkml



This is a fairly dopey patch to fix up the i_sem contention in lseek.
Better ideas are welcome, but I'm offline until Monday so don't think
I'm ignoring them...

We need to decide what we really want to lock in there.  If multiple
threads are updating f_pos and/or i_size at the same time then they are
going to see strange results whatever the kernel does.  I'd maintain
that the only real obligation which the kernel has in this situation is
to not oops, to not munch data and to not return competely outlandish
results.

And the only way we can return outlandish results is on 32-bit SMP if
one CPU reads i_size or f_pos while another CPU is in the middle of
modifying it.

- We only need to take i_sem for reading i_size on 32-bit machines
  with SMP or preempt, so add the entertaining down32() and up32()
  functions for that.

- Only take i_sem in the case where we actually need to read i_size:
  SEEK_END.

  So this patch only speeds up SEEK_SET, SEEK_CUR and SEEK_END on
  64-bit.  SEEK_END on 32-bit will continue to bounce i_sem around.

- Introduce a new spinlock in struct file.  Its scope is currently
  for providing atomicity to f_pos updates.

  This lock could also be used for guarding consistency of the
  readahead state.  But after a close walkthrough the readahead code I
  don't think readahead needs any locking (apart from the 32-bit reader
  of i_size!).  Any errors in there will be +/- 1 differences in the
  numbers, and readahead just fixes things like that up anyway.

- Fiddled with the layout of struct file in an attempt to make it
  more cache-friendly.



 fs/file_table.c    |    1 
 fs/read_write.c    |   86 ++++++++++++++++++++++++++++-------------------------
 include/linux/fs.h |   15 ++++++---
 3 files changed, 57 insertions(+), 45 deletions(-)

--- 2.5.26/fs/read_write.c~lseek-speedup	Tue Jul 16 21:47:22 2002
+++ 2.5.26-akpm/fs/read_write.c	Tue Jul 16 21:47:22 2002
@@ -20,53 +20,59 @@ struct file_operations generic_ro_fops =
 	mmap:		generic_file_mmap,
 };
 
+/*
+ * Assume reads and writes of long longs are atomic on 64-bit machines
+ */
+static inline void down32(struct semaphore *sem)
+{
+#if (BITS_PER_LONG != 64) && (defined(CONFIG_SMP) || defined(CONFIG_PREEMPT))
+	down(sem);
+#endif
+}
+
+static inline void up32(struct semaphore *sem)
+{
+#if (BITS_PER_LONG != 64) && (defined(CONFIG_SMP) || defined(CONFIG_PREEMPT))
+	up(sem);
+#endif
+}
+
 loff_t generic_file_llseek(struct file *file, loff_t offset, int origin)
 {
-	long long retval;
+	loff_t retval;
 	struct inode *inode = file->f_dentry->d_inode->i_mapping->host;
 
-	down(&inode->i_sem);
-	switch (origin) {
-		case 2:
-			offset += inode->i_size;
-			break;
-		case 1:
-			offset += file->f_pos;
+	if (origin == 2) {
+		down32(&inode->i_sem);
+		offset += inode->i_size;
+		up32(&inode->i_sem);
+		spin_lock(&file->f_lock);
+	} else if (origin == 1) {
+		spin_lock(&file->f_lock);
+		offset += file->f_pos;
+	} else {
+		spin_lock(&file->f_lock);
 	}
 	retval = -EINVAL;
-	if (offset>=0 && offset<=inode->i_sb->s_maxbytes) {
+	if (offset >= 0 && offset <= inode->i_sb->s_maxbytes) {
 		if (offset != file->f_pos) {
 			file->f_pos = offset;
 			file->f_version = ++event;
 		}
 		retval = offset;
 	}
-	up(&inode->i_sem);
+	spin_unlock(&file->f_lock);
 	return retval;
 }
 
 loff_t remote_llseek(struct file *file, loff_t offset, int origin)
 {
-	long long retval;
+	loff_t ret;
 
 	lock_kernel();
-	switch (origin) {
-		case 2:
-			offset += file->f_dentry->d_inode->i_size;
-			break;
-		case 1:
-			offset += file->f_pos;
-	}
-	retval = -EINVAL;
-	if (offset>=0 && offset<=file->f_dentry->d_inode->i_sb->s_maxbytes) {
-		if (offset != file->f_pos) {
-			file->f_pos = offset;
-			file->f_version = ++event;
-		}
-		retval = offset;
-	}
+	ret = generic_file_llseek(file, offset, origin);
 	unlock_kernel();
-	return retval;
+	return ret;
 }
 
 loff_t no_llseek(struct file *file, loff_t offset, int origin)
@@ -76,15 +82,18 @@ loff_t no_llseek(struct file *file, loff
 
 loff_t default_llseek(struct file *file, loff_t offset, int origin)
 {
+	struct inode *inode = file->f_dentry->d_inode;
 	long long retval;
 
-	lock_kernel();
-	switch (origin) {
-		case 2:
-			offset += file->f_dentry->d_inode->i_size;
-			break;
-		case 1:
-			offset += file->f_pos;
+	if (origin == 2) {
+		down32(&inode->i_sem);
+		offset += inode->i_size;
+		up32(&inode->i_sem);
+	} else if (origin == 1) {
+		spin_lock(&file->f_lock);
+		offset += file->f_pos;
+	} else {
+		spin_lock(&file->f_lock);
 	}
 	retval = -EINVAL;
 	if (offset >= 0) {
@@ -94,18 +103,15 @@ loff_t default_llseek(struct file *file,
 		}
 		retval = offset;
 	}
-	unlock_kernel();
+	spin_unlock(&file->f_lock);
 	return retval;
 }
 
 static inline loff_t llseek(struct file *file, loff_t offset, int origin)
 {
-	loff_t (*fn)(struct file *, loff_t, int);
-
-	fn = default_llseek;
 	if (file->f_op && file->f_op->llseek)
-		fn = file->f_op->llseek;
-	return fn(file, offset, origin);
+		return (*file->f_op->llseek)(file, offset, origin);
+	return default_llseek(file, offset, origin);
 }
 
 asmlinkage off_t sys_lseek(unsigned int fd, off_t offset, unsigned int origin)
--- 2.5.26/include/linux/fs.h~lseek-speedup	Tue Jul 16 21:47:22 2002
+++ 2.5.26-akpm/include/linux/fs.h	Tue Jul 16 21:47:22 2002
@@ -475,20 +475,25 @@ struct file_ra_state {
 };
 
 struct file {
-	struct list_head	f_list;
 	struct dentry		*f_dentry;
+	struct list_head	f_list;
 	struct vfsmount         *f_vfsmnt;
-	struct file_operations	*f_op;
-	atomic_t		f_count;
+
 	unsigned int 		f_flags;
 	mode_t			f_mode;
+	struct file_operations	*f_op;
+	unsigned long		f_version;
+
+	atomic_t		f_count;
+	spinlock_t		f_lock;	/* for f_pos atomicity */
 	loff_t			f_pos;
+
 	struct fown_struct	f_owner;
-	unsigned int		f_uid, f_gid;
+	unsigned int		f_uid;
+	unsigned int		f_gid;
 	int			f_error;
 	struct file_ra_state	f_ra;
 
-	unsigned long		f_version;
 
 	/* needed for tty driver, and maybe others */
 	void			*private_data;
--- 2.5.26/fs/file_table.c~lseek-speedup	Tue Jul 16 21:47:22 2002
+++ 2.5.26-akpm/fs/file_table.c	Tue Jul 16 21:47:22 2002
@@ -44,6 +44,7 @@ struct file * get_empty_filp(void)
 	new_one:
 		memset(f, 0, sizeof(*f));
 		atomic_set(&f->f_count,1);
+		spin_lock_init(&f->f_lock);
 		f->f_version = ++event;
 		f->f_uid = current->fsuid;
 		f->f_gid = current->fsgid;

.

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [patch 13/13] lseek speedup
  2002-07-17  5:31 [patch 13/13] lseek speedup Andrew Morton
@ 2002-07-17  9:45 ` Anton Altaparmakov
  2002-07-17 16:18   ` Andrew Morton
  2002-07-17 10:54 ` Anton Altaparmakov
  1 sibling, 1 reply; 10+ messages in thread
From: Anton Altaparmakov @ 2002-07-17  9:45 UTC (permalink / raw)
  To: Andrew Morton; +Cc: Linus Torvalds, lkml

At 06:31 17/07/02, Andrew Morton wrote:
>This is a fairly dopey patch to fix up the i_sem contention in lseek.
>Better ideas are welcome, but I'm offline until Monday so don't think
>I'm ignoring them...

I am afraid I don't have any better ideas but I don't think your patch is 
safe. )-:

>We need to decide what we really want to lock in there.  If multiple
>threads are updating f_pos and/or i_size at the same time then they are
>going to see strange results whatever the kernel does.  I'd maintain
>that the only real obligation which the kernel has in this situation is
>to not oops, to not munch data and to not return competely outlandish
>results.

That can only be guaranteed by holding i_sem. Since the BKL was taken out 
of ->readdir(), i_sem is now used for exclusion between ->readdir() and 
->llseek() (well NTFS uses it anyway but many other fs which switched to 
generic_file_llseek() do the same). This exclusion is necessary IMHO.

->readdir() may well blow up or at the very least give "completely 
outlandish results" if someone modifies f_pos while it is running. 
Depending on when exactly it happens, ntfs_readdir() is going to be ok as 
it mostly sets f_pos instead of incrementing it but still, a succeeding 
->llseek(), may actually end up in a different position to the one 
requested because of a concurrent ->readdir().

I know doing the two things at the same time requires a completely crazy 
multithreaded application, but then again many application developers fall 
into that category. (-;

>And the only way we can return outlandish results is on 32-bit SMP if
>one CPU reads i_size or f_pos while another CPU is in the middle of
>modifying it.

I believe it can also happen if ->llseek() is allowed to run concurrently 
with a ->readdir()...

Best regards,

         Anton


-- 
   "I've not lost my mind. It's backed up on tape somewhere." - Unknown
-- 
Anton Altaparmakov <aia21 at cantab.net> (replace at with @)
Linux NTFS Maintainer / IRC: #ntfs on irc.openprojects.net
WWW: http://linux-ntfs.sf.net/ & http://www-stu.christs.cam.ac.uk/~aia21/


^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [patch 13/13] lseek speedup
  2002-07-17  5:31 [patch 13/13] lseek speedup Andrew Morton
  2002-07-17  9:45 ` Anton Altaparmakov
@ 2002-07-17 10:54 ` Anton Altaparmakov
  1 sibling, 0 replies; 10+ messages in thread
From: Anton Altaparmakov @ 2002-07-17 10:54 UTC (permalink / raw)
  To: Andrew Morton; +Cc: Linus Torvalds, lkml

Hi Andrew,

At 06:31 17/07/02, Andrew Morton wrote:
>  static inline loff_t llseek(struct file *file, loff_t offset, int origin)
>  {
>-       loff_t (*fn)(struct file *, loff_t, int);
>-
>-       fn = default_llseek;
>         if (file->f_op && file->f_op->llseek)
>-               fn = file->f_op->llseek;
>-       return fn(file, offset, origin);
>+               return (*file->f_op->llseek)(file, offset, origin);
>+       return default_llseek(file, offset, origin);
>  }

This one is interesting. I have been wondering for quite a while whether 
constructs like the original one actually produce better machine code or 
whether they should all be cleaned up just as you do here. I was never 
bothered enough about it to try but I have half an hour spare at the moment 
so I tried both - using gcc-2.96, RedHat 7.3 latest version, ia32, 
compiling for Athlon.

While I cannot speak for other compilers, gcc-2.96 at least generates 
better code for the old llseek() than for the new llseek() proposed by the 
above patch snippet.

The old code has following advantages:
         - Has only one conditional jump (comparing to two in new code.
         - Has no unconditional jump (comparing to one in new code).
         - Uses 16 bytes less stack space.
         - Machine code is shorter.

Here are the relevant different sections in sys_llseek() taken from the gcc 
generated assembly files both with and without just the above patch snippet 
applied:

----old code----
         movl    $default_llseek, %esi
         movl    16(%ebx), %eax
         testl   %eax, %eax
         je      .L1427
         movl    4(%eax), %eax
         testl   %eax, %eax
         cmovne  %eax, %esi
.L1427:
         pushl   48(%esp)
         pushl   %ecx
         pushl   %edx
         pushl   12(%esp)
         call    *%esi

----new code----
         movl    16(%ebx), %eax
         testl   %eax, %eax
         je      .L1428
         movl    4(%eax), %eax
         testl   %eax, %eax
         je      .L1428
         pushl   64(%esp)
         pushl   %ecx
         pushl   %edx
         pushl   %ebx
         call    *%eax
         jmp     .L1442
         .p2align 4,,7
.L1428:
         pushl   64(%esp)
         pushl   %ecx
         pushl   %edx
         pushl   12(%esp)
         call    default_llseek
.L1442:

Obviously we are counting a few cycles difference only (although the fewer 
jmps could make a bigger difference if the branch prediction of the CPU 
doesn't match to the common usage patch and the pipe line is stalled) and 
sys_llseek() is not exactly the most performance critical code, however I 
thought it is interesting to note that the old code is a "coding style" 
which produces better machine code in general with (gcc-2.96)...

And also it provided me with something fun to do in the half hour I am 
waiting for my gel to run. (-8 Back to lab work...

Best regards,

         Anton


-- 
   "I've not lost my mind. It's backed up on tape somewhere." - Unknown
-- 
Anton Altaparmakov <aia21 at cantab.net> (replace at with @)
Linux NTFS Maintainer / IRC: #ntfs on irc.openprojects.net
WWW: http://linux-ntfs.sf.net/ & http://www-stu.christs.cam.ac.uk/~aia21/


^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [patch 13/13] lseek speedup
  2002-07-17  9:45 ` Anton Altaparmakov
@ 2002-07-17 16:18   ` Andrew Morton
  2002-07-17 16:20     ` Anton Altaparmakov
  0 siblings, 1 reply; 10+ messages in thread
From: Andrew Morton @ 2002-07-17 16:18 UTC (permalink / raw)
  To: Anton Altaparmakov; +Cc: Linus Torvalds, lkml

Anton Altaparmakov wrote:
> 
> At 06:31 17/07/02, Andrew Morton wrote:
> >This is a fairly dopey patch to fix up the i_sem contention in lseek.
> >Better ideas are welcome, but I'm offline until Monday so don't think
> >I'm ignoring them...
> 
> I am afraid I don't have any better ideas but I don't think your patch is
> safe. )-:

It wasn't a very good idea in the first place.  Forgot to take
the new lock over in the updaters of f_pos.

And it's attempting to cater for a buggy application on a 32-bit
machine, SMP, where the fd is shared.  It's hard to justify
putting any locking in lseek for that.  (Then again, people
should use pread() more..)

Except for readdir().  Now, why are we taking i_sem for lseek/readdir
exclusion and not a per-file lock?

-

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [patch 13/13] lseek speedup
  2002-07-17 16:18   ` Andrew Morton
@ 2002-07-17 16:20     ` Anton Altaparmakov
  2002-07-17 16:49       ` Andrew Morton
  0 siblings, 1 reply; 10+ messages in thread
From: Anton Altaparmakov @ 2002-07-17 16:20 UTC (permalink / raw)
  To: Andrew Morton; +Cc: Linus Torvalds, lkml

Hi Andrew,

At 17:18 17/07/02, Andrew Morton wrote:
>Anton Altaparmakov wrote:
> > At 06:31 17/07/02, Andrew Morton wrote:
> > >This is a fairly dopey patch to fix up the i_sem contention in lseek.
> > >Better ideas are welcome, but I'm offline until Monday so don't think
> > >I'm ignoring them...
> >
> > I am afraid I don't have any better ideas but I don't think your patch is
> > safe. )-:
>
>It wasn't a very good idea in the first place.  Forgot to take
>the new lock over in the updaters of f_pos.
>
>And it's attempting to cater for a buggy application on a 32-bit
>machine, SMP, where the fd is shared.  It's hard to justify
>putting any locking in lseek for that.  (Then again, people
>should use pread() more..)
>
>Except for readdir().  Now, why are we taking i_sem for lseek/readdir
>exclusion and not a per-file lock?

Because it also excludes against directory modifications, etc. Just imagine 
what "rm somefile" or "mv somefile otherfile" or "touch newfile" would do 
to the directory contents and what a concurrent readdir() would do... A 
very loud *BANG* is the only thing that springs to mind...

btw. the directory modification locking rules are written up in 
Documentation/filesystems/directory-locking by our very own VFS maintainer 
Al Viro himself... (-;

Cheers,

         Anton


-- 
   "I've not lost my mind. It's backed up on tape somewhere." - Unknown
-- 
Anton Altaparmakov <aia21 at cantab.net> (replace at with @)
Linux NTFS Maintainer / IRC: #ntfs on irc.openprojects.net
WWW: http://linux-ntfs.sf.net/ & http://www-stu.christs.cam.ac.uk/~aia21/


^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [patch 13/13] lseek speedup
  2002-07-17 16:20     ` Anton Altaparmakov
@ 2002-07-17 16:49       ` Andrew Morton
  2002-07-17 18:07         ` Anton Altaparmakov
  2002-07-17 18:27         ` Benjamin LaHaise
  0 siblings, 2 replies; 10+ messages in thread
From: Andrew Morton @ 2002-07-17 16:49 UTC (permalink / raw)
  To: Anton Altaparmakov; +Cc: Linus Torvalds, lkml

Anton Altaparmakov wrote:
> 
> >  Now, why are we taking i_sem for lseek/readdir
> >exclusion and not a per-file lock?
> 
> Because it also excludes against directory modifications, etc. Just imagine
> what "rm somefile" or "mv somefile otherfile" or "touch newfile" would do
> to the directory contents and what a concurrent readdir() would do... A
> very loud *BANG* is the only thing that springs to mind...

That's different.  i_size, contents of things, yes - i_sem for
those.

But protection of struct file should not be via any per-inode thing.

> btw. the directory modification locking rules are written up in
> Documentation/filesystems/directory-locking by our very own VFS maintainer
> Al Viro himself... (-;

Doesn't cover lseek...

-

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [patch 13/13] lseek speedup
  2002-07-17 16:49       ` Andrew Morton
@ 2002-07-17 18:07         ` Anton Altaparmakov
  2002-07-22  7:16           ` Andrew Morton
  2002-07-17 18:27         ` Benjamin LaHaise
  1 sibling, 1 reply; 10+ messages in thread
From: Anton Altaparmakov @ 2002-07-17 18:07 UTC (permalink / raw)
  To: Andrew Morton; +Cc: Linus Torvalds, lkml

At 17:49 17/07/02, Andrew Morton wrote:
>Anton Altaparmakov wrote:
> >
> > >  Now, why are we taking i_sem for lseek/readdir
> > >exclusion and not a per-file lock?
> >
> > Because it also excludes against directory modifications, etc. Just imagine
> > what "rm somefile" or "mv somefile otherfile" or "touch newfile" would do
> > to the directory contents and what a concurrent readdir() would do... A
> > very loud *BANG* is the only thing that springs to mind...
>
>That's different.  i_size, contents of things, yes - i_sem for
>those.
>
>But protection of struct file should not be via any per-inode thing.

Oh, I see. But that would mean adding yet another lock to an existing 
locking scheme? So both i_sem and the "per file lock" would nead to be held 
over readdir(). That's doable but it would have to be a semaphore based 
lock due to readdir()...

Perhaps the f_lock from your patch, changed to a semaphore, and acquiring 
it in generic_file_llseek&friends, vfs_readdir() (and any other place where 
f_pos needs protecting) would be able to do the trick. Would that solve the 
lock contention problems you were seing? It would at least off-load i_sem a 
bit... but it would only replace one semaphore for another...

> > btw. the directory modification locking rules are written up in
> > Documentation/filesystems/directory-locking by our very own VFS maintainer
> > Al Viro himself... (-;
>
>Doesn't cover lseek...

Hm, true. I hadn't read it for quite a while...

         Anton


-- 
   "I've not lost my mind. It's backed up on tape somewhere." - Unknown
-- 
Anton Altaparmakov <aia21 at cantab.net> (replace at with @)
Linux NTFS Maintainer / IRC: #ntfs on irc.openprojects.net
WWW: http://linux-ntfs.sf.net/ & http://www-stu.christs.cam.ac.uk/~aia21/


^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [patch 13/13] lseek speedup
  2002-07-17 16:49       ` Andrew Morton
  2002-07-17 18:07         ` Anton Altaparmakov
@ 2002-07-17 18:27         ` Benjamin LaHaise
  1 sibling, 0 replies; 10+ messages in thread
From: Benjamin LaHaise @ 2002-07-17 18:27 UTC (permalink / raw)
  To: Andrew Morton; +Cc: Anton Altaparmakov, Linus Torvalds, lkml

On Wed, Jul 17, 2002 at 09:49:40AM -0700, Andrew Morton wrote:
> But protection of struct file should not be via any per-inode thing.

Why not make use of set_64bit instead of using a spinlock?  It might 
need a companion get_64bit, but at least on 64 bit machines they could  
be defined to use a direct load/store.

		-ben
-- 
"You will be reincarnated as a toad; and you will be much happier."

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [patch 13/13] lseek speedup
  2002-07-17 18:07         ` Anton Altaparmakov
@ 2002-07-22  7:16           ` Andrew Morton
  2002-07-22  7:43             ` Anton Altaparmakov
  0 siblings, 1 reply; 10+ messages in thread
From: Andrew Morton @ 2002-07-22  7:16 UTC (permalink / raw)
  To: Anton Altaparmakov; +Cc: Linus Torvalds, lkml

Anton Altaparmakov wrote:
> 
> At 17:49 17/07/02, Andrew Morton wrote:
> >Anton Altaparmakov wrote:
> > >
> > > >  Now, why are we taking i_sem for lseek/readdir
> > > >exclusion and not a per-file lock?
> > >
> > > Because it also excludes against directory modifications, etc. Just imagine
> > > what "rm somefile" or "mv somefile otherfile" or "touch newfile" would do
> > > to the directory contents and what a concurrent readdir() would do... A
> > > very loud *BANG* is the only thing that springs to mind...
> >
> >That's different.  i_size, contents of things, yes - i_sem for
> >those.
> >
> >But protection of struct file should not be via any per-inode thing.
> 
> Oh, I see. But that would mean adding yet another lock to an existing
> locking scheme? So both i_sem and the "per file lock" would nead to be held
> over readdir(). That's doable but it would have to be a semaphore based
> lock due to readdir()...

Adding a sleeping lock to struct file for this would make sense; using
i_sem to protect the innards of struct file ain't right.

However I'm not sure that the VFS needs to be serialising lseek
and readdir at all.  The fs can do that if it really needs to.

And does it really need to?  Setting aside the non-atomic 64-bit
read, it may be sufficient for the fs to do what ext2_readdir
does:  read f_pos once on entry, write it once on exit and if
there was a concurrent lseek then its results get stomped on.

Can you see any problem with that?

Anyway.  It's not exactly a top-priority thing.  I'll park
it for a while and just stop running that test ;)

-

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [patch 13/13] lseek speedup
  2002-07-22  7:16           ` Andrew Morton
@ 2002-07-22  7:43             ` Anton Altaparmakov
  0 siblings, 0 replies; 10+ messages in thread
From: Anton Altaparmakov @ 2002-07-22  7:43 UTC (permalink / raw)
  To: Andrew Morton; +Cc: Linus Torvalds, lkml

At 08:16 22/07/02, Andrew Morton wrote:
>Anton Altaparmakov wrote:
> > At 17:49 17/07/02, Andrew Morton wrote:
> > >Anton Altaparmakov wrote:
> > > >
> > > > >  Now, why are we taking i_sem for lseek/readdir
> > > > >exclusion and not a per-file lock?
> > > >
> > > > Because it also excludes against directory modifications, etc. Just 
> imagine
> > > > what "rm somefile" or "mv somefile otherfile" or "touch newfile" 
> would do
> > > > to the directory contents and what a concurrent readdir() would do... A
> > > > very loud *BANG* is the only thing that springs to mind...
> > >
> > >That's different.  i_size, contents of things, yes - i_sem for
> > >those.
> > >
> > >But protection of struct file should not be via any per-inode thing.
> >
> > Oh, I see. But that would mean adding yet another lock to an existing
> > locking scheme? So both i_sem and the "per file lock" would nead to be held
> > over readdir(). That's doable but it would have to be a semaphore based
> > lock due to readdir()...
>
>Adding a sleeping lock to struct file for this would make sense; using
>i_sem to protect the innards of struct file ain't right.
>
>However I'm not sure that the VFS needs to be serialising lseek
>and readdir at all.  The fs can do that if it really needs to.
>
>And does it really need to?  Setting aside the non-atomic 64-bit
>read, it may be sufficient for the fs to do what ext2_readdir
>does:  read f_pos once on entry, write it once on exit and if
>there was a concurrent lseek then its results get stomped on.
>
>Can you see any problem with that?

No problem. NTFS can very easily be turned into doing that. It would need 
to happen to all file systems and perhaps a little documentation in the 
porting document to say so for out of kernel fs...

>Anyway.  It's not exactly a top-priority thing.  I'll park
>it for a while and just stop running that test ;)

Well, I will get ntfs_readdir changed to only read/write f_pos once on 
entry/exit respectively anyway...

Anton


-- 
   "I've not lost my mind. It's backed up on tape somewhere." - Unknown
-- 
Anton Altaparmakov <aia21 at cantab.net> (replace at with @)
Linux NTFS Maintainer / IRC: #ntfs on irc.openprojects.net
WWW: http://linux-ntfs.sf.net/ & http://www-stu.christs.cam.ac.uk/~aia21/


^ permalink raw reply	[flat|nested] 10+ messages in thread

end of thread, other threads:[~2002-07-22  7:40 UTC | newest]

Thread overview: 10+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2002-07-17  5:31 [patch 13/13] lseek speedup Andrew Morton
2002-07-17  9:45 ` Anton Altaparmakov
2002-07-17 16:18   ` Andrew Morton
2002-07-17 16:20     ` Anton Altaparmakov
2002-07-17 16:49       ` Andrew Morton
2002-07-17 18:07         ` Anton Altaparmakov
2002-07-22  7:16           ` Andrew Morton
2002-07-22  7:43             ` Anton Altaparmakov
2002-07-17 18:27         ` Benjamin LaHaise
2002-07-17 10:54 ` Anton Altaparmakov

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox