linux-fsdevel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [RESEND] [PATCH] lseek: remove i_mutex
@ 2009-02-03  8:04 Hisashi Hifumi
  2009-02-05 20:05 ` Andrew Morton
  0 siblings, 1 reply; 8+ messages in thread
From: Hisashi Hifumi @ 2009-02-03  8:04 UTC (permalink / raw)
  To: akpm, linux-kernel, linux-fsdevel

Hi 
I removed i_mutex from generic_file_llseek.
I think that the reason of protecting lseek with i_mutex is just
touching i_size atomically.

So I introduce i_size_read here so i_mutex is no longer needed.

Following patch removes i_mutex from generic_file_llseek, and deletes 
generic_file_llseek_nolock totally.

Currently there is i_mutex contention not only around lseek, but also fsync or write.
So,  I think we can mitigate i_mutex contention between fsync lseek and write by
removing i_mutex.

Thanks.

Signed-off-by: Hisashi Hifumi <hifumi.hisashi@oss.ntt.co.jp>

diff -Nrup linux-2.6.29-rc3.org/fs/cifs/cifsfs.c linux-2.6.29-rc3.lseek/fs/cifs/cifsfs.c
--- linux-2.6.29-rc3.org/fs/cifs/cifsfs.c	2009-02-02 16:53:19.000000000 +0900
+++ linux-2.6.29-rc3.lseek/fs/cifs/cifsfs.c	2009-02-02 20:25:33.000000000 +0900
@@ -636,7 +636,7 @@ static loff_t cifs_llseek(struct file *f
 		if (retval < 0)
 			return (loff_t)retval;
 	}
-	return generic_file_llseek_unlocked(file, offset, origin);
+	return generic_file_llseek(file, offset, origin);
 }
 
 #ifdef CONFIG_CIFS_EXPERIMENTAL
diff -Nrup linux-2.6.29-rc3.org/fs/gfs2/ops_file.c linux-2.6.29-rc3.lseek/fs/gfs2/ops_file.c
--- linux-2.6.29-rc3.org/fs/gfs2/ops_file.c	2009-02-02 16:53:19.000000000 +0900
+++ linux-2.6.29-rc3.lseek/fs/gfs2/ops_file.c	2009-02-02 20:25:33.000000000 +0900
@@ -62,11 +62,11 @@ static loff_t gfs2_llseek(struct file *f
 		error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY,
 					   &i_gh);
 		if (!error) {
-			error = generic_file_llseek_unlocked(file, offset, origin);
+			error = generic_file_llseek(file, offset, origin);
 			gfs2_glock_dq_uninit(&i_gh);
 		}
 	} else
-		error = generic_file_llseek_unlocked(file, offset, origin);
+		error = generic_file_llseek(file, offset, origin);
 
 	return error;
 }
diff -Nrup linux-2.6.29-rc3.org/fs/ncpfs/file.c linux-2.6.29-rc3.lseek/fs/ncpfs/file.c
--- linux-2.6.29-rc3.org/fs/ncpfs/file.c	2008-12-25 08:26:37.000000000 +0900
+++ linux-2.6.29-rc3.lseek/fs/ncpfs/file.c	2009-02-02 20:25:33.000000000 +0900
@@ -286,7 +286,7 @@ static loff_t ncp_remote_llseek(struct f
 {
 	loff_t ret;
 	lock_kernel();
-	ret = generic_file_llseek_unlocked(file, offset, origin);
+	ret = generic_file_llseek(file, offset, origin);
 	unlock_kernel();
 	return ret;
 }
diff -Nrup linux-2.6.29-rc3.org/fs/nfs/file.c linux-2.6.29-rc3.lseek/fs/nfs/file.c
--- linux-2.6.29-rc3.org/fs/nfs/file.c	2009-02-02 16:53:19.000000000 +0900
+++ linux-2.6.29-rc3.lseek/fs/nfs/file.c	2009-02-02 20:25:33.000000000 +0900
@@ -194,10 +194,10 @@ static loff_t nfs_file_llseek(struct fil
 			return (loff_t)retval;
 
 		spin_lock(&inode->i_lock);
-		loff = generic_file_llseek_unlocked(filp, offset, origin);
+		loff = generic_file_llseek(filp, offset, origin);
 		spin_unlock(&inode->i_lock);
 	} else
-		loff = generic_file_llseek_unlocked(filp, offset, origin);
+		loff = generic_file_llseek(filp, offset, origin);
 	return loff;
 }
 
diff -Nrup linux-2.6.29-rc3.org/fs/read_write.c linux-2.6.29-rc3.lseek/fs/read_write.c
--- linux-2.6.29-rc3.org/fs/read_write.c	2009-02-02 16:53:19.000000000 +0900
+++ linux-2.6.29-rc3.lseek/fs/read_write.c	2009-02-02 20:25:33.000000000 +0900
@@ -32,22 +32,23 @@ const struct file_operations generic_ro_
 EXPORT_SYMBOL(generic_ro_fops);
 
 /**
- * generic_file_llseek_unlocked - lockless generic llseek implementation
+ * generic_file_llseek - generic llseek implementation for regular files
  * @file:	file structure to seek on
  * @offset:	file offset to seek to
  * @origin:	type of seek
  *
- * Updates the file offset to the value specified by @offset and @origin.
- * Locking must be provided by the caller.
+ * This is a generic implemenation of ->llseek useable for all normal local
+ * filesystems.  It just updates the file offset to the value specified by
+ * @offset and @origin.
  */
-loff_t
-generic_file_llseek_unlocked(struct file *file, loff_t offset, int origin)
+loff_t generic_file_llseek(struct file *file, loff_t offset, int origin)
 {
+	loff_t rval;
 	struct inode *inode = file->f_mapping->host;
 
 	switch (origin) {
 	case SEEK_END:
-		offset += inode->i_size;
+		offset += i_size_read(inode);
 		break;
 	case SEEK_CUR:
 		/*
@@ -70,28 +71,7 @@ generic_file_llseek_unlocked(struct file
 		file->f_pos = offset;
 		file->f_version = 0;
 	}
-
-	return offset;
-}
-EXPORT_SYMBOL(generic_file_llseek_unlocked);
-
-/**
- * generic_file_llseek - generic llseek implementation for regular files
- * @file:	file structure to seek on
- * @offset:	file offset to seek to
- * @origin:	type of seek
- *
- * This is a generic implemenation of ->llseek useable for all normal local
- * filesystems.  It just updates the file offset to the value specified by
- * @offset and @origin under i_mutex.
- */
-loff_t generic_file_llseek(struct file *file, loff_t offset, int origin)
-{
-	loff_t rval;
-
-	mutex_lock(&file->f_dentry->d_inode->i_mutex);
-	rval = generic_file_llseek_unlocked(file, offset, origin);
-	mutex_unlock(&file->f_dentry->d_inode->i_mutex);
+	rval = offset;
 
 	return rval;
 }
diff -Nrup linux-2.6.29-rc3.org/fs/smbfs/file.c linux-2.6.29-rc3.lseek/fs/smbfs/file.c
--- linux-2.6.29-rc3.org/fs/smbfs/file.c	2009-02-02 16:53:19.000000000 +0900
+++ linux-2.6.29-rc3.lseek/fs/smbfs/file.c	2009-02-02 20:25:33.000000000 +0900
@@ -426,7 +426,7 @@ static loff_t smb_remote_llseek(struct f
 {
 	loff_t ret;
 	lock_kernel();
-	ret = generic_file_llseek_unlocked(file, offset, origin);
+	ret = generic_file_llseek(file, offset, origin);
 	unlock_kernel();
 	return ret;
 }
diff -Nrup linux-2.6.29-rc3.org/include/linux/fs.h linux-2.6.29-rc3.lseek/include/linux/fs.h
--- linux-2.6.29-rc3.org/include/linux/fs.h	2009-02-02 16:53:21.000000000 +0900
+++ linux-2.6.29-rc3.lseek/include/linux/fs.h	2009-02-02 20:25:33.000000000 +0900
@@ -1982,8 +1982,6 @@ extern void
 file_ra_state_init(struct file_ra_state *ra, struct address_space *mapping);
 extern loff_t no_llseek(struct file *file, loff_t offset, int origin);
 extern loff_t generic_file_llseek(struct file *file, loff_t offset, int origin);
-extern loff_t generic_file_llseek_unlocked(struct file *file, loff_t offset,
-			int origin);
 extern int generic_file_open(struct inode * inode, struct file * filp);
 extern int nonseekable_open(struct inode * inode, struct file * filp);
 


^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [RESEND] [PATCH] lseek: remove i_mutex
  2009-02-03  8:04 [RESEND] [PATCH] " Hisashi Hifumi
@ 2009-02-05 20:05 ` Andrew Morton
  2009-02-06  0:20   ` Hisashi Hifumi
  0 siblings, 1 reply; 8+ messages in thread
From: Andrew Morton @ 2009-02-05 20:05 UTC (permalink / raw)
  To: Hisashi Hifumi; +Cc: linux-kernel, linux-fsdevel

On Tue, 03 Feb 2009 17:04:40 +0900
Hisashi Hifumi <hifumi.hisashi@oss.ntt.co.jp> wrote:

> I removed i_mutex from generic_file_llseek.
> I think that the reason of protecting lseek with i_mutex is just
> touching i_size atomically.
> 
> So I introduce i_size_read here so i_mutex is no longer needed.
> 
> Following patch removes i_mutex from generic_file_llseek, and deletes 
> generic_file_llseek_nolock totally.
> 
> Currently there is i_mutex contention not only around lseek, but also fsync or write.
> So,  I think we can mitigate i_mutex contention between fsync lseek and write by
> removing i_mutex.

Prior to this change, generic_file_llseek() modified file->f_pos
atomically with respect to other i_mutex holders.

After this change, it doesn't.

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [RESEND] [PATCH] lseek: remove i_mutex
  2009-02-05 20:05 ` Andrew Morton
@ 2009-02-06  0:20   ` Hisashi Hifumi
  2009-02-06  0:33     ` Andrew Morton
  0 siblings, 1 reply; 8+ messages in thread
From: Hisashi Hifumi @ 2009-02-06  0:20 UTC (permalink / raw)
  To: Andrew Morton; +Cc: linux-kernel, linux-fsdevel


At 05:05 09/02/06, Andrew Morton wrote:
>On Tue, 03 Feb 2009 17:04:40 +0900
>Hisashi Hifumi <hifumi.hisashi@oss.ntt.co.jp> wrote:
>
>> I removed i_mutex from generic_file_llseek.
>> I think that the reason of protecting lseek with i_mutex is just
>> touching i_size atomically.
>> 
>> So I introduce i_size_read here so i_mutex is no longer needed.
>> 
>> Following patch removes i_mutex from generic_file_llseek, and deletes 
>> generic_file_llseek_nolock totally.
>> 
>> Currently there is i_mutex contention not only around lseek, but also 
>fsync or write.
>> So,  I think we can mitigate i_mutex contention between fsync lseek and 
>write by
>> removing i_mutex.
>
>Prior to this change, generic_file_llseek() modified file->f_pos
>atomically with respect to other i_mutex holders.
>
>After this change, it doesn't.

Hi Andrew.

Even before this change is applied, file->f_pos access is not atomic.
sys_read change f_pos value through file_pos_write without i_mutex.
I think seqlock is needed to make f_pos access atomic.


^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [RESEND] [PATCH] lseek: remove i_mutex
  2009-02-06  0:20   ` Hisashi Hifumi
@ 2009-02-06  0:33     ` Andrew Morton
  0 siblings, 0 replies; 8+ messages in thread
From: Andrew Morton @ 2009-02-06  0:33 UTC (permalink / raw)
  To: Hisashi Hifumi; +Cc: linux-kernel, linux-fsdevel

On Fri, 06 Feb 2009 09:20:30 +0900
Hisashi Hifumi <hifumi.hisashi@oss.ntt.co.jp> wrote:

> 
> At 05:05 09/02/06, Andrew Morton wrote:
> >On Tue, 03 Feb 2009 17:04:40 +0900
> >Hisashi Hifumi <hifumi.hisashi@oss.ntt.co.jp> wrote:
> >
> >> I removed i_mutex from generic_file_llseek.
> >> I think that the reason of protecting lseek with i_mutex is just
> >> touching i_size atomically.
> >> 
> >> So I introduce i_size_read here so i_mutex is no longer needed.
> >> 
> >> Following patch removes i_mutex from generic_file_llseek, and deletes 
> >> generic_file_llseek_nolock totally.
> >> 
> >> Currently there is i_mutex contention not only around lseek, but also 
> >fsync or write.
> >> So,  I think we can mitigate i_mutex contention between fsync lseek and 
> >write by
> >> removing i_mutex.
> >
> >Prior to this change, generic_file_llseek() modified file->f_pos
> >atomically with respect to other i_mutex holders.
> >
> >After this change, it doesn't.
> 
> Hi Andrew.
> 
> Even before this change is applied, file->f_pos access is not atomic.
> sys_read change f_pos value through file_pos_write without i_mutex.

I know.  That's why I specified "with respect to other i_mutex holders".

This patch makes things worse.

At very very minimum the changelog should explain that this patch makes
things worse, and demonstrate why this is justifiable.

> I think seqlock is needed to make f_pos access atomic.

Maybe.  Or atomic64_t, or spinlocking, or i_mutex, or something else.

^ permalink raw reply	[flat|nested] 8+ messages in thread

* [RESEND][PATCH] lseek: remove i_mutex
@ 2009-05-14  0:26 Hisashi Hifumi
  2009-05-14  5:57 ` Eric Dumazet
  0 siblings, 1 reply; 8+ messages in thread
From: Hisashi Hifumi @ 2009-05-14  0:26 UTC (permalink / raw)
  To: Andrew Morton; +Cc: linux-kernel, linux-fsdevel

Hi, 

Following patch removes i_mutex from generic_file_llseek.
I think the reason of protecting lseek with i_mutex is just
touching i_size (and f_pos) atomically.
So i_mutex is no longer needed here by introducing i_size_read 
to read i_size.
I also made f_pos access atomic here without i_mutex regarding
former i_mutex holders by introducing file_pos_read/file_pos_write
that use seqlock to preserve atomicity.

Following patch removes i_mutex from generic_file_llseek, and deletes 
generic_file_llseek_nolock totally.

Currently there is i_mutex contention not only around lseek, but also fsync or write.
So,  I think we can mitigate i_mutex contention between fsync lseek and write by
removing i_mutex.

Thanks.

Signed-off-by: Hisashi Hifumi <hifumi.hisashi@oss.ntt.co.jp>

diff -Nrup linux-2.6.30-rc5.org/fs/cifs/cifsfs.c linux-2.6.30-rc5.lseek/fs/cifs/cifsfs.c
--- linux-2.6.30-rc5.org/fs/cifs/cifsfs.c	2009-05-11 10:07:14.000000000 +0900
+++ linux-2.6.30-rc5.lseek/fs/cifs/cifsfs.c	2009-05-11 10:10:36.000000000 +0900
@@ -635,7 +635,7 @@ static loff_t cifs_llseek(struct file *f
 		if (retval < 0)
 			return (loff_t)retval;
 	}
-	return generic_file_llseek_unlocked(file, offset, origin);
+	return generic_file_llseek(file, offset, origin);
 }
 
 #ifdef CONFIG_CIFS_EXPERIMENTAL
diff -Nrup linux-2.6.30-rc5.org/fs/file_table.c linux-2.6.30-rc5.lseek/fs/file_table.c
--- linux-2.6.30-rc5.org/fs/file_table.c	2009-05-11 10:07:14.000000000 +0900
+++ linux-2.6.30-rc5.lseek/fs/file_table.c	2009-05-11 10:10:36.000000000 +0900
@@ -126,6 +126,7 @@ struct file *get_empty_filp(void)
 
 	INIT_LIST_HEAD(&f->f_u.fu_list);
 	atomic_long_set(&f->f_count, 1);
+	f_pos_ordered_init(f);
 	rwlock_init(&f->f_owner.lock);
 	f->f_cred = get_cred(cred);
 	spin_lock_init(&f->f_lock);
diff -Nrup linux-2.6.30-rc5.org/fs/gfs2/ops_file.c linux-2.6.30-rc5.lseek/fs/gfs2/ops_file.c
--- linux-2.6.30-rc5.org/fs/gfs2/ops_file.c	2009-05-11 10:07:15.000000000 +0900
+++ linux-2.6.30-rc5.lseek/fs/gfs2/ops_file.c	2009-05-11 10:10:36.000000000 +0900
@@ -63,11 +63,11 @@ static loff_t gfs2_llseek(struct file *f
 		error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY,
 					   &i_gh);
 		if (!error) {
-			error = generic_file_llseek_unlocked(file, offset, origin);
+			error = generic_file_llseek(file, offset, origin);
 			gfs2_glock_dq_uninit(&i_gh);
 		}
 	} else
-		error = generic_file_llseek_unlocked(file, offset, origin);
+		error = generic_file_llseek(file, offset, origin);
 
 	return error;
 }
diff -Nrup linux-2.6.30-rc5.org/fs/ncpfs/file.c linux-2.6.30-rc5.lseek/fs/ncpfs/file.c
--- linux-2.6.30-rc5.org/fs/ncpfs/file.c	2009-03-24 08:12:14.000000000 +0900
+++ linux-2.6.30-rc5.lseek/fs/ncpfs/file.c	2009-05-11 10:10:36.000000000 +0900
@@ -286,7 +286,7 @@ static loff_t ncp_remote_llseek(struct f
 {
 	loff_t ret;
 	lock_kernel();
-	ret = generic_file_llseek_unlocked(file, offset, origin);
+	ret = generic_file_llseek(file, offset, origin);
 	unlock_kernel();
 	return ret;
 }
diff -Nrup linux-2.6.30-rc5.org/fs/nfs/file.c linux-2.6.30-rc5.lseek/fs/nfs/file.c
--- linux-2.6.30-rc5.org/fs/nfs/file.c	2009-05-11 10:07:15.000000000 +0900
+++ linux-2.6.30-rc5.lseek/fs/nfs/file.c	2009-05-11 10:10:36.000000000 +0900
@@ -188,10 +188,10 @@ static loff_t nfs_file_llseek(struct fil
 			return (loff_t)retval;
 
 		spin_lock(&inode->i_lock);
-		loff = generic_file_llseek_unlocked(filp, offset, origin);
+		loff = generic_file_llseek(filp, offset, origin);
 		spin_unlock(&inode->i_lock);
 	} else
-		loff = generic_file_llseek_unlocked(filp, offset, origin);
+		loff = generic_file_llseek(filp, offset, origin);
 	return loff;
 }
 
diff -Nrup linux-2.6.30-rc5.org/fs/read_write.c linux-2.6.30-rc5.lseek/fs/read_write.c
--- linux-2.6.30-rc5.org/fs/read_write.c	2009-05-11 10:07:15.000000000 +0900
+++ linux-2.6.30-rc5.lseek/fs/read_write.c	2009-05-11 10:10:36.000000000 +0900
@@ -31,23 +31,61 @@ const struct file_operations generic_ro_
 
 EXPORT_SYMBOL(generic_ro_fops);
 
+static inline loff_t file_pos_read(struct file *file)
+{
+#if BITS_PER_LONG == 32 && defined(CONFIG_SMP)
+	loff_t pos;
+	unsigned int seq;
+
+	do {
+		seq = read_seqbegin(&file->f_pos_seqlock);
+		pos = file->f_pos;
+	} while (read_seqretry(&file->f_pos_seqlock, seq));
+	return pos;
+#elif BITS_PER_LONG == 32 && defined(CONFIG_PREEMPT)
+	loff_t pos;
+
+	preempt_disable();
+	pos = file->f_pos;
+	preempt_enable();
+	return pos;
+#else
+	return file->f_pos;
+#endif
+}
+
+static inline void file_pos_write(struct file *file, loff_t pos)
+{
+#if BITS_PER_LONG == 32 && defined(CONFIG_SMP)
+	write_seqlock(&file->f_pos_seqlock);
+	file->f_pos = pos;
+	write_sequnlock(&file->f_pos_seqlock);
+#elif BITS_PER_LONG == 32 && defined(CONFIG_PREEMPT)
+	preempt_disable();
+	file->f_pos = pos;
+	preempt_enable();
+#else
+	file->f_pos = pos;
+#endif
+}
+
 /**
- * generic_file_llseek_unlocked - lockless generic llseek implementation
+ * generic_file_llseek - generic llseek implementation for regular files
  * @file:	file structure to seek on
  * @offset:	file offset to seek to
  * @origin:	type of seek
  *
- * Updates the file offset to the value specified by @offset and @origin.
- * Locking must be provided by the caller.
+ * This is a generic implemenation of ->llseek useable for all normal local
+ * filesystems.  It just updates the file offset to the value specified by
+ * @offset and @origin.
  */
-loff_t
-generic_file_llseek_unlocked(struct file *file, loff_t offset, int origin)
+loff_t generic_file_llseek(struct file *file, loff_t offset, int origin)
 {
 	struct inode *inode = file->f_mapping->host;
 
 	switch (origin) {
 	case SEEK_END:
-		offset += inode->i_size;
+		offset += i_size_read(inode);
 		break;
 	case SEEK_CUR:
 		/*
@@ -57,8 +95,8 @@ generic_file_llseek_unlocked(struct file
 		* write() or lseek() might have altered it
 		*/
 		if (offset == 0)
-			return file->f_pos;
-		offset += file->f_pos;
+			return file_pos_read(file);
+		offset += file_pos_read(file);
 		break;
 	}
 
@@ -66,35 +104,13 @@ generic_file_llseek_unlocked(struct file
 		return -EINVAL;
 
 	/* Special lock needed here? */
-	if (offset != file->f_pos) {
-		file->f_pos = offset;
+	if (offset != file_pos_read(file)) {
+		file_pos_write(file, offset);
 		file->f_version = 0;
 	}
 
 	return offset;
 }
-EXPORT_SYMBOL(generic_file_llseek_unlocked);
-
-/**
- * generic_file_llseek - generic llseek implementation for regular files
- * @file:	file structure to seek on
- * @offset:	file offset to seek to
- * @origin:	type of seek
- *
- * This is a generic implemenation of ->llseek useable for all normal local
- * filesystems.  It just updates the file offset to the value specified by
- * @offset and @origin under i_mutex.
- */
-loff_t generic_file_llseek(struct file *file, loff_t offset, int origin)
-{
-	loff_t rval;
-
-	mutex_lock(&file->f_dentry->d_inode->i_mutex);
-	rval = generic_file_llseek_unlocked(file, offset, origin);
-	mutex_unlock(&file->f_dentry->d_inode->i_mutex);
-
-	return rval;
-}
 EXPORT_SYMBOL(generic_file_llseek);
 
 loff_t no_llseek(struct file *file, loff_t offset, int origin)
@@ -359,16 +375,6 @@ ssize_t vfs_write(struct file *file, con
 
 EXPORT_SYMBOL(vfs_write);
 
-static inline loff_t file_pos_read(struct file *file)
-{
-	return file->f_pos;
-}
-
-static inline void file_pos_write(struct file *file, loff_t pos)
-{
-	file->f_pos = pos;
-}
-
 SYSCALL_DEFINE3(read, unsigned int, fd, char __user *, buf, size_t, count)
 {
 	struct file *file;
diff -Nrup linux-2.6.30-rc5.org/fs/smbfs/file.c linux-2.6.30-rc5.lseek/fs/smbfs/file.c
--- linux-2.6.30-rc5.org/fs/smbfs/file.c	2009-03-24 08:12:14.000000000 +0900
+++ linux-2.6.30-rc5.lseek/fs/smbfs/file.c	2009-05-11 10:10:36.000000000 +0900
@@ -426,7 +426,7 @@ static loff_t smb_remote_llseek(struct f
 {
 	loff_t ret;
 	lock_kernel();
-	ret = generic_file_llseek_unlocked(file, offset, origin);
+	ret = generic_file_llseek(file, offset, origin);
 	unlock_kernel();
 	return ret;
 }
diff -Nrup linux-2.6.30-rc5.org/include/linux/fs.h linux-2.6.30-rc5.lseek/include/linux/fs.h
--- linux-2.6.30-rc5.org/include/linux/fs.h	2009-05-11 10:07:17.000000000 +0900
+++ linux-2.6.30-rc5.lseek/include/linux/fs.h	2009-05-11 10:10:36.000000000 +0900
@@ -896,6 +896,16 @@ static inline int ra_has_index(struct fi
 #define FILE_MNT_WRITE_TAKEN	1
 #define FILE_MNT_WRITE_RELEASED	2
 
+/*
+ * Use sequence lock to get consistent f_pos on 32-bit processors.
+ */
+#if BITS_PER_LONG == 32 && defined(CONFIG_SMP)
+#define __NEED_F_POS_ORDERED
+#define f_pos_ordered_init(file) seqlock_init(&file->f_pos_seqlock)
+#else
+#define f_pos_ordered_init(file) do { } while (0)
+#endif
+
 struct file {
 	/*
 	* fu_list becomes invalid after file_free is called and queued via
@@ -914,6 +924,9 @@ struct file {
 	unsigned int 		f_flags;
 	fmode_t			f_mode;
 	loff_t			f_pos;
+#ifdef __NEED_F_POS_ORDERED
+	seqlock_t               f_pos_seqlock;
+#endif
 	struct fown_struct	f_owner;
 	const struct cred	*f_cred;
 	struct file_ra_state	f_ra;
@@ -2215,8 +2228,6 @@ extern void
 file_ra_state_init(struct file_ra_state *ra, struct address_space *mapping);
 extern loff_t no_llseek(struct file *file, loff_t offset, int origin);
 extern loff_t generic_file_llseek(struct file *file, loff_t offset, int origin);
-extern loff_t generic_file_llseek_unlocked(struct file *file, loff_t offset,
-			int origin);
 extern int generic_file_open(struct inode * inode, struct file * filp);
 extern int nonseekable_open(struct inode * inode, struct file * filp);
 


^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [RESEND][PATCH] lseek: remove i_mutex
  2009-05-14  0:26 Hisashi Hifumi
@ 2009-05-14  5:57 ` Eric Dumazet
  0 siblings, 0 replies; 8+ messages in thread
From: Eric Dumazet @ 2009-05-14  5:57 UTC (permalink / raw)
  To: Hisashi Hifumi; +Cc: Andrew Morton, linux-kernel, linux-fsdevel

Hisashi Hifumi a écrit :
> Hi, 
> 
> Following patch removes i_mutex from generic_file_llseek.
> I think the reason of protecting lseek with i_mutex is just
> touching i_size (and f_pos) atomically.
> So i_mutex is no longer needed here by introducing i_size_read 
> to read i_size.
> I also made f_pos access atomic here without i_mutex regarding
> former i_mutex holders by introducing file_pos_read/file_pos_write
> that use seqlock to preserve atomicity.
> 
> Following patch removes i_mutex from generic_file_llseek, and deletes 
> generic_file_llseek_nolock totally.
> 
> Currently there is i_mutex contention not only around lseek, but also fsync or write.
> So,  I think we can mitigate i_mutex contention between fsync lseek and write by
> removing i_mutex.
> 
> Thanks.

One problem with this patch is :

Currently on x86_32, filp kmem_cache uses 128 bytes
per object, because sizeof(struct file) = 128

Adding a seqlock_t makes sizeof(struct file) = 136,
and filp kmem_cache uses 192 or 256 bytes per object, a 50% or 100 % increase,
depending on processor (64 or 128 bytes L1 cache lines)

Maybe just use existing f_lock (spinlock) ?

A seqlock is nice whe you have many readers on SMP.

But on every file syscall (mentioned in your changelog) 
every "seqlock reader" will have to take a reference on f_count
any way, so the cache line is already dirtied : We already lost
seqlock benefit. f_pos is not a "read mostly" field, its quite
the reverse in the real world.


> 
> Signed-off-by: Hisashi Hifumi <hifumi.hisashi@oss.ntt.co.jp>
> 
> diff -Nrup linux-2.6.30-rc5.org/fs/cifs/cifsfs.c linux-2.6.30-rc5.lseek/fs/cifs/cifsfs.c
> --- linux-2.6.30-rc5.org/fs/cifs/cifsfs.c	2009-05-11 10:07:14.000000000 +0900
> +++ linux-2.6.30-rc5.lseek/fs/cifs/cifsfs.c	2009-05-11 10:10:36.000000000 +0900
> @@ -635,7 +635,7 @@ static loff_t cifs_llseek(struct file *f
>  		if (retval < 0)
>  			return (loff_t)retval;
>  	}
> -	return generic_file_llseek_unlocked(file, offset, origin);
> +	return generic_file_llseek(file, offset, origin);
>  }
>  
>  #ifdef CONFIG_CIFS_EXPERIMENTAL
> diff -Nrup linux-2.6.30-rc5.org/fs/file_table.c linux-2.6.30-rc5.lseek/fs/file_table.c
> --- linux-2.6.30-rc5.org/fs/file_table.c	2009-05-11 10:07:14.000000000 +0900
> +++ linux-2.6.30-rc5.lseek/fs/file_table.c	2009-05-11 10:10:36.000000000 +0900
> @@ -126,6 +126,7 @@ struct file *get_empty_filp(void)
>  
>  	INIT_LIST_HEAD(&f->f_u.fu_list);
>  	atomic_long_set(&f->f_count, 1);
> +	f_pos_ordered_init(f);
>  	rwlock_init(&f->f_owner.lock);
>  	f->f_cred = get_cred(cred);
>  	spin_lock_init(&f->f_lock);
> diff -Nrup linux-2.6.30-rc5.org/fs/gfs2/ops_file.c linux-2.6.30-rc5.lseek/fs/gfs2/ops_file.c
> --- linux-2.6.30-rc5.org/fs/gfs2/ops_file.c	2009-05-11 10:07:15.000000000 +0900
> +++ linux-2.6.30-rc5.lseek/fs/gfs2/ops_file.c	2009-05-11 10:10:36.000000000 +0900
> @@ -63,11 +63,11 @@ static loff_t gfs2_llseek(struct file *f
>  		error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY,
>  					   &i_gh);
>  		if (!error) {
> -			error = generic_file_llseek_unlocked(file, offset, origin);
> +			error = generic_file_llseek(file, offset, origin);
>  			gfs2_glock_dq_uninit(&i_gh);
>  		}
>  	} else
> -		error = generic_file_llseek_unlocked(file, offset, origin);
> +		error = generic_file_llseek(file, offset, origin);
>  
>  	return error;
>  }
> diff -Nrup linux-2.6.30-rc5.org/fs/ncpfs/file.c linux-2.6.30-rc5.lseek/fs/ncpfs/file.c
> --- linux-2.6.30-rc5.org/fs/ncpfs/file.c	2009-03-24 08:12:14.000000000 +0900
> +++ linux-2.6.30-rc5.lseek/fs/ncpfs/file.c	2009-05-11 10:10:36.000000000 +0900
> @@ -286,7 +286,7 @@ static loff_t ncp_remote_llseek(struct f
>  {
>  	loff_t ret;
>  	lock_kernel();
> -	ret = generic_file_llseek_unlocked(file, offset, origin);
> +	ret = generic_file_llseek(file, offset, origin);
>  	unlock_kernel();
>  	return ret;
>  }
> diff -Nrup linux-2.6.30-rc5.org/fs/nfs/file.c linux-2.6.30-rc5.lseek/fs/nfs/file.c
> --- linux-2.6.30-rc5.org/fs/nfs/file.c	2009-05-11 10:07:15.000000000 +0900
> +++ linux-2.6.30-rc5.lseek/fs/nfs/file.c	2009-05-11 10:10:36.000000000 +0900
> @@ -188,10 +188,10 @@ static loff_t nfs_file_llseek(struct fil
>  			return (loff_t)retval;
>  
>  		spin_lock(&inode->i_lock);
> -		loff = generic_file_llseek_unlocked(filp, offset, origin);
> +		loff = generic_file_llseek(filp, offset, origin);
>  		spin_unlock(&inode->i_lock);
>  	} else
> -		loff = generic_file_llseek_unlocked(filp, offset, origin);
> +		loff = generic_file_llseek(filp, offset, origin);
>  	return loff;
>  }
>  
> diff -Nrup linux-2.6.30-rc5.org/fs/read_write.c linux-2.6.30-rc5.lseek/fs/read_write.c
> --- linux-2.6.30-rc5.org/fs/read_write.c	2009-05-11 10:07:15.000000000 +0900
> +++ linux-2.6.30-rc5.lseek/fs/read_write.c	2009-05-11 10:10:36.000000000 +0900
> @@ -31,23 +31,61 @@ const struct file_operations generic_ro_
>  
>  EXPORT_SYMBOL(generic_ro_fops);
>  
> +static inline loff_t file_pos_read(struct file *file)
> +{
> +#if BITS_PER_LONG == 32 && defined(CONFIG_SMP)
> +	loff_t pos;
> +	unsigned int seq;
> +
> +	do {
> +		seq = read_seqbegin(&file->f_pos_seqlock);
> +		pos = file->f_pos;
> +	} while (read_seqretry(&file->f_pos_seqlock, seq));
> +	return pos;
> +#elif BITS_PER_LONG == 32 && defined(CONFIG_PREEMPT)
> +	loff_t pos;
> +
> +	preempt_disable();
> +	pos = file->f_pos;
> +	preempt_enable();
> +	return pos;
> +#else
> +	return file->f_pos;
> +#endif
> +}
> +
> +static inline void file_pos_write(struct file *file, loff_t pos)
> +{
> +#if BITS_PER_LONG == 32 && defined(CONFIG_SMP)
> +	write_seqlock(&file->f_pos_seqlock);
> +	file->f_pos = pos;
> +	write_sequnlock(&file->f_pos_seqlock);
> +#elif BITS_PER_LONG == 32 && defined(CONFIG_PREEMPT)
> +	preempt_disable();
> +	file->f_pos = pos;
> +	preempt_enable();
> +#else
> +	file->f_pos = pos;
> +#endif
> +}
> +
>  /**
> - * generic_file_llseek_unlocked - lockless generic llseek implementation
> + * generic_file_llseek - generic llseek implementation for regular files
>   * @file:	file structure to seek on
>   * @offset:	file offset to seek to
>   * @origin:	type of seek
>   *
> - * Updates the file offset to the value specified by @offset and @origin.
> - * Locking must be provided by the caller.
> + * This is a generic implemenation of ->llseek useable for all normal local
> + * filesystems.  It just updates the file offset to the value specified by
> + * @offset and @origin.
>   */
> -loff_t
> -generic_file_llseek_unlocked(struct file *file, loff_t offset, int origin)
> +loff_t generic_file_llseek(struct file *file, loff_t offset, int origin)
>  {
>  	struct inode *inode = file->f_mapping->host;
>  
>  	switch (origin) {
>  	case SEEK_END:
> -		offset += inode->i_size;
> +		offset += i_size_read(inode);
>  		break;
>  	case SEEK_CUR:
>  		/*
> @@ -57,8 +95,8 @@ generic_file_llseek_unlocked(struct file
>  		* write() or lseek() might have altered it
>  		*/
>  		if (offset == 0)
> -			return file->f_pos;
> -		offset += file->f_pos;
> +			return file_pos_read(file);
> +		offset += file_pos_read(file);
>  		break;
>  	}
>  
> @@ -66,35 +104,13 @@ generic_file_llseek_unlocked(struct file
>  		return -EINVAL;
>  
>  	/* Special lock needed here? */
> -	if (offset != file->f_pos) {
> -		file->f_pos = offset;
> +	if (offset != file_pos_read(file)) {
> +		file_pos_write(file, offset);
>  		file->f_version = 0;
>  	}
>  
>  	return offset;
>  }
> -EXPORT_SYMBOL(generic_file_llseek_unlocked);
> -
> -/**
> - * generic_file_llseek - generic llseek implementation for regular files
> - * @file:	file structure to seek on
> - * @offset:	file offset to seek to
> - * @origin:	type of seek
> - *
> - * This is a generic implemenation of ->llseek useable for all normal local
> - * filesystems.  It just updates the file offset to the value specified by
> - * @offset and @origin under i_mutex.
> - */
> -loff_t generic_file_llseek(struct file *file, loff_t offset, int origin)
> -{
> -	loff_t rval;
> -
> -	mutex_lock(&file->f_dentry->d_inode->i_mutex);
> -	rval = generic_file_llseek_unlocked(file, offset, origin);
> -	mutex_unlock(&file->f_dentry->d_inode->i_mutex);
> -
> -	return rval;
> -}
>  EXPORT_SYMBOL(generic_file_llseek);
>  
>  loff_t no_llseek(struct file *file, loff_t offset, int origin)
> @@ -359,16 +375,6 @@ ssize_t vfs_write(struct file *file, con
>  
>  EXPORT_SYMBOL(vfs_write);
>  
> -static inline loff_t file_pos_read(struct file *file)
> -{
> -	return file->f_pos;
> -}
> -
> -static inline void file_pos_write(struct file *file, loff_t pos)
> -{
> -	file->f_pos = pos;
> -}
> -
>  SYSCALL_DEFINE3(read, unsigned int, fd, char __user *, buf, size_t, count)
>  {
>  	struct file *file;
> diff -Nrup linux-2.6.30-rc5.org/fs/smbfs/file.c linux-2.6.30-rc5.lseek/fs/smbfs/file.c
> --- linux-2.6.30-rc5.org/fs/smbfs/file.c	2009-03-24 08:12:14.000000000 +0900
> +++ linux-2.6.30-rc5.lseek/fs/smbfs/file.c	2009-05-11 10:10:36.000000000 +0900
> @@ -426,7 +426,7 @@ static loff_t smb_remote_llseek(struct f
>  {
>  	loff_t ret;
>  	lock_kernel();
> -	ret = generic_file_llseek_unlocked(file, offset, origin);
> +	ret = generic_file_llseek(file, offset, origin);
>  	unlock_kernel();
>  	return ret;
>  }
> diff -Nrup linux-2.6.30-rc5.org/include/linux/fs.h linux-2.6.30-rc5.lseek/include/linux/fs.h
> --- linux-2.6.30-rc5.org/include/linux/fs.h	2009-05-11 10:07:17.000000000 +0900
> +++ linux-2.6.30-rc5.lseek/include/linux/fs.h	2009-05-11 10:10:36.000000000 +0900
> @@ -896,6 +896,16 @@ static inline int ra_has_index(struct fi
>  #define FILE_MNT_WRITE_TAKEN	1
>  #define FILE_MNT_WRITE_RELEASED	2
>  
> +/*
> + * Use sequence lock to get consistent f_pos on 32-bit processors.
> + */
> +#if BITS_PER_LONG == 32 && defined(CONFIG_SMP)
> +#define __NEED_F_POS_ORDERED
> +#define f_pos_ordered_init(file) seqlock_init(&file->f_pos_seqlock)
> +#else
> +#define f_pos_ordered_init(file) do { } while (0)
> +#endif
> +
>  struct file {
>  	/*
>  	* fu_list becomes invalid after file_free is called and queued via
> @@ -914,6 +924,9 @@ struct file {
>  	unsigned int 		f_flags;
>  	fmode_t			f_mode;
>  	loff_t			f_pos;
> +#ifdef __NEED_F_POS_ORDERED
> +	seqlock_t               f_pos_seqlock;
> +#endif
>  	struct fown_struct	f_owner;
>  	const struct cred	*f_cred;
>  	struct file_ra_state	f_ra;
> @@ -2215,8 +2228,6 @@ extern void
>  file_ra_state_init(struct file_ra_state *ra, struct address_space *mapping);
>  extern loff_t no_llseek(struct file *file, loff_t offset, int origin);
>  extern loff_t generic_file_llseek(struct file *file, loff_t offset, int origin);
> -extern loff_t generic_file_llseek_unlocked(struct file *file, loff_t offset,
> -			int origin);
>  extern int generic_file_open(struct inode * inode, struct file * filp);
>  extern int nonseekable_open(struct inode * inode, struct file * filp);
>  
> 


--
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 8+ messages in thread

* [RESEND][PATCH] lseek: remove i_mutex
@ 2009-06-25  4:41 Hisashi Hifumi
  2009-06-25  4:54 ` Al Viro
  0 siblings, 1 reply; 8+ messages in thread
From: Hisashi Hifumi @ 2009-06-25  4:41 UTC (permalink / raw)
  To: Andrew Morton; +Cc: linux-kernel, linux-fsdevel, Al Viro, Christoph Hellwig

Hi, 

Following patch removes i_mutex from generic_file_llseek.
I think the reason of protecting lseek with i_mutex is just
touching i_size (and f_pos) atomically.
So i_mutex is no longer needed here by introducing i_size_read 
to read i_size.
I also made f_pos access atomic here without i_mutex regarding
former i_mutex holders by introducing file_pos_read/file_pos_write
that use seqlock to preserve atomicity.

Following patch removes i_mutex from generic_file_llseek, and deletes 
generic_file_llseek_nolock totally.

Currently there is i_mutex contention not only around lseek, but also fsync or write.
So,  I think we can mitigate i_mutex contention between fsync, lseek and write by
removing i_mutex. For example, PostgreSQL (DBMS software) can obtain benefit 
from this patch. When checkpoint of DBMS started, lseek and write contend with i_mutex 
and throughput drops. But with this patch this performance drop can mitigate because of the
reduction of i_mutex contention. 

I did some test to measure i_mutex contention.
This test do:
	1. make an 128MB file.
	2. fork 100 processes. repeat 1000000 times lseeking and reading randomly on 
		each process to this file.
	3, gauge seconds between start and end of this test.

The result was:

	-2.6.31-rc1
	# time ./lseek_test
	182 sec

	real    3m2.751s
	user    0m13.273s
	sys     46m26.764s

	-2.6.31-rc1-patched
	# time ./lseek_test
	8 sec

	real    0m9.026s
	user    0m12.680s
	sys     2m0.643s 

Hardware environment:
CPU 2.4GHz(Quad Core) *4
Memory 64GB

Following is the test program "lseek_test.c".

#include <stdio.h>
#include <sched.h>
#include <stdlib.h>
#include <string.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <unistd.h>
#include <signal.h>
#define NUM 100
#define LEN 4096
#define LOOP 32*1024

int num;

void catch_SIGCHLD(int signo)
{
	pid_t child_pid = 0;
	do {
		int child_ret;
		child_pid = waitpid(-1, &child_ret, WNOHANG);
		if (child_pid > 0)
			num++;
	} while (child_pid > 0);
}
main()
{
        int  i, pid;
	char buf[LEN];
	unsigned long offset, filesize;
	time_t t1, t2;
	struct sigaction act;
	memset(buf, 0, LEN);
	memset(&act, 0, sizeof(act));
	act.sa_handler = catch_SIGCHLD;
	act.sa_flags = SA_NOCLDSTOP|SA_RESTART;
	sigemptyset(&act.sa_mask);
	sigaction(SIGCHLD, &act, NULL);	

	filesize = LEN * LOOP;
	int fd = open("targetfile1",O_RDWR|O_CREAT);	

	/* create a 128MB file */
	for(i = 0; i < LOOP; i++)
		write(fd, buf, LEN);
	fsync(fd);
	close(fd);
	
	time(&t1);	
	for (i = 0; i < NUM; i++) {	
		pid = fork();
		if(pid == 0){
		/* child */
			int fd = open("targetfile1", O_RDWR);	
			int j;
			for (j = 0; j < 1000000; j++) {
				offset = (random() % filesize);
				lseek(fd, offset, SEEK_SET);
				read(fd, buf, 10);
			}
			close(fd);
			exit(0);
		}
	}
	while(num < NUM)
		sleep(1);
	time(&t2);	
	printf("%d sec\n",t2-t1);
}



Thanks.

Signed-off-by: Hisashi Hifumi <hifumi.hisashi@oss.ntt.co.jp>

diff -Nrup linux-2.6.31-rc1.org/fs/cifs/cifsfs.c linux-2.6.31-rc1/fs/cifs/cifsfs.c
--- linux-2.6.31-rc1.org/fs/cifs/cifsfs.c	2009-06-25 10:22:44.000000000 +0900
+++ linux-2.6.31-rc1/fs/cifs/cifsfs.c	2009-06-25 10:30:27.000000000 +0900
@@ -641,7 +641,7 @@ static loff_t cifs_llseek(struct file *f
 		if (retval < 0)
 			return (loff_t)retval;
 	}
-	return generic_file_llseek_unlocked(file, offset, origin);
+	return generic_file_llseek(file, offset, origin);
 }
 
 #ifdef CONFIG_CIFS_EXPERIMENTAL
diff -Nrup linux-2.6.31-rc1.org/fs/file_table.c linux-2.6.31-rc1/fs/file_table.c
--- linux-2.6.31-rc1.org/fs/file_table.c	2009-06-25 10:22:44.000000000 +0900
+++ linux-2.6.31-rc1/fs/file_table.c	2009-06-25 10:31:12.000000000 +0900
@@ -126,6 +126,7 @@ struct file *get_empty_filp(void)
 
 	INIT_LIST_HEAD(&f->f_u.fu_list);
 	atomic_long_set(&f->f_count, 1);
+	f_pos_ordered_init(f);
 	rwlock_init(&f->f_owner.lock);
 	f->f_cred = get_cred(cred);
 	spin_lock_init(&f->f_lock);
diff -Nrup linux-2.6.31-rc1.org/fs/gfs2/file.c linux-2.6.31-rc1/fs/gfs2/file.c
--- linux-2.6.31-rc1.org/fs/gfs2/file.c	2009-06-25 10:22:44.000000000 +0900
+++ linux-2.6.31-rc1/fs/gfs2/file.c	2009-06-25 10:32:25.000000000 +0900
@@ -62,11 +62,11 @@ static loff_t gfs2_llseek(struct file *f
 		error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY,
 					   &i_gh);
 		if (!error) {
-			error = generic_file_llseek_unlocked(file, offset, origin);
+			error = generic_file_llseek(file, offset, origin);
 			gfs2_glock_dq_uninit(&i_gh);
 		}
 	} else
-		error = generic_file_llseek_unlocked(file, offset, origin);
+		error = generic_file_llseek(file, offset, origin);
 
 	return error;
 }
diff -Nrup linux-2.6.31-rc1.org/fs/ncpfs/file.c linux-2.6.31-rc1/fs/ncpfs/file.c
--- linux-2.6.31-rc1.org/fs/ncpfs/file.c	2009-06-10 12:05:27.000000000 +0900
+++ linux-2.6.31-rc1/fs/ncpfs/file.c	2009-06-25 10:32:55.000000000 +0900
@@ -286,7 +286,7 @@ static loff_t ncp_remote_llseek(struct f
 {
 	loff_t ret;
 	lock_kernel();
-	ret = generic_file_llseek_unlocked(file, offset, origin);
+	ret = generic_file_llseek(file, offset, origin);
 	unlock_kernel();
 	return ret;
 }
diff -Nrup linux-2.6.31-rc1.org/fs/nfs/file.c linux-2.6.31-rc1/fs/nfs/file.c
--- linux-2.6.31-rc1.org/fs/nfs/file.c	2009-06-25 10:22:44.000000000 +0900
+++ linux-2.6.31-rc1/fs/nfs/file.c	2009-06-25 10:33:39.000000000 +0900
@@ -192,10 +192,10 @@ static loff_t nfs_file_llseek(struct fil
 			return (loff_t)retval;
 
 		spin_lock(&inode->i_lock);
-		loff = generic_file_llseek_unlocked(filp, offset, origin);
+		loff = generic_file_llseek(filp, offset, origin);
 		spin_unlock(&inode->i_lock);
 	} else
-		loff = generic_file_llseek_unlocked(filp, offset, origin);
+		loff = generic_file_llseek(filp, offset, origin);
 	return loff;
 }
 
diff -Nrup linux-2.6.31-rc1.org/fs/read_write.c linux-2.6.31-rc1/fs/read_write.c
--- linux-2.6.31-rc1.org/fs/read_write.c	2009-06-25 10:22:44.000000000 +0900
+++ linux-2.6.31-rc1/fs/read_write.c	2009-06-25 10:48:08.000000000 +0900
@@ -31,23 +31,61 @@ const struct file_operations generic_ro_
 
 EXPORT_SYMBOL(generic_ro_fops);
 
+static inline loff_t file_pos_read(struct file *file)
+{
+#if BITS_PER_LONG == 32 && defined(CONFIG_SMP)
+	loff_t pos;
+	unsigned int seq;
+
+	do {
+		seq = read_seqbegin(&file->f_pos_seqlock);
+		pos = file->f_pos;
+	} while (read_seqretry(&file->f_pos_seqlock, seq));
+	return pos;
+#elif BITS_PER_LONG == 32 && defined(CONFIG_PREEMPT)
+	loff_t pos;
+
+	preempt_disable();
+	pos = file->f_pos;
+	preempt_enable();
+	return pos;
+#else
+	return file->f_pos;
+#endif
+}
+
+static inline void file_pos_write(struct file *file, loff_t pos)
+{
+#if BITS_PER_LONG == 32 && defined(CONFIG_SMP)
+	write_seqlock(&file->f_pos_seqlock);
+	file->f_pos = pos;
+	write_sequnlock(&file->f_pos_seqlock);
+#elif BITS_PER_LONG == 32 && defined(CONFIG_PREEMPT)
+	preempt_disable();
+	file->f_pos = pos;
+	preempt_enable();
+#else
+	file->f_pos = pos;
+#endif
+}
+
 /**
- * generic_file_llseek_unlocked - lockless generic llseek implementation
+ * generic_file_llseek - generic llseek implementation for regular files
  * @file:	file structure to seek on
  * @offset:	file offset to seek to
  * @origin:	type of seek
  *
- * Updates the file offset to the value specified by @offset and @origin.
- * Locking must be provided by the caller.
+ * This is a generic implemenation of ->llseek useable for all normal local
+ * filesystems.  It just updates the file offset to the value specified by
+ * @offset and @origin.
  */
-loff_t
-generic_file_llseek_unlocked(struct file *file, loff_t offset, int origin)
+loff_t generic_file_llseek(struct file *file, loff_t offset, int origin)
 {
 	struct inode *inode = file->f_mapping->host;
 
 	switch (origin) {
 	case SEEK_END:
-		offset += inode->i_size;
+		offset += i_size_read(inode);
 		break;
 	case SEEK_CUR:
 		/*
@@ -57,8 +95,8 @@ generic_file_llseek_unlocked(struct file
 		 * write() or lseek() might have altered it
 		 */
 		if (offset == 0)
-			return file->f_pos;
-		offset += file->f_pos;
+			return file_pos_read(file);
+		offset += file_pos_read(file);
 		break;
 	}
 
@@ -66,35 +104,13 @@ generic_file_llseek_unlocked(struct file
 		return -EINVAL;
 
 	/* Special lock needed here? */
-	if (offset != file->f_pos) {
-		file->f_pos = offset;
+	if (offset != file_pos_read(file)) {
+		file_pos_write(file, offset);
 		file->f_version = 0;
 	}
 
 	return offset;
 }
-EXPORT_SYMBOL(generic_file_llseek_unlocked);
-
-/**
- * generic_file_llseek - generic llseek implementation for regular files
- * @file:	file structure to seek on
- * @offset:	file offset to seek to
- * @origin:	type of seek
- *
- * This is a generic implemenation of ->llseek useable for all normal local
- * filesystems.  It just updates the file offset to the value specified by
- * @offset and @origin under i_mutex.
- */
-loff_t generic_file_llseek(struct file *file, loff_t offset, int origin)
-{
-	loff_t rval;
-
-	mutex_lock(&file->f_dentry->d_inode->i_mutex);
-	rval = generic_file_llseek_unlocked(file, offset, origin);
-	mutex_unlock(&file->f_dentry->d_inode->i_mutex);
-
-	return rval;
-}
 EXPORT_SYMBOL(generic_file_llseek);
 
 loff_t no_llseek(struct file *file, loff_t offset, int origin)
@@ -359,16 +375,6 @@ ssize_t vfs_write(struct file *file, con
 
 EXPORT_SYMBOL(vfs_write);
 
-static inline loff_t file_pos_read(struct file *file)
-{
-	return file->f_pos;
-}
-
-static inline void file_pos_write(struct file *file, loff_t pos)
-{
-	file->f_pos = pos;
-}
-
 SYSCALL_DEFINE3(read, unsigned int, fd, char __user *, buf, size_t, count)
 {
 	struct file *file;
diff -Nrup linux-2.6.31-rc1.org/fs/smbfs/file.c linux-2.6.31-rc1/fs/smbfs/file.c
--- linux-2.6.31-rc1.org/fs/smbfs/file.c	2009-06-10 12:05:27.000000000 +0900
+++ linux-2.6.31-rc1/fs/smbfs/file.c	2009-06-25 10:48:29.000000000 +0900
@@ -426,7 +426,7 @@ static loff_t smb_remote_llseek(struct f
 {
 	loff_t ret;
 	lock_kernel();
-	ret = generic_file_llseek_unlocked(file, offset, origin);
+	ret = generic_file_llseek(file, offset, origin);
 	unlock_kernel();
 	return ret;
 }
diff -Nrup linux-2.6.31-rc1.org/include/linux/fs.h linux-2.6.31-rc1/include/linux/fs.h
--- linux-2.6.31-rc1.org/include/linux/fs.h	2009-06-25 10:22:44.000000000 +0900
+++ linux-2.6.31-rc1/include/linux/fs.h	2009-06-25 10:51:08.000000000 +0900
@@ -902,6 +902,16 @@ static inline int ra_has_index(struct fi
 #define FILE_MNT_WRITE_TAKEN	1
 #define FILE_MNT_WRITE_RELEASED	2
 
+/*
+ * Use sequence lock to get consistent f_pos on 32-bit processors.
+ */
+#if BITS_PER_LONG == 32 && defined(CONFIG_SMP)
+#define __NEED_F_POS_ORDERED
+#define f_pos_ordered_init(file) seqlock_init(&file->f_pos_seqlock)
+#else
+#define f_pos_ordered_init(file) do { } while (0)
+#endif
+
 struct file {
 	/*
 	 * fu_list becomes invalid after file_free is called and queued via
@@ -920,6 +930,9 @@ struct file {
 	unsigned int 		f_flags;
 	fmode_t			f_mode;
 	loff_t			f_pos;
+#ifdef __NEED_F_POS_ORDERED
+	seqlock_t               f_pos_seqlock;
+#endif
 	struct fown_struct	f_owner;
 	const struct cred	*f_cred;
 	struct file_ra_state	f_ra;
@@ -2219,8 +2232,6 @@ extern void
 file_ra_state_init(struct file_ra_state *ra, struct address_space *mapping);
 extern loff_t no_llseek(struct file *file, loff_t offset, int origin);
 extern loff_t generic_file_llseek(struct file *file, loff_t offset, int origin);
-extern loff_t generic_file_llseek_unlocked(struct file *file, loff_t offset,
-			int origin);
 extern int generic_file_open(struct inode * inode, struct file * filp);
 extern int nonseekable_open(struct inode * inode, struct file * filp);
 


^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [RESEND][PATCH] lseek: remove i_mutex
  2009-06-25  4:41 [RESEND][PATCH] lseek: remove i_mutex Hisashi Hifumi
@ 2009-06-25  4:54 ` Al Viro
  0 siblings, 0 replies; 8+ messages in thread
From: Al Viro @ 2009-06-25  4:54 UTC (permalink / raw)
  To: Hisashi Hifumi
  Cc: Andrew Morton, linux-kernel, linux-fsdevel, Christoph Hellwig

On Thu, Jun 25, 2009 at 01:41:10PM +0900, Hisashi Hifumi wrote:
> Hi, 
> 
> Following patch removes i_mutex from generic_file_llseek.
> I think the reason of protecting lseek with i_mutex is just
> touching i_size (and f_pos) atomically.

NAK.  There's also a lot of fun with directories - readdir() vs. llseek().

^ permalink raw reply	[flat|nested] 8+ messages in thread

end of thread, other threads:[~2009-06-25  4:54 UTC | newest]

Thread overview: 8+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2009-06-25  4:41 [RESEND][PATCH] lseek: remove i_mutex Hisashi Hifumi
2009-06-25  4:54 ` Al Viro
  -- strict thread matches above, loose matches on Subject: below --
2009-05-14  0:26 Hisashi Hifumi
2009-05-14  5:57 ` Eric Dumazet
2009-02-03  8:04 [RESEND] [PATCH] " Hisashi Hifumi
2009-02-05 20:05 ` Andrew Morton
2009-02-06  0:20   ` Hisashi Hifumi
2009-02-06  0:33     ` Andrew Morton

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).