* [PATCH] VFS: make file->f_pos access atomic on 32bit arch
@ 2008-09-16 10:35 Hisashi Hifumi
0 siblings, 0 replies; 11+ messages in thread
From: Hisashi Hifumi @ 2008-09-16 10:35 UTC (permalink / raw)
To: akpm; +Cc: linux-kernel, linux-fsdevel
Hi.
Currently reading or writing file->f_pos is not atomic on 32bit environment,
so two or more simultaneous access can corrupt file->f_pos value.
There are some past discussions about this issue, but this is not fixed yet.
http://marc.info/?l=linux-kernel&m=120764199819899&w=2
http://marc.info/?l=linux-kernel&m=114490379102476&w=2
Protecting file->f_pos with a lock adds some overhead but I think we should
fix this.
I used seqlock to serialize the access to file->f_pos. This approach is similar
to inode->i_size synchronization.
Comments?
Thanks.
Signed-off-by: Hisashi Hifumi <hifumi.hisashi@oss.ntt.co.jp>
diff -Nrup linux-2.6.27-rc6.org/fs/block_dev.c linux-2.6.27-rc6.fpos/fs/block_dev.c
--- linux-2.6.27-rc6.org/fs/block_dev.c 2008-09-10 10:30:08.000000000 +0900
+++ linux-2.6.27-rc6.fpos/fs/block_dev.c 2008-09-16 18:45:17.000000000 +0900
@@ -225,13 +225,12 @@ static loff_t block_llseek(struct file *
offset += size;
break;
case 1:
- offset += file->f_pos;
+ offset += file_pos_read(file);
}
retval = -EINVAL;
if (offset >= 0 && offset <= size) {
- if (offset != file->f_pos) {
- file->f_pos = offset;
- }
+ if (offset != file_pos_read(file))
+ file_pos_write(file, offset);
retval = offset;
}
mutex_unlock(&bd_inode->i_mutex);
diff -Nrup linux-2.6.27-rc6.org/fs/file_table.c linux-2.6.27-rc6.fpos/fs/file_table.c
--- linux-2.6.27-rc6.org/fs/file_table.c 2008-09-10 10:30:08.000000000 +0900
+++ linux-2.6.27-rc6.fpos/fs/file_table.c 2008-09-16 14:18:29.000000000 +0900
@@ -121,6 +121,7 @@ struct file *get_empty_filp(void)
tsk = current;
INIT_LIST_HEAD(&f->f_u.fu_list);
atomic_long_set(&f->f_count, 1);
+ f_pos_ordered_init(f);
rwlock_init(&f->f_owner.lock);
f->f_uid = tsk->fsuid;
f->f_gid = tsk->fsgid;
diff -Nrup linux-2.6.27-rc6.org/fs/locks.c linux-2.6.27-rc6.fpos/fs/locks.c
--- linux-2.6.27-rc6.org/fs/locks.c 2008-09-10 10:30:08.000000000 +0900
+++ linux-2.6.27-rc6.fpos/fs/locks.c 2008-09-16 18:35:14.000000000 +0900
@@ -317,7 +317,7 @@ static int flock_to_posix_lock(struct fi
start = 0;
break;
case SEEK_CUR:
- start = filp->f_pos;
+ start = file_pos_read(filp);
break;
case SEEK_END:
start = i_size_read(filp->f_path.dentry->d_inode);
@@ -367,7 +367,7 @@ static int flock64_to_posix_lock(struct
start = 0;
break;
case SEEK_CUR:
- start = filp->f_pos;
+ start = file_pos_read(filp);
break;
case SEEK_END:
start = i_size_read(filp->f_path.dentry->d_inode);
diff -Nrup linux-2.6.27-rc6.org/fs/read_write.c linux-2.6.27-rc6.fpos/fs/read_write.c
--- linux-2.6.27-rc6.org/fs/read_write.c 2008-09-10 10:30:08.000000000 +0900
+++ linux-2.6.27-rc6.fpos/fs/read_write.c 2008-09-16 18:40:26.000000000 +0900
@@ -42,13 +42,13 @@ generic_file_llseek_unlocked(struct file
offset += inode->i_size;
break;
case SEEK_CUR:
- offset += file->f_pos;
+ offset += file_pos_read(file);
}
retval = -EINVAL;
if (offset>=0 && offset<=inode->i_sb->s_maxbytes) {
/* Special lock needed here? */
- if (offset != file->f_pos) {
- file->f_pos = offset;
+ if (offset != file_pos_read(file)) {
+ file_pos_write(file, offset);
file->f_version = 0;
}
retval = offset;
@@ -83,12 +83,12 @@ loff_t default_llseek(struct file *file,
offset += i_size_read(file->f_path.dentry->d_inode);
break;
case SEEK_CUR:
- offset += file->f_pos;
+ offset += file_pos_read(file);
}
retval = -EINVAL;
if (offset >= 0) {
- if (offset != file->f_pos) {
- file->f_pos = offset;
+ if (offset != file_pos_read(file)) {
+ file_pos_write(file, offset);
file->f_version = 0;
}
retval = offset;
@@ -324,16 +324,6 @@ ssize_t vfs_write(struct file *file, con
EXPORT_SYMBOL(vfs_write);
-static inline loff_t file_pos_read(struct file *file)
-{
- return file->f_pos;
-}
-
-static inline void file_pos_write(struct file *file, loff_t pos)
-{
- file->f_pos = pos;
-}
-
asmlinkage ssize_t sys_read(unsigned int fd, char __user * buf, size_t count)
{
struct file *file;
diff -Nrup linux-2.6.27-rc6.org/include/linux/fs.h linux-2.6.27-rc6.fpos/include/linux/fs.h
--- linux-2.6.27-rc6.org/include/linux/fs.h 2008-09-10 10:30:10.000000000 +0900
+++ linux-2.6.27-rc6.fpos/include/linux/fs.h 2008-09-16 18:34:14.000000000 +0900
@@ -805,6 +805,16 @@ static inline int ra_has_index(struct fi
#define FILE_MNT_WRITE_TAKEN 1
#define FILE_MNT_WRITE_RELEASED 2
+/*
+ * Use sequence lock to get consistent f_pos on 32-bit processors.
+ */
+#if BITS_PER_LONG == 32 && defined(CONFIG_SMP)
+#define __NEED_F_POS_ORDERED
+#define f_pos_ordered_init(file) seqlock_init(&file->f_pos_seqlock)
+#else
+#define f_pos_ordered_init(file) do { } while (0)
+#endif
+
struct file {
/*
* fu_list becomes invalid after file_free is called and queued via
@@ -822,6 +832,9 @@ struct file {
unsigned int f_flags;
mode_t f_mode;
loff_t f_pos;
+#ifdef __NEED_F_POS_ORDERED
+ seqlock_t f_pos_seqlock;
+#endif
struct fown_struct f_owner;
unsigned int f_uid, f_gid;
struct file_ra_state f_ra;
@@ -850,6 +863,47 @@ extern spinlock_t files_lock;
#define get_file(x) atomic_long_inc(&(x)->f_count)
#define file_count(x) atomic_long_read(&(x)->f_count)
+/*
+ * file_pos_read/write is atomic by using sequence lock on 32bit arch.
+ */
+static inline loff_t file_pos_read(struct file *file)
+{
+#if BITS_PER_LONG == 32 && defined(CONFIG_SMP)
+ loff_t pos;
+ unsigned int seq;
+
+ do {
+ seq = read_seqbegin(&file->f_pos_seqlock);
+ pos = file->f_pos;
+ } while (read_seqretry(&file->f_pos_seqlock, seq));
+ return pos;
+#elif BITS_PER_LONG == 32 && defined(CONFIG_PREEMPT)
+ loff_t pos;
+
+ preempt_disable();
+ pos = file->f_pos;
+ preempt_enable();
+ return pos;
+#else
+ return file->f_pos;
+#endif
+}
+
+static inline void file_pos_write(struct file *file, loff_t pos)
+{
+#if BITS_PER_LONG == 32 && defined(CONFIG_SMP)
+ write_seqlock(&file->f_pos_seqlock);
+ file->f_pos = pos;
+ write_sequnlock(&file->f_pos_seqlock);
+#elif BITS_PER_LONG == 32 && defined(CONFIG_PREEMPT)
+ preempt_disable();
+ file->f_pos = pos;
+ preempt_enable();
+#else
+ file->f_pos = pos;
+#endif
+}
+
#ifdef CONFIG_DEBUG_WRITECOUNT
static inline void file_take_write(struct file *f)
{
^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: [PATCH] VFS: make file->f_pos access atomic on 32bit arch
@ 2008-09-16 11:11 Michael Trimarchi
2008-09-16 12:54 ` Nick Piggin
0 siblings, 1 reply; 11+ messages in thread
From: Michael Trimarchi @ 2008-09-16 11:11 UTC (permalink / raw)
To: Hisashi Hifumi, akpm; +Cc: linux-kernel, linux-fsdevel
Hi,
> + if (offset != file_pos_read(file))
> + file_pos_write(file, offset);
> retval = offset;
> }
Is not more efficient here a compare xchg operation?
Regards Michael
__________________________________________________
Do You Yahoo!?
Poco spazio e tanto spam? Yahoo! Mail ti protegge dallo spam e ti da tanto spazio gratuito per i tuoi file e i messaggi
http://mail.yahoo.it
^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: [PATCH] VFS: make file->f_pos access atomic on 32bit arch
2008-09-16 11:11 Michael Trimarchi
@ 2008-09-16 12:54 ` Nick Piggin
0 siblings, 0 replies; 11+ messages in thread
From: Nick Piggin @ 2008-09-16 12:54 UTC (permalink / raw)
To: Michael Trimarchi; +Cc: Hisashi Hifumi, akpm, linux-kernel, linux-fsdevel
On Tuesday 16 September 2008 21:11, Michael Trimarchi wrote:
> Hi,
>
> > + if (offset != file_pos_read(file))
> > + file_pos_write(file, offset);
> > retval = offset;
> > }
>
> Is not more efficient here a compare xchg operation?
The compiler should do it if it was.
^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: [PATCH] VFS: make file->f_pos access atomic on 32bit arch
@ 2008-09-16 13:57 Michael Trimarchi
2008-09-16 14:11 ` Nick Piggin
0 siblings, 1 reply; 11+ messages in thread
From: Michael Trimarchi @ 2008-09-16 13:57 UTC (permalink / raw)
To: Nick Piggin; +Cc: Hisashi Hifumi, akpm, linux-kernel, linux-fsdevel
Hi,
----- Messaggio originale -----
> Da: Nick Piggin <nickpiggin@yahoo.com.au>
> A: Michael Trimarchi <trimarchimichael@yahoo.it>
> Cc: Hisashi Hifumi <hifumi.hisashi@oss.ntt.co.jp>; akpm@linux-foundation.org; linux-kernel@vger.kernel.org; linux-fsdevel@vger.kernel.org
> Inviato: Martedì 16 settembre 2008, 14:54:21
> Oggetto: Re: [PATCH] VFS: make file->f_pos access atomic on 32bit arch
>
> On Tuesday 16 September 2008 21:11, Michael Trimarchi wrote:
> > Hi,
> >
> > > + if (offset != file_pos_read(file))
> > > + file_pos_write(file, offset);
> > > retval = offset;
> > > }
> >
> > Is not more efficient here a compare xchg operation?
>
> The compiler should do it if it was.
> --
> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at http://www.tux.org/lkml/
i was thinking about introducing a file_pos_update() not implemented using
file_pos_read()/file_pos_write() and taking the seq_lock or
disabling preemption only one time
__________________________________________________
Do You Yahoo!?
Poco spazio e tanto spam? Yahoo! Mail ti protegge dallo spam e ti da tanto spazio gratuito per i tuoi file e i messaggi
http://mail.yahoo.it
--
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: [PATCH] VFS: make file->f_pos access atomic on 32bit arch
2008-09-16 13:57 Michael Trimarchi
@ 2008-09-16 14:11 ` Nick Piggin
0 siblings, 0 replies; 11+ messages in thread
From: Nick Piggin @ 2008-09-16 14:11 UTC (permalink / raw)
To: Michael Trimarchi; +Cc: Hisashi Hifumi, akpm, linux-kernel, linux-fsdevel
On Tuesday 16 September 2008 23:57, Michael Trimarchi wrote:
> Hi,
>
>
>
> ----- Messaggio originale -----
>
> > Da: Nick Piggin <nickpiggin@yahoo.com.au>
> > A: Michael Trimarchi <trimarchimichael@yahoo.it>
> > Cc: Hisashi Hifumi <hifumi.hisashi@oss.ntt.co.jp>;
> > akpm@linux-foundation.org; linux-kernel@vger.kernel.org;
> > linux-fsdevel@vger.kernel.org Inviato: Martedì 16 settembre 2008,
> > 14:54:21
> > Oggetto: Re: [PATCH] VFS: make file->f_pos access atomic on 32bit arch
> >
> > On Tuesday 16 September 2008 21:11, Michael Trimarchi wrote:
> > > Hi,
> > >
> > > > + if (offset != file_pos_read(file))
> > > > + file_pos_write(file, offset);
> > > > retval = offset;
> > > > }
> > >
> > > Is not more efficient here a compare xchg operation?
> >
> > The compiler should do it if it was.
> > --
> > To unsubscribe from this list: send the line "unsubscribe linux-kernel"
> > in the body of a message to majordomo@vger.kernel.org
> > More majordomo info at http://vger.kernel.org/majordomo-info.html
> > Please read the FAQ at http://www.tux.org/lkml/
>
> i was thinking about introducing a file_pos_update() not implemented using
> file_pos_read()/file_pos_write() and taking the seq_lock or
> disabling preemption only one time
AFAIK, the only way to do an atomic 64 bit store on 32-bit x86 is
"cmpxchg8b" (with lock prefix on SMP). That would be far slower
I'm sure.
^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: [PATCH] VFS: make file->f_pos access atomic on 32bit arch
@ 2008-09-16 14:57 Michael Trimarchi
2008-09-17 1:24 ` Nick Piggin
2008-09-18 7:08 ` Hisashi Hifumi
0 siblings, 2 replies; 11+ messages in thread
From: Michael Trimarchi @ 2008-09-16 14:57 UTC (permalink / raw)
To: Nick Piggin; +Cc: Hisashi Hifumi, akpm, linux-kernel, linux-fsdevel
Hi,
----- Messaggio originale -----
> Da: Nick Piggin <nickpiggin@yahoo.com.au>
> A: Michael Trimarchi <trimarchimichael@yahoo.it>
>
Cc: Hisashi Hifumi <hifumi.hisashi@oss.ntt.co.jp>;
akpm@linux-foundation.org; linux-kernel@vger.kernel.org;
linux-fsdevel@vger.kernel.org
> Inviato: Martedì 16 settembre 2008, 16:11:21
> Oggetto: Re: [PATCH] VFS: make file->f_pos access atomic on 32bit arch
>
...
> > i was thinking about introducing a file_pos_update() not implemented using
> > file_pos_read()/file_pos_write() and taking the seq_lock or
> > disabling preemption only one time
>
> AFAIK, the only way to do an atomic 64 bit store on 32-bit x86 is
> "cmpxchg8b" (with lock prefix on SMP). That would be far slower
> I'm sure.
Maybe it is clear with an example:
+/*
+ * file_pos_read/write is atomic by using sequence lock on 32bit arch.
+ */
+static inline loff_t file_pos_read(struct file *file)
+{
+#if BITS_PER_LONG == 32 && defined(CONFIG_SMP)
+ loff_t pos;
+ unsigned int seq;
+
+ do {
+ seq = read_seqbegin(&file->f_pos_seqlock);
+ pos = file->f_pos;
+ } while (read_seqretry(&file->f_pos_seqlock, seq));
+ return pos;
+#elif BITS_PER_LONG == 32 && defined(CONFIG_PREEMPT)
+ loff_t pos;
+
+ preempt_disable();
+ pos = file->f_pos;
+ preempt_enable();
+ return pos;
+#else
+ return file->f_pos;
+#endif
+}
+
+static inline void file_pos_write(struct file *file, loff_t pos)
+{
+#if BITS_PER_LONG == 32 && defined(CONFIG_SMP)
+ write_seqlock(&file->f_pos_seqlock);
+ file->f_pos = pos;
+ write_sequnlock(&file->f_pos_seqlock);
+#elif BITS_PER_LONG == 32 && defined(CONFIG_PREEMPT)
+ preempt_disable();
+ file->f_pos = pos;
+ preempt_enable();
+#else
+ file->f_pos = pos;
+#endif
+}
+
if (offset != file_pos_read(file))
file_pos_write(file, offset);
Compile with:
BITS_PER_LONG=32
CONFIG_PREEMPT
The code does:
file_pos_read(...)
preempt_disable
....
preempt_enable
file_pos_write(...)
preempt_disable
....
preempt_enable
with the file_pos_update() the code does:
preempt_disable
...
...
preempt_enable
Regards Michael
__________________________________________________
Do You Yahoo!?
Poco spazio e tanto spam? Yahoo! Mail ti protegge dallo spam e ti da tanto spazio gratuito per i tuoi file e i messaggi
http://mail.yahoo.it
--
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: [PATCH] VFS: make file->f_pos access atomic on 32bit arch
[not found] <bcGib-3Fp-7@gated-at.bofh.it>
@ 2008-09-16 15:32 ` Bodo Eggert
0 siblings, 0 replies; 11+ messages in thread
From: Bodo Eggert @ 2008-09-16 15:32 UTC (permalink / raw)
To: Hisashi Hifumi, akpm, linux-kernel, linux-fsdevel
Hisashi Hifumi <hifumi.hisashi@oss.ntt.co.jp> wrote:
> Protecting file->f_pos with a lock adds some overhead but I think we should
> fix this.
> I used seqlock to serialize the access to file->f_pos. This approach is
> similar to inode->i_size synchronization.
>
> Comments?
> + if (offset != file_pos_read(file))
> + file_pos_write(file, offset);
Maybe file_pos_write should do this check?
^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: [PATCH] VFS: make file->f_pos access atomic on 32bit arch
@ 2008-09-16 16:01 Michael Trimarchi
0 siblings, 0 replies; 11+ messages in thread
From: Michael Trimarchi @ 2008-09-16 16:01 UTC (permalink / raw)
To: 7eggert, Hisashi Hifumi, akpm, linux-kernel, linux-fsdevel
hi,
----- Messaggio originale -----
> Da: Bodo Eggert <7eggert@gmx.de>
> A: Hisashi Hifumi <hifumi.hisashi@oss.ntt.co.jp>; akpm@linux-foundation.org; linux-kernel@vger.kernel.org; linux-fsdevel@vger.kernel.org
> Inviato: Martedì 16 settembre 2008, 17:32:08
> Oggetto: Re: [PATCH] VFS: make file->f_pos access atomic on 32bit arch
>
> Hisashi Hifumi wrote:
>
> > Protecting file->f_pos with a lock adds some overhead but I think we should
> > fix this.
> > I used seqlock to serialize the access to file->f_pos. This approach is
> > similar to inode->i_size synchronization.
> >
> > Comments?
>
> > + if (offset != file_pos_read(file))
> > + file_pos_write(file, offset);
>
> Maybe file_pos_write should do this check?
>
I agree :)
Regards Michael
> --
> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at http://www.tux.org/lkml/
__________________________________________________
Do You Yahoo!?
Poco spazio e tanto spam? Yahoo! Mail ti protegge dallo spam e ti da tanto spazio gratuito per i tuoi file e i messaggi
http://mail.yahoo.it
--
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: [PATCH] VFS: make file->f_pos access atomic on 32bit arch
2008-09-16 14:57 [PATCH] VFS: make file->f_pos access atomic on 32bit arch Michael Trimarchi
@ 2008-09-17 1:24 ` Nick Piggin
2008-09-18 9:59 ` Hisashi Hifumi
2008-09-18 7:08 ` Hisashi Hifumi
1 sibling, 1 reply; 11+ messages in thread
From: Nick Piggin @ 2008-09-17 1:24 UTC (permalink / raw)
To: Michael Trimarchi; +Cc: Hisashi Hifumi, akpm, linux-kernel, linux-fsdevel
On Wednesday 17 September 2008 00:57, Michael Trimarchi wrote:
> Hi,
>
> ----- Messaggio originale -----
>
> > Da: Nick Piggin <nickpiggin@yahoo.com.au>
> > A: Michael Trimarchi <trimarchimichael@yahoo.it>
>
> Cc: Hisashi Hifumi <hifumi.hisashi@oss.ntt.co.jp>;
> akpm@linux-foundation.org; linux-kernel@vger.kernel.org;
> linux-fsdevel@vger.kernel.org
>
> > Inviato: Martedì 16 settembre 2008, 16:11:21
> > Oggetto: Re: [PATCH] VFS: make file->f_pos access atomic on 32bit arch
>
> ...
>
> > > i was thinking about introducing a file_pos_update() not implemented
> > > using file_pos_read()/file_pos_write() and taking the seq_lock or
> > > disabling preemption only one time
> >
> > AFAIK, the only way to do an atomic 64 bit store on 32-bit x86 is
> > "cmpxchg8b" (with lock prefix on SMP). That would be far slower
> > I'm sure.
>
> Maybe it is clear with an example:
I understand and agree.
--
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: [PATCH] VFS: make file->f_pos access atomic on 32bit arch
2008-09-16 14:57 [PATCH] VFS: make file->f_pos access atomic on 32bit arch Michael Trimarchi
2008-09-17 1:24 ` Nick Piggin
@ 2008-09-18 7:08 ` Hisashi Hifumi
1 sibling, 0 replies; 11+ messages in thread
From: Hisashi Hifumi @ 2008-09-18 7:08 UTC (permalink / raw)
To: Michael Trimarchi, Nick Piggin, akpm; +Cc: linux-kernel, linux-fsdevel
>
>Maybe it is clear with an example:
>
>+/*
>+ * file_pos_read/write is atomic by using sequence lock on 32bit arch.
>+ */
>+static inline loff_t file_pos_read(struct file *file)
>+{
>+#if BITS_PER_LONG == 32 && defined(CONFIG_SMP)
>+ loff_t pos;
>+ unsigned int seq;
>+
>+ do {
>+ seq = read_seqbegin(&file->f_pos_seqlock);
>+ pos = file->f_pos;
>+ } while (read_seqretry(&file->f_pos_seqlock, seq));
>+ return pos;
>+#elif BITS_PER_LONG == 32 && defined(CONFIG_PREEMPT)
>+ loff_t pos;
>+
>+ preempt_disable();
>+ pos = file->f_pos;
>+ preempt_enable();
>+ return pos;
>+#else
>+ return file->f_pos;
>+#endif
>+}
>+
>+static inline void file_pos_write(struct file *file, loff_t pos)
>+{
>+#if BITS_PER_LONG == 32 && defined(CONFIG_SMP)
>+ write_seqlock(&file->f_pos_seqlock);
>+ file->f_pos = pos;
>+ write_sequnlock(&file->f_pos_seqlock);
>+#elif BITS_PER_LONG == 32 && defined(CONFIG_PREEMPT)
>+ preempt_disable();
>+ file->f_pos = pos;
>+ preempt_enable();
>+#else
>+ file->f_pos = pos;
>+#endif
>+}
>+
>if (offset != file_pos_read(file))
> file_pos_write(file, offset);
>
>Compile with:
>BITS_PER_LONG=32
>CONFIG_PREEMPT
>
>The code does:
>
>file_pos_read(...)
>preempt_disable
>....
>preempt_enable
>
>file_pos_write(...)
>preempt_disable
>....
>preempt_enable
>
>with the file_pos_update() the code does:
>
>preempt_disable
>...
>...
>preempt_enable
>
>Regards Michael
>
I think file_pos_update() with BITS_PER_LONG=32 && CONFIG_PREEMPT is easy,
this is like this.
static inline void file_pos_update(struct file *file, loff_t pos)
{
preempt_disable();
if (pos != file->f_pos)
file->f_pos = pos;
preempt_enable();
}
But I think BITS_PER_LONG=32 && CONFIG_SMP version has a problem, this is like this.
static inline void file_pos_update(struct file *file, loff_t pos)
{
write_seqlock(&file->f_pos_seqlock);
if (pos != file->f_pos)
file->f_pos = pos;
write_sequnlock(&file->f_pos_seqlock);
}
file_pos_update acquires seqlock only once, but write_seqlock is called whether file->f_pos
is overwritten or not.
^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: [PATCH] VFS: make file->f_pos access atomic on 32bit arch
2008-09-17 1:24 ` Nick Piggin
@ 2008-09-18 9:59 ` Hisashi Hifumi
0 siblings, 0 replies; 11+ messages in thread
From: Hisashi Hifumi @ 2008-09-18 9:59 UTC (permalink / raw)
To: Nick Piggin, Michael Trimarchi, akpm; +Cc: linux-kernel, linux-fsdevel
At 10:24 08/09/17, Nick Piggin wrote:
>> > > i was thinking about introducing a file_pos_update() not implemented
>> > > using file_pos_read()/file_pos_write() and taking the seq_lock or
>> > > disabling preemption only one time
>> >
>> > AFAIK, the only way to do an atomic 64 bit store on 32-bit x86 is
>> > "cmpxchg8b" (with lock prefix on SMP). That would be far slower
>> > I'm sure.
>>
>> Maybe it is clear with an example:
>
>I understand and agree.
I modified my patch by introducing file_pos_update().
Comments?
Signed-off-by: Hisashi Hifumi <hifumi.hisashi@oss.ntt.co.jp>
diff -Nrup linux-2.6.27-rc6.org/fs/block_dev.c linux-2.6.27-rc6.fpos/fs/block_dev.c
--- linux-2.6.27-rc6.org/fs/block_dev.c 2008-09-10 10:30:08.000000000 +0900
+++ linux-2.6.27-rc6.fpos/fs/block_dev.c 2008-09-18 18:24:42.000000000 +0900
@@ -225,13 +225,11 @@ static loff_t block_llseek(struct file *
offset += size;
break;
case 1:
- offset += file->f_pos;
+ offset += file_pos_read(file);
}
retval = -EINVAL;
if (offset >= 0 && offset <= size) {
- if (offset != file->f_pos) {
- file->f_pos = offset;
- }
+ file_pos_update(file, offset);
retval = offset;
}
mutex_unlock(&bd_inode->i_mutex);
diff -Nrup linux-2.6.27-rc6.org/fs/file_table.c linux-2.6.27-rc6.fpos/fs/file_table.c
--- linux-2.6.27-rc6.org/fs/file_table.c 2008-09-10 10:30:08.000000000 +0900
+++ linux-2.6.27-rc6.fpos/fs/file_table.c 2008-09-16 14:18:29.000000000 +0900
@@ -121,6 +121,7 @@ struct file *get_empty_filp(void)
tsk = current;
INIT_LIST_HEAD(&f->f_u.fu_list);
atomic_long_set(&f->f_count, 1);
+ f_pos_ordered_init(f);
rwlock_init(&f->f_owner.lock);
f->f_uid = tsk->fsuid;
f->f_gid = tsk->fsgid;
diff -Nrup linux-2.6.27-rc6.org/fs/locks.c linux-2.6.27-rc6.fpos/fs/locks.c
--- linux-2.6.27-rc6.org/fs/locks.c 2008-09-10 10:30:08.000000000 +0900
+++ linux-2.6.27-rc6.fpos/fs/locks.c 2008-09-16 18:35:14.000000000 +0900
@@ -317,7 +317,7 @@ static int flock_to_posix_lock(struct fi
start = 0;
break;
case SEEK_CUR:
- start = filp->f_pos;
+ start = file_pos_read(filp);
break;
case SEEK_END:
start = i_size_read(filp->f_path.dentry->d_inode);
@@ -367,7 +367,7 @@ static int flock64_to_posix_lock(struct
start = 0;
break;
case SEEK_CUR:
- start = filp->f_pos;
+ start = file_pos_read(filp);
break;
case SEEK_END:
start = i_size_read(filp->f_path.dentry->d_inode);
diff -Nrup linux-2.6.27-rc6.org/fs/read_write.c linux-2.6.27-rc6.fpos/fs/read_write.c
--- linux-2.6.27-rc6.org/fs/read_write.c 2008-09-10 10:30:08.000000000 +0900
+++ linux-2.6.27-rc6.fpos/fs/read_write.c 2008-09-18 18:27:53.000000000 +0900
@@ -42,15 +42,13 @@ generic_file_llseek_unlocked(struct file
offset += inode->i_size;
break;
case SEEK_CUR:
- offset += file->f_pos;
+ offset += file_pos_read(file);
}
retval = -EINVAL;
if (offset>=0 && offset<=inode->i_sb->s_maxbytes) {
/* Special lock needed here? */
- if (offset != file->f_pos) {
- file->f_pos = offset;
+ if (file_pos_update(file, offset))
file->f_version = 0;
- }
retval = offset;
}
return retval;
@@ -83,14 +81,12 @@ loff_t default_llseek(struct file *file,
offset += i_size_read(file->f_path.dentry->d_inode);
break;
case SEEK_CUR:
- offset += file->f_pos;
+ offset += file_pos_read(file);
}
retval = -EINVAL;
if (offset >= 0) {
- if (offset != file->f_pos) {
- file->f_pos = offset;
+ if (file_pos_update(file, offset))
file->f_version = 0;
- }
retval = offset;
}
unlock_kernel();
@@ -324,16 +320,6 @@ ssize_t vfs_write(struct file *file, con
EXPORT_SYMBOL(vfs_write);
-static inline loff_t file_pos_read(struct file *file)
-{
- return file->f_pos;
-}
-
-static inline void file_pos_write(struct file *file, loff_t pos)
-{
- file->f_pos = pos;
-}
-
asmlinkage ssize_t sys_read(unsigned int fd, char __user * buf, size_t count)
{
struct file *file;
diff -Nrup linux-2.6.27-rc6.org/include/linux/fs.h linux-2.6.27-rc6.fpos/include/linux/fs.h
--- linux-2.6.27-rc6.org/include/linux/fs.h 2008-09-10 10:30:10.000000000 +0900
+++ linux-2.6.27-rc6.fpos/include/linux/fs.h 2008-09-18 18:29:09.000000000 +0900
@@ -805,6 +805,16 @@ static inline int ra_has_index(struct fi
#define FILE_MNT_WRITE_TAKEN 1
#define FILE_MNT_WRITE_RELEASED 2
+/*
+ * Use sequence lock to get consistent f_pos on 32-bit processors.
+ */
+#if BITS_PER_LONG == 32 && defined(CONFIG_SMP)
+#define __NEED_F_POS_ORDERED
+#define f_pos_ordered_init(file) seqlock_init(&file->f_pos_seqlock)
+#else
+#define f_pos_ordered_init(file) do { } while (0)
+#endif
+
struct file {
/*
* fu_list becomes invalid after file_free is called and queued via
@@ -822,6 +832,9 @@ struct file {
unsigned int f_flags;
mode_t f_mode;
loff_t f_pos;
+#ifdef __NEED_F_POS_ORDERED
+ seqlock_t f_pos_seqlock;
+#endif
struct fown_struct f_owner;
unsigned int f_uid, f_gid;
struct file_ra_state f_ra;
@@ -850,6 +863,73 @@ extern spinlock_t files_lock;
#define get_file(x) atomic_long_inc(&(x)->f_count)
#define file_count(x) atomic_long_read(&(x)->f_count)
+/*
+ * file_pos_read/write is atomic by using sequence lock on 32bit arch.
+ */
+static inline loff_t file_pos_read(struct file *file)
+{
+#if BITS_PER_LONG == 32 && defined(CONFIG_SMP)
+ loff_t pos;
+ unsigned int seq;
+
+ do {
+ seq = read_seqbegin(&file->f_pos_seqlock);
+ pos = file->f_pos;
+ } while (read_seqretry(&file->f_pos_seqlock, seq));
+ return pos;
+#elif BITS_PER_LONG == 32 && defined(CONFIG_PREEMPT)
+ loff_t pos;
+
+ preempt_disable();
+ pos = file->f_pos;
+ preempt_enable();
+ return pos;
+#else
+ return file->f_pos;
+#endif
+}
+
+static inline void file_pos_write(struct file *file, loff_t pos)
+{
+#if BITS_PER_LONG == 32 && defined(CONFIG_SMP)
+ write_seqlock(&file->f_pos_seqlock);
+ file->f_pos = pos;
+ write_sequnlock(&file->f_pos_seqlock);
+#elif BITS_PER_LONG == 32 && defined(CONFIG_PREEMPT)
+ preempt_disable();
+ file->f_pos = pos;
+ preempt_enable();
+#else
+ file->f_pos = pos;
+#endif
+}
+
+static inline int file_pos_update(struct file *file, loff_t offset)
+{
+ int ret = 0;
+#if BITS_PER_LONG == 32 && defined(CONFIG_SMP)
+ write_seqlock(&file->f_pos_seqlock);
+ if (offset != file->f_pos) {
+ file->f_pos = offset;
+ ret = 1;
+ }
+ write_sequnlock(&file->f_pos_seqlock);
+#elif BITS_PER_LONG == 32 && defined(CONFIG_PREEMPT)
+ preempt_disable();
+ if (offset != file->f_pos) {
+ file->f_pos = offset;
+ ret = 1;
+ }
+ preempt_enable();
+#else
+ if (offset != file->f_pos) {
+ file->f_pos = offset;
+ ret = 1;
+ }
+#endif
+ return ret;
+}
+
#ifdef CONFIG_DEBUG_WRITECOUNT
static inline void file_take_write(struct file *f)
{
^ permalink raw reply [flat|nested] 11+ messages in thread
end of thread, other threads:[~2008-09-18 10:01 UTC | newest]
Thread overview: 11+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2008-09-16 14:57 [PATCH] VFS: make file->f_pos access atomic on 32bit arch Michael Trimarchi
2008-09-17 1:24 ` Nick Piggin
2008-09-18 9:59 ` Hisashi Hifumi
2008-09-18 7:08 ` Hisashi Hifumi
-- strict thread matches above, loose matches on Subject: below --
2008-09-16 16:01 Michael Trimarchi
[not found] <bcGib-3Fp-7@gated-at.bofh.it>
2008-09-16 15:32 ` Bodo Eggert
2008-09-16 13:57 Michael Trimarchi
2008-09-16 14:11 ` Nick Piggin
2008-09-16 11:11 Michael Trimarchi
2008-09-16 12:54 ` Nick Piggin
2008-09-16 10:35 Hisashi Hifumi
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).