From: Hisashi Hifumi <hifumi.hisashi@oss.ntt.co.jp>
To: akpm@linux-foundation.org
Cc: linux-kernel@vger.kernel.org, linux-fsdevel@vger.kernel.org
Subject: [RESEND] [PATCH] VFS: make file->f_pos access atomic on 32bit arch
Date: Tue, 07 Oct 2008 14:07:23 +0900 [thread overview]
Message-ID: <6.0.0.20.2.20081007140438.0580f110@172.19.0.2> (raw)
Hi Andrew.
Currently reading or writing file->f_pos is not atomic on 32bit environment,
so two or more simultaneous access can corrupt file->f_pos value.
There are some past discussions about this issue, but this is not fixed yet.
http://marc.info/?l=linux-kernel&m=120764199819899&w=2
http://marc.info/?l=linux-kernel&m=114490379102476&w=2
Protecting file->f_pos with a lock adds some overhead but I think we should
fix this.
I used seqlock to serialize the access to file->f_pos. This approach is similar
to inode->i_size synchronization.
Thanks.
Signed-off-by: Hisashi Hifumi <hifumi.hisashi@oss.ntt.co.jp>
diff -Nrup linux-2.6.27-rc9.org/fs/block_dev.c linux-2.6.27-rc9/fs/block_dev.c
--- linux-2.6.27-rc9.org/fs/block_dev.c 2008-10-07 11:44:42.000000000 +0900
+++ linux-2.6.27-rc9/fs/block_dev.c 2008-10-07 11:48:26.000000000 +0900
@@ -225,13 +225,11 @@ static loff_t block_llseek(struct file *
offset += size;
break;
case 1:
- offset += file->f_pos;
+ offset += file_pos_read(file);
}
retval = -EINVAL;
if (offset >= 0 && offset <= size) {
- if (offset != file->f_pos) {
- file->f_pos = offset;
- }
+ file_pos_update(file, offset);
retval = offset;
}
mutex_unlock(&bd_inode->i_mutex);
diff -Nrup linux-2.6.27-rc9.org/fs/file_table.c linux-2.6.27-rc9/fs/file_table.c
--- linux-2.6.27-rc9.org/fs/file_table.c 2008-10-07 11:44:42.000000000 +0900
+++ linux-2.6.27-rc9/fs/file_table.c 2008-10-07 11:48:26.000000000 +0900
@@ -121,6 +121,7 @@ struct file *get_empty_filp(void)
tsk = current;
INIT_LIST_HEAD(&f->f_u.fu_list);
atomic_long_set(&f->f_count, 1);
+ f_pos_ordered_init(f);
rwlock_init(&f->f_owner.lock);
f->f_uid = tsk->fsuid;
f->f_gid = tsk->fsgid;
diff -Nrup linux-2.6.27-rc9.org/fs/locks.c linux-2.6.27-rc9/fs/locks.c
--- linux-2.6.27-rc9.org/fs/locks.c 2008-10-07 11:44:42.000000000 +0900
+++ linux-2.6.27-rc9/fs/locks.c 2008-10-07 11:48:26.000000000 +0900
@@ -317,7 +317,7 @@ static int flock_to_posix_lock(struct fi
start = 0;
break;
case SEEK_CUR:
- start = filp->f_pos;
+ start = file_pos_read(filp);
break;
case SEEK_END:
start = i_size_read(filp->f_path.dentry->d_inode);
@@ -367,7 +367,7 @@ static int flock64_to_posix_lock(struct
start = 0;
break;
case SEEK_CUR:
- start = filp->f_pos;
+ start = file_pos_read(filp);
break;
case SEEK_END:
start = i_size_read(filp->f_path.dentry->d_inode);
diff -Nrup linux-2.6.27-rc9.org/fs/read_write.c linux-2.6.27-rc9/fs/read_write.c
--- linux-2.6.27-rc9.org/fs/read_write.c 2008-10-07 11:44:42.000000000 +0900
+++ linux-2.6.27-rc9/fs/read_write.c 2008-10-07 11:48:26.000000000 +0900
@@ -42,15 +42,13 @@ generic_file_llseek_unlocked(struct file
offset += inode->i_size;
break;
case SEEK_CUR:
- offset += file->f_pos;
+ offset += file_pos_read(file);
}
retval = -EINVAL;
if (offset>=0 && offset<=inode->i_sb->s_maxbytes) {
/* Special lock needed here? */
- if (offset != file->f_pos) {
- file->f_pos = offset;
+ if (file_pos_update(file, offset))
file->f_version = 0;
- }
retval = offset;
}
return retval;
@@ -83,14 +81,12 @@ loff_t default_llseek(struct file *file,
offset += i_size_read(file->f_path.dentry->d_inode);
break;
case SEEK_CUR:
- offset += file->f_pos;
+ offset += file_pos_read(file);
}
retval = -EINVAL;
if (offset >= 0) {
- if (offset != file->f_pos) {
- file->f_pos = offset;
+ if (file_pos_update(file, offset))
file->f_version = 0;
- }
retval = offset;
}
unlock_kernel();
@@ -324,16 +320,6 @@ ssize_t vfs_write(struct file *file, con
EXPORT_SYMBOL(vfs_write);
-static inline loff_t file_pos_read(struct file *file)
-{
- return file->f_pos;
-}
-
-static inline void file_pos_write(struct file *file, loff_t pos)
-{
- file->f_pos = pos;
-}
-
asmlinkage ssize_t sys_read(unsigned int fd, char __user * buf, size_t count)
{
struct file *file;
diff -Nrup linux-2.6.27-rc9.org/include/linux/fs.h linux-2.6.27-rc9/include/linux/fs.h
--- linux-2.6.27-rc9.org/include/linux/fs.h 2008-10-07 11:44:44.000000000 +0900
+++ linux-2.6.27-rc9/include/linux/fs.h 2008-10-07 11:48:26.000000000 +0900
@@ -805,6 +805,16 @@ static inline int ra_has_index(struct fi
#define FILE_MNT_WRITE_TAKEN 1
#define FILE_MNT_WRITE_RELEASED 2
+/*
+ * Use sequence lock to get consistent f_pos on 32-bit processors.
+ */
+#if BITS_PER_LONG == 32 && defined(CONFIG_SMP)
+#define __NEED_F_POS_ORDERED
+#define f_pos_ordered_init(file) seqlock_init(&file->f_pos_seqlock)
+#else
+#define f_pos_ordered_init(file) do { } while (0)
+#endif
+
struct file {
/*
* fu_list becomes invalid after file_free is called and queued via
@@ -822,6 +832,9 @@ struct file {
unsigned int f_flags;
mode_t f_mode;
loff_t f_pos;
+#ifdef __NEED_F_POS_ORDERED
+ seqlock_t f_pos_seqlock;
+#endif
struct fown_struct f_owner;
unsigned int f_uid, f_gid;
struct file_ra_state f_ra;
@@ -850,6 +863,73 @@ extern spinlock_t files_lock;
#define get_file(x) atomic_long_inc(&(x)->f_count)
#define file_count(x) atomic_long_read(&(x)->f_count)
+/*
+ * file_pos_read/write is atomic by using sequence lock on 32bit arch.
+ */
+static inline loff_t file_pos_read(struct file *file)
+{
+#if BITS_PER_LONG == 32 && defined(CONFIG_SMP)
+ loff_t pos;
+ unsigned int seq;
+
+ do {
+ seq = read_seqbegin(&file->f_pos_seqlock);
+ pos = file->f_pos;
+ } while (read_seqretry(&file->f_pos_seqlock, seq));
+ return pos;
+#elif BITS_PER_LONG == 32 && defined(CONFIG_PREEMPT)
+ loff_t pos;
+
+ preempt_disable();
+ pos = file->f_pos;
+ preempt_enable();
+ return pos;
+#else
+ return file->f_pos;
+#endif
+}
+
+static inline void file_pos_write(struct file *file, loff_t pos)
+{
+#if BITS_PER_LONG == 32 && defined(CONFIG_SMP)
+ write_seqlock(&file->f_pos_seqlock);
+ file->f_pos = pos;
+ write_sequnlock(&file->f_pos_seqlock);
+#elif BITS_PER_LONG == 32 && defined(CONFIG_PREEMPT)
+ preempt_disable();
+ file->f_pos = pos;
+ preempt_enable();
+#else
+ file->f_pos = pos;
+#endif
+}
+
+static inline int file_pos_update(struct file *file, loff_t offset)
+{
+ int ret = 0;
+#if BITS_PER_LONG == 32 && defined(CONFIG_SMP)
+ write_seqlock(&file->f_pos_seqlock);
+ if (offset != file->f_pos) {
+ file->f_pos = offset;
+ ret = 1;
+ }
+ write_sequnlock(&file->f_pos_seqlock);
+#elif BITS_PER_LONG == 32 && defined(CONFIG_PREEMPT)
+ preempt_disable();
+ if (offset != file->f_pos) {
+ file->f_pos = offset;
+ ret = 1;
+ }
+ preempt_enable();
+#else
+ if (offset != file->f_pos) {
+ file->f_pos = offset;
+ ret = 1;
+ }
+#endif
+ return ret;
+}
+
#ifdef CONFIG_DEBUG_WRITECOUNT
static inline void file_take_write(struct file *f)
{
next reply other threads:[~2008-10-07 5:10 UTC|newest]
Thread overview: 28+ messages / expand[flat|nested] mbox.gz Atom feed top
2008-10-07 5:07 Hisashi Hifumi [this message]
2008-10-07 6:43 ` [RESEND] [PATCH] VFS: make file->f_pos access atomic on 32bit arch Andi Kleen
2008-10-07 10:11 ` Hisashi Hifumi
2008-10-07 10:29 ` Andi Kleen
2008-10-07 16:27 ` Nick Piggin
2008-10-07 17:50 ` Andrew Morton
2008-10-07 18:59 ` Peter Zijlstra
2008-10-08 2:35 ` Nick Piggin
2008-10-08 2:52 ` Matthew Wilcox
2008-10-09 12:23 ` Pavel Machek
2008-10-09 12:49 ` Valdis.Kletnieks
2008-10-09 13:01 ` Matthew Wilcox
2008-10-09 13:38 ` Miklos Szeredi
2008-10-09 14:58 ` Linus Torvalds
2008-10-09 17:29 ` Jeremy Fitzhardinge
2008-10-08 4:48 ` Hisashi Hifumi
2008-10-08 5:10 ` Nick Piggin
2008-10-08 5:16 ` Matthew Wilcox
2008-10-08 6:28 ` Andrew Morton
2008-10-08 6:51 ` Peter Zijlstra
2008-10-08 8:32 ` Eric Dumazet
2008-10-08 8:48 ` Nick Piggin
2008-10-08 9:17 ` Peter Zijlstra
2008-10-09 21:51 ` dcg
2008-10-10 2:25 ` Nick Piggin
2008-10-09 12:16 ` Pavel Machek
2008-10-08 0:40 ` Nick Piggin
2008-10-07 18:00 ` Matthew Wilcox
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=6.0.0.20.2.20081007140438.0580f110@172.19.0.2 \
--to=hifumi.hisashi@oss.ntt.co.jp \
--cc=akpm@linux-foundation.org \
--cc=linux-fsdevel@vger.kernel.org \
--cc=linux-kernel@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).