* [2.6.26 PATCH, RESEND]: fs_stack/eCryptfs: fsstack_copy_* updates
@ 2008-04-21 6:50 Erez Zadok
2008-04-30 17:17 ` Andrew Morton
0 siblings, 1 reply; 12+ messages in thread
From: Erez Zadok @ 2008-04-21 6:50 UTC (permalink / raw)
To: akpm; +Cc: linux-fsdevel, linux-kernel, Al Viro, hch, Mike Halcrow,
Hugh Dickins
1. remove the 3rd arg to fsstack_copy_attr_all. There are no users for it:
ecryptfs never used the 3rd arg; unionfs stopped using it a long time
ago. Halcrow ok'ed this patch some time ago.
2. add necessary locking for 32-bit smp systems in fsstack_copy_inode_size
(courtesy Hugh Dickins).
3. minor commenting style changes, and addition of copyrights which were
missing.
Acked-by: Mike Halcrow <mhalcrow@us.ibm.com>
Signed-off-by: Erez Zadok <ezk@cs.sunysb.edu>
diff --git a/fs/ecryptfs/dentry.c b/fs/ecryptfs/dentry.c
index 5e59658..4621f89 100644
--- a/fs/ecryptfs/dentry.c
+++ b/fs/ecryptfs/dentry.c
@@ -62,7 +62,7 @@ static int ecryptfs_d_revalidate(struct dentry *dentry, struct nameidata *nd)
struct inode *lower_inode =
ecryptfs_inode_to_lower(dentry->d_inode);
- fsstack_copy_attr_all(dentry->d_inode, lower_inode, NULL);
+ fsstack_copy_attr_all(dentry->d_inode, lower_inode);
}
out:
return rc;
diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c
index e238611..687819d 100644
--- a/fs/ecryptfs/inode.c
+++ b/fs/ecryptfs/inode.c
@@ -575,9 +575,9 @@ ecryptfs_rename(struct inode *old_dir, struct dentry *old_dentry,
lower_new_dir_dentry->d_inode, lower_new_dentry);
if (rc)
goto out_lock;
- fsstack_copy_attr_all(new_dir, lower_new_dir_dentry->d_inode, NULL);
+ fsstack_copy_attr_all(new_dir, lower_new_dir_dentry->d_inode);
if (new_dir != old_dir)
- fsstack_copy_attr_all(old_dir, lower_old_dir_dentry->d_inode, NULL);
+ fsstack_copy_attr_all(old_dir, lower_old_dir_dentry->d_inode);
out_lock:
unlock_rename(lower_old_dir_dentry, lower_new_dir_dentry);
dput(lower_new_dentry->d_parent);
@@ -910,7 +910,7 @@ static int ecryptfs_setattr(struct dentry *dentry, struct iattr *ia)
rc = notify_change(lower_dentry, ia);
out:
- fsstack_copy_attr_all(inode, lower_inode, NULL);
+ fsstack_copy_attr_all(inode, lower_inode);
return rc;
}
diff --git a/fs/ecryptfs/main.c b/fs/ecryptfs/main.c
index d25ac95..7eac062 100644
--- a/fs/ecryptfs/main.c
+++ b/fs/ecryptfs/main.c
@@ -211,7 +211,7 @@ int ecryptfs_interpose(struct dentry *lower_dentry, struct dentry *dentry,
d_add(dentry, inode);
else
d_instantiate(dentry, inode);
- fsstack_copy_attr_all(inode, lower_inode, NULL);
+ fsstack_copy_attr_all(inode, lower_inode);
/* This size will be overwritten for real files w/ headers and
* other metadata */
fsstack_copy_inode_size(inode, lower_inode);
diff --git a/fs/stack.c b/fs/stack.c
index 67716f6..4336f2b 100644
--- a/fs/stack.c
+++ b/fs/stack.c
@@ -1,24 +1,42 @@
+/*
+ * Copyright (c) 2006-2007 Erez Zadok
+ * Copyright (c) 2006-2007 Josef 'Jeff' Sipek
+ * Copyright (c) 2006-2007 Stony Brook University
+ * Copyright (c) 2006-2007 The Research Foundation of SUNY
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
#include <linux/module.h>
#include <linux/fs.h>
#include <linux/fs_stack.h>
-/* does _NOT_ require i_mutex to be held.
+/*
+ * does _NOT_ require i_mutex to be held.
*
* This function cannot be inlined since i_size_{read,write} is rather
* heavy-weight on 32-bit systems
*/
void fsstack_copy_inode_size(struct inode *dst, const struct inode *src)
{
- i_size_write(dst, i_size_read((struct inode *)src));
+#if BITS_PER_LONG == 32 && defined(CONFIG_SMP)
+ spin_lock(&dst->i_lock);
+#endif
+ i_size_write(dst, i_size_read(src));
dst->i_blocks = src->i_blocks;
+#if BITS_PER_LONG == 32 && defined(CONFIG_SMP)
+ spin_unlock(&dst->i_lock);
+#endif
}
EXPORT_SYMBOL_GPL(fsstack_copy_inode_size);
-/* copy all attributes; get_nlinks is optional way to override the i_nlink
+/*
+ * copy all attributes; get_nlinks is optional way to override the i_nlink
* copying
*/
-void fsstack_copy_attr_all(struct inode *dest, const struct inode *src,
- int (*get_nlinks)(struct inode *))
+void fsstack_copy_attr_all(struct inode *dest, const struct inode *src)
{
dest->i_mode = src->i_mode;
dest->i_uid = src->i_uid;
@@ -29,14 +47,6 @@ void fsstack_copy_attr_all(struct inode *dest, const struct inode *src,
dest->i_ctime = src->i_ctime;
dest->i_blkbits = src->i_blkbits;
dest->i_flags = src->i_flags;
-
- /*
- * Update the nlinks AFTER updating the above fields, because the
- * get_links callback may depend on them.
- */
- if (!get_nlinks)
- dest->i_nlink = src->i_nlink;
- else
- dest->i_nlink = (*get_nlinks)(dest);
+ dest->i_nlink = src->i_nlink;
}
EXPORT_SYMBOL_GPL(fsstack_copy_attr_all);
diff --git a/include/linux/fs_stack.h b/include/linux/fs_stack.h
index bb516ce..6b52faf 100644
--- a/include/linux/fs_stack.h
+++ b/include/linux/fs_stack.h
@@ -1,17 +1,28 @@
+/*
+ * Copyright (c) 2006-2007 Erez Zadok
+ * Copyright (c) 2006-2007 Josef 'Jeff' Sipek
+ * Copyright (c) 2006-2007 Stony Brook University
+ * Copyright (c) 2006-2007 The Research Foundation of SUNY
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
#ifndef _LINUX_FS_STACK_H
#define _LINUX_FS_STACK_H
-/* This file defines generic functions used primarily by stackable
+/*
+ * This file defines generic functions used primarily by stackable
* filesystems; none of these functions require i_mutex to be held.
*/
#include <linux/fs.h>
/* externs for fs/stack.c */
-extern void fsstack_copy_attr_all(struct inode *dest, const struct inode *src,
- int (*get_nlinks)(struct inode *));
-
-extern void fsstack_copy_inode_size(struct inode *dst, const struct inode *src);
+extern void fsstack_copy_attr_all(struct inode *dest, const struct inode *src);
+extern void fsstack_copy_inode_size(struct inode *dst,
+ const struct inode *src);
/* inlines */
static inline void fsstack_copy_attr_atime(struct inode *dest,
^ permalink raw reply related [flat|nested] 12+ messages in thread
* Re: [2.6.26 PATCH, RESEND]: fs_stack/eCryptfs: fsstack_copy_* updates
2008-04-21 6:50 [2.6.26 PATCH, RESEND]: fs_stack/eCryptfs: fsstack_copy_* updates Erez Zadok
@ 2008-04-30 17:17 ` Andrew Morton
2008-04-30 21:09 ` Erez Zadok
` (2 more replies)
0 siblings, 3 replies; 12+ messages in thread
From: Andrew Morton @ 2008-04-30 17:17 UTC (permalink / raw)
To: Erez Zadok; +Cc: linux-fsdevel, linux-kernel, viro, hch, mhalcrow, hugh
On Mon, 21 Apr 2008 02:50:42 -0400
Erez Zadok <ezk@cs.sunysb.edu> wrote:
>
> 1. remove the 3rd arg to fsstack_copy_attr_all. There are no users for it:
> ecryptfs never used the 3rd arg; unionfs stopped using it a long time
> ago. Halcrow ok'ed this patch some time ago.
>
> 2. add necessary locking for 32-bit smp systems in fsstack_copy_inode_size
> (courtesy Hugh Dickins).
>
> 3. minor commenting style changes, and addition of copyrights which were
> missing.
>
> Acked-by: Mike Halcrow <mhalcrow@us.ibm.com>
> Signed-off-by: Erez Zadok <ezk@cs.sunysb.edu>
>
> ...
>
> void fsstack_copy_inode_size(struct inode *dst, const struct inode *src)
> {
> - i_size_write(dst, i_size_read((struct inode *)src));
> +#if BITS_PER_LONG == 32 && defined(CONFIG_SMP)
> + spin_lock(&dst->i_lock);
> +#endif
The defined(CONFIG_SMP) is wrong. The spinlock is here to protect
dst->i_blocks, but it can be corrupted via preemption on uniprocessor as
well. So a plain old
#if BITS_PER_LONG == 32
would fix that.
> + i_size_write(dst, i_size_read(src));
> dst->i_blocks = src->i_blocks;
> +#if BITS_PER_LONG == 32 && defined(CONFIG_SMP)
> + spin_unlock(&dst->i_lock);
> +#endif
> }
However, what about src->i_blocks? It is protected by src->i_lock. The
code as you have it here could read transient values.
Furthermore, i_lock is defined as an innermost lock, for protection of
inode internals. But here we're proposing "taking" inode->i_size_seqcount
inside i_lock. Not necessarily a problem, but it broke the old rule.
We're also doing a read_seqlock of a _different_ inode inside this inode's
i_lock. Again, this is not necessarily a problem (but it might be!) but it
adds complexity and needs thought.
Can we avoid having to think?
void fsstack_copy_inode_size(struct inode *dst, const struct inode *src)
{
blkcnt_t i_blocks;
loff_t i_size;
i_size = i_size_read(src);
spin_lock_32bit(&src->i_lock);
i_blocks = src->i_blocks;
spin_unlock_32bit(&src->i_lock);
i_size_write(dst, i_size);
spin_lock_32bit(&dst->i_lock)
dst->i_blocks = i_blocks;
spin_unlock_32bit(&dst->i_lock)
}
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [2.6.26 PATCH, RESEND]: fs_stack/eCryptfs: fsstack_copy_* updates
2008-04-30 17:17 ` Andrew Morton
@ 2008-04-30 21:09 ` Erez Zadok
2008-04-30 21:25 ` Andrew Morton
2008-05-01 17:18 ` Erez Zadok
2008-05-02 13:17 ` Hugh Dickins
2 siblings, 1 reply; 12+ messages in thread
From: Erez Zadok @ 2008-04-30 21:09 UTC (permalink / raw)
To: Andrew Morton
Cc: Erez Zadok, linux-fsdevel, linux-kernel, viro, hch, mhalcrow,
hugh
In message <20080430101704.9cbd6384.akpm@linux-foundation.org>, Andrew Morton writes:
> On Mon, 21 Apr 2008 02:50:42 -0400
> Erez Zadok <ezk@cs.sunysb.edu> wrote:
[...]
> Can we avoid having to think?
>
> void fsstack_copy_inode_size(struct inode *dst, const struct inode *src)
> {
> blkcnt_t i_blocks;
> loff_t i_size;
>
> i_size = i_size_read(src);
> spin_lock_32bit(&src->i_lock);
> i_blocks = src->i_blocks;
> spin_unlock_32bit(&src->i_lock);
>
> i_size_write(dst, i_size);
> spin_lock_32bit(&dst->i_lock)
> dst->i_blocks = i_blocks;
> spin_unlock_32bit(&dst->i_lock)
> }
Thanks. I can't say that I'm an expert in these SMP issues. But I'll run
your rewritten function through my 32 and 64-bit SMP and non-SMP systems,
and see how it behaves.
Erez.
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [2.6.26 PATCH, RESEND]: fs_stack/eCryptfs: fsstack_copy_* updates
2008-04-30 21:09 ` Erez Zadok
@ 2008-04-30 21:25 ` Andrew Morton
0 siblings, 0 replies; 12+ messages in thread
From: Andrew Morton @ 2008-04-30 21:25 UTC (permalink / raw)
To: Erez Zadok; +Cc: ezk, linux-fsdevel, linux-kernel, viro, hch, mhalcrow, hugh
On Wed, 30 Apr 2008 17:09:15 -0400
Erez Zadok <ezk@cs.sunysb.edu> wrote:
> In message <20080430101704.9cbd6384.akpm@linux-foundation.org>, Andrew Morton writes:
> > On Mon, 21 Apr 2008 02:50:42 -0400
> > Erez Zadok <ezk@cs.sunysb.edu> wrote:
> [...]
> > Can we avoid having to think?
> >
> > void fsstack_copy_inode_size(struct inode *dst, const struct inode *src)
> > {
> > blkcnt_t i_blocks;
> > loff_t i_size;
> >
> > i_size = i_size_read(src);
> > spin_lock_32bit(&src->i_lock);
> > i_blocks = src->i_blocks;
> > spin_unlock_32bit(&src->i_lock);
> >
> > i_size_write(dst, i_size);
> > spin_lock_32bit(&dst->i_lock)
> > dst->i_blocks = i_blocks;
> > spin_unlock_32bit(&dst->i_lock)
> > }
>
> Thanks. I can't say that I'm an expert in these SMP issues. But I'll run
> your rewritten function through my 32 and 64-bit SMP and non-SMP systems,
> and see how it behaves.
>
The obvious risk here is that there's no synchronisation between the
copying of i_size and i_blocks. If that's a problem, I _expect_ that
i_mutex wold give pretty good coverage (but insufficient for
mmap-write-over-a-hole, I guess).
And someone needs to write spin_lock_32bit() ;)
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [2.6.26 PATCH, RESEND]: fs_stack/eCryptfs: fsstack_copy_* updates
2008-04-30 17:17 ` Andrew Morton
2008-04-30 21:09 ` Erez Zadok
@ 2008-05-01 17:18 ` Erez Zadok
2008-05-01 19:21 ` Andrew Morton
2008-05-02 13:17 ` Hugh Dickins
2 siblings, 1 reply; 12+ messages in thread
From: Erez Zadok @ 2008-05-01 17:18 UTC (permalink / raw)
To: Andrew Morton
Cc: Erez Zadok, linux-fsdevel, linux-kernel, viro, hch, mhalcrow,
hugh
In message <20080430101704.9cbd6384.akpm@linux-foundation.org>, Andrew Morton writes:
[...[
> Can we avoid having to think?
>
> void fsstack_copy_inode_size(struct inode *dst, const struct inode *src)
> {
> blkcnt_t i_blocks;
> loff_t i_size;
>
> i_size = i_size_read(src);
> spin_lock_32bit(&src->i_lock);
> i_blocks = src->i_blocks;
> spin_unlock_32bit(&src->i_lock);
>
> i_size_write(dst, i_size);
> spin_lock_32bit(&dst->i_lock)
> dst->i_blocks = i_blocks;
> spin_unlock_32bit(&dst->i_lock)
> }
I can't find spin_[um]lock_32bit anywhere (checkd latest mmotm and linus's
tree). I therefore assume this was just your way of saying I should:
#if BITS_PER_LONG == 32
spin_unlock(&dst->i_lock);
#endif
Erez.
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [2.6.26 PATCH, RESEND]: fs_stack/eCryptfs: fsstack_copy_* updates
2008-05-01 17:18 ` Erez Zadok
@ 2008-05-01 19:21 ` Andrew Morton
2008-05-01 23:44 ` Erez Zadok
0 siblings, 1 reply; 12+ messages in thread
From: Andrew Morton @ 2008-05-01 19:21 UTC (permalink / raw)
To: Erez Zadok; +Cc: ezk, linux-fsdevel, linux-kernel, viro, hch, mhalcrow, hugh
On Thu, 1 May 2008 13:18:09 -0400
Erez Zadok <ezk@cs.sunysb.edu> wrote:
> In message <20080430101704.9cbd6384.akpm@linux-foundation.org>, Andrew Morton writes:
> [...[
> > Can we avoid having to think?
> >
> > void fsstack_copy_inode_size(struct inode *dst, const struct inode *src)
> > {
> > blkcnt_t i_blocks;
> > loff_t i_size;
> >
> > i_size = i_size_read(src);
> > spin_lock_32bit(&src->i_lock);
> > i_blocks = src->i_blocks;
> > spin_unlock_32bit(&src->i_lock);
> >
> > i_size_write(dst, i_size);
> > spin_lock_32bit(&dst->i_lock)
> > dst->i_blocks = i_blocks;
> > spin_unlock_32bit(&dst->i_lock)
> > }
>
> I can't find spin_[um]lock_32bit anywhere (checkd latest mmotm and linus's
> tree). I therefore assume this was just your way of saying I should:
>
> #if BITS_PER_LONG == 32
> spin_unlock(&dst->i_lock);
> #endif
Nope, it was my way of suggesting that you implement it ;)
include/linux/spinlock.h would be a good place.
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [2.6.26 PATCH, RESEND]: fs_stack/eCryptfs: fsstack_copy_* updates
2008-05-01 19:21 ` Andrew Morton
@ 2008-05-01 23:44 ` Erez Zadok
2008-05-02 0:08 ` Andrew Morton
0 siblings, 1 reply; 12+ messages in thread
From: Erez Zadok @ 2008-05-01 23:44 UTC (permalink / raw)
To: Andrew Morton
Cc: Erez Zadok, linux-fsdevel, linux-kernel, viro, hch, mhalcrow,
hugh
In message <20080501122136.bc2215c9.akpm@linux-foundation.org>, Andrew Morton writes:
> On Thu, 1 May 2008 13:18:09 -0400
> Erez Zadok <ezk@cs.sunysb.edu> wrote:
>
> > In message <20080430101704.9cbd6384.akpm@linux-foundation.org>, Andrew Morton writes:
> > [...[
> > > Can we avoid having to think?
> > >
> > > void fsstack_copy_inode_size(struct inode *dst, const struct inode *src)
> > > {
> > > blkcnt_t i_blocks;
> > > loff_t i_size;
> > >
> > > i_size = i_size_read(src);
> > > spin_lock_32bit(&src->i_lock);
> > > i_blocks = src->i_blocks;
> > > spin_unlock_32bit(&src->i_lock);
> > >
> > > i_size_write(dst, i_size);
> > > spin_lock_32bit(&dst->i_lock)
> > > dst->i_blocks = i_blocks;
> > > spin_unlock_32bit(&dst->i_lock)
> > > }
> >
> > I can't find spin_[um]lock_32bit anywhere (checkd latest mmotm and linus's
> > tree). I therefore assume this was just your way of saying I should:
> >
> > #if BITS_PER_LONG == 32
> > spin_unlock(&dst->i_lock);
> > #endif
>
> Nope, it was my way of suggesting that you implement it ;)
> include/linux/spinlock.h would be a good place.
Sure, I can implement it and submit a patch. But I've got two questions
first:
1. i_size_read has a tri-state behaviour:
#if BITS_PER_LONG==32 && defined(CONFIG_SMP)
// play with seqcount
#elif BITS_PER_LONG==32 && defined(CONFIG_PREEMPT)
// wrap in preempt_disable/enable
#else
// just return value
#endif
Shouldn't the same be done for i_blocks, with matching i_blocks_read(),
i_blocks_write(), and adding an inode->i_blocks_seqcount field?
2. I've rewritten your suggested code a bit to reduce stack use. Modulo
having 32-bit spin_lock/unlock variants, do you see any problem with this
code below? My testing of it so far on 32/64-bit SMP/UMP have all
passed.
void fsstack_copy_inode_size(struct inode *dst, struct inode *src)
{
#if BITS_PER_LONG == 32
blkcnt_t i_blocks;
spin_lock(&src->i_lock);
i_blocks = src->i_blocks;
spin_unlock(&src->i_lock);
spin_lock(&dst->i_lock);
dst->i_blocks = i_blocks;
spin_unlock(&dst->i_lock);
#else
dst->i_blocks = src->i_blocks;
#endif
i_size_write(dst, i_size_read(src));
}
Thanks,
Erez.
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [2.6.26 PATCH, RESEND]: fs_stack/eCryptfs: fsstack_copy_* updates
2008-05-01 23:44 ` Erez Zadok
@ 2008-05-02 0:08 ` Andrew Morton
2008-05-02 5:58 ` Erez Zadok
0 siblings, 1 reply; 12+ messages in thread
From: Andrew Morton @ 2008-05-02 0:08 UTC (permalink / raw)
To: Erez Zadok; +Cc: ezk, linux-fsdevel, linux-kernel, viro, hch, mhalcrow, hugh
On Thu, 1 May 2008 19:44:18 -0400
Erez Zadok <ezk@cs.sunysb.edu> wrote:
> In message <20080501122136.bc2215c9.akpm@linux-foundation.org>, Andrew Morton writes:
> > On Thu, 1 May 2008 13:18:09 -0400
> > Erez Zadok <ezk@cs.sunysb.edu> wrote:
> >
> > > In message <20080430101704.9cbd6384.akpm@linux-foundation.org>, Andrew Morton writes:
> > > [...[
> > > > Can we avoid having to think?
> > > >
> > > > void fsstack_copy_inode_size(struct inode *dst, const struct inode *src)
> > > > {
> > > > blkcnt_t i_blocks;
> > > > loff_t i_size;
> > > >
> > > > i_size = i_size_read(src);
> > > > spin_lock_32bit(&src->i_lock);
> > > > i_blocks = src->i_blocks;
> > > > spin_unlock_32bit(&src->i_lock);
> > > >
> > > > i_size_write(dst, i_size);
> > > > spin_lock_32bit(&dst->i_lock)
> > > > dst->i_blocks = i_blocks;
> > > > spin_unlock_32bit(&dst->i_lock)
> > > > }
> > >
> > > I can't find spin_[um]lock_32bit anywhere (checkd latest mmotm and linus's
> > > tree). I therefore assume this was just your way of saying I should:
> > >
> > > #if BITS_PER_LONG == 32
> > > spin_unlock(&dst->i_lock);
> > > #endif
> >
> > Nope, it was my way of suggesting that you implement it ;)
> > include/linux/spinlock.h would be a good place.
>
> Sure, I can implement it and submit a patch. But I've got two questions
> first:
>
> 1. i_size_read has a tri-state behaviour:
>
> #if BITS_PER_LONG==32 && defined(CONFIG_SMP)
> // play with seqcount
> #elif BITS_PER_LONG==32 && defined(CONFIG_PREEMPT)
> // wrap in preempt_disable/enable
> #else
> // just return value
> #endif
>
> Shouldn't the same be done for i_blocks, with matching i_blocks_read(),
> i_blocks_write(), and adding an inode->i_blocks_seqcount field?
I guess so, yes. Maybe there are other things too. We just haven't cared
enough to do all this fuss for i_blocks. Even i_size was a bit marginal.
i_size is much more important because glitches in there can result in
incorrect data being returned from read() and things like that. i_blocks
is just a beancounting curiosity.
>
> 2. I've rewritten your suggested code a bit to reduce stack use. Modulo
> having 32-bit spin_lock/unlock variants, do you see any problem with this
> code below? My testing of it so far on 32/64-bit SMP/UMP have all
> passed.
>
> void fsstack_copy_inode_size(struct inode *dst, struct inode *src)
> {
> #if BITS_PER_LONG == 32
> blkcnt_t i_blocks;
>
> spin_lock(&src->i_lock);
> i_blocks = src->i_blocks;
> spin_unlock(&src->i_lock);
> spin_lock(&dst->i_lock);
> dst->i_blocks = i_blocks;
> spin_unlock(&dst->i_lock);
> #else
> dst->i_blocks = src->i_blocks;
> #endif
> i_size_write(dst, i_size_read(src));
> }
That looks sane, as long as we don't care about i_size-vs-i_blocks
coherency.
However I expect that approximately zero of the sites which modify i_blocks
are taking i_lock to do so.
otoh, many of them _will_ accidentally have i_mutex coverage. On the
write() path at least. What happened to that idea?
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [2.6.26 PATCH, RESEND]: fs_stack/eCryptfs: fsstack_copy_* updates
2008-05-02 0:08 ` Andrew Morton
@ 2008-05-02 5:58 ` Erez Zadok
2008-05-02 6:11 ` Andrew Morton
2008-05-12 0:44 ` hooanon05
0 siblings, 2 replies; 12+ messages in thread
From: Erez Zadok @ 2008-05-02 5:58 UTC (permalink / raw)
To: Andrew Morton
Cc: Erez Zadok, linux-fsdevel, linux-kernel, viro, hch, mhalcrow,
hugh
In message <20080501170819.bdcb9035.akpm@linux-foundation.org>, Andrew Morton writes:
> On Thu, 1 May 2008 19:44:18 -0400
> Erez Zadok <ezk@cs.sunysb.edu> wrote:
[...]
> i_size is much more important because glitches in there can result in
> incorrect data being returned from read() and things like that. i_blocks
> is just a beancounting curiosity.
>
> >
> > 2. I've rewritten your suggested code a bit to reduce stack use. Modulo
> > having 32-bit spin_lock/unlock variants, do you see any problem with this
> > code below? My testing of it so far on 32/64-bit SMP/UMP have all
> > passed.
> >
> > void fsstack_copy_inode_size(struct inode *dst, struct inode *src)
> > {
> > #if BITS_PER_LONG == 32
> > blkcnt_t i_blocks;
> >
> > spin_lock(&src->i_lock);
> > i_blocks = src->i_blocks;
> > spin_unlock(&src->i_lock);
> > spin_lock(&dst->i_lock);
> > dst->i_blocks = i_blocks;
> > spin_unlock(&dst->i_lock);
> > #else
> > dst->i_blocks = src->i_blocks;
> > #endif
> > i_size_write(dst, i_size_read(src));
> > }
>
> That looks sane, as long as we don't care about i_size-vs-i_blocks
> coherency.
> However I expect that approximately zero of the sites which modify i_blocks
> are taking i_lock to do so.
If i_blocks is indeed less important than i_size, then we can live with some
incoherency b/t i_size and i_blocks, for now. Nevertheless, I propose
adding this to linux/fs.h:
static inline blkcnt_t i_blocks_read(const struct inode *inode)
{
#if BITS_PER_LONG == 32
blkcnt_t i_blocks;
spin_lock(&src->i_lock);
i_blocks = src->i_blocks;
spin_unlock(&src->i_lock);
return i_blocks;
#else
return src->i_blocks;
#endif
}
and a matching i_blocks_write function. We can then gradually convert those
"unsafe" users of i_blocks to use the new i_blocks_read/write helpers.
The nice thing about these two helpers is fsstack_copy_inode_size becomes a
lot cleaner and more elegant:
void fsstack_copy_inode_size(struct inode *dst, struct inode *src)
{
i_blocks_write(dst, i_blocks_read(src));
i_size_write(dst, i_size_read(src));
}
And, if we ever wanted to ensure coherency b/t i_blocks and i_size, we'll
need to create helpers that merge the functionality of i_size_read/write and
i_blocks_read/write.
What do you think?
Erez.
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [2.6.26 PATCH, RESEND]: fs_stack/eCryptfs: fsstack_copy_* updates
2008-05-02 5:58 ` Erez Zadok
@ 2008-05-02 6:11 ` Andrew Morton
2008-05-12 0:44 ` hooanon05
1 sibling, 0 replies; 12+ messages in thread
From: Andrew Morton @ 2008-05-02 6:11 UTC (permalink / raw)
To: Erez Zadok; +Cc: linux-fsdevel, linux-kernel, viro, hch, mhalcrow, hugh
On Fri, 2 May 2008 01:58:05 -0400 Erez Zadok <ezk@cs.sunysb.edu> wrote:
> In message <20080501170819.bdcb9035.akpm@linux-foundation.org>, Andrew Morton writes:
> > On Thu, 1 May 2008 19:44:18 -0400
> > Erez Zadok <ezk@cs.sunysb.edu> wrote:
> [...]
> > i_size is much more important because glitches in there can result in
> > incorrect data being returned from read() and things like that. i_blocks
> > is just a beancounting curiosity.
> >
> > >
> > > 2. I've rewritten your suggested code a bit to reduce stack use. Modulo
> > > having 32-bit spin_lock/unlock variants, do you see any problem with this
> > > code below? My testing of it so far on 32/64-bit SMP/UMP have all
> > > passed.
> > >
> > > void fsstack_copy_inode_size(struct inode *dst, struct inode *src)
> > > {
> > > #if BITS_PER_LONG == 32
> > > blkcnt_t i_blocks;
> > >
> > > spin_lock(&src->i_lock);
> > > i_blocks = src->i_blocks;
> > > spin_unlock(&src->i_lock);
> > > spin_lock(&dst->i_lock);
> > > dst->i_blocks = i_blocks;
> > > spin_unlock(&dst->i_lock);
> > > #else
> > > dst->i_blocks = src->i_blocks;
> > > #endif
> > > i_size_write(dst, i_size_read(src));
> > > }
> >
> > That looks sane, as long as we don't care about i_size-vs-i_blocks
> > coherency.
>
> > However I expect that approximately zero of the sites which modify i_blocks
> > are taking i_lock to do so.
>
> If i_blocks is indeed less important than i_size, then we can live with some
> incoherency b/t i_size and i_blocks, for now. Nevertheless, I propose
> adding this to linux/fs.h:
>
> static inline blkcnt_t i_blocks_read(const struct inode *inode)
> {
> #if BITS_PER_LONG == 32
> blkcnt_t i_blocks;
> spin_lock(&src->i_lock);
> i_blocks = src->i_blocks;
> spin_unlock(&src->i_lock);
> return i_blocks;
> #else
> return src->i_blocks;
> #endif
> }
We actually only need the spinlocked version if blkcnt_t is 64-bit.
So #if BITS_PER_LONG == 32 && defined(CONFIG_LSF), plus explanatory comment.
The spinlocked version will be too large for inlining, I expect.
> and a matching i_blocks_write function.
You'll also need i_blocks_mod() for things like
fs/hpfs/dnode.c: i->i_blocks += 4;
> We can then gradually convert those
> "unsafe" users of i_blocks to use the new i_blocks_read/write helpers.
>
> The nice thing about these two helpers is fsstack_copy_inode_size becomes a
> lot cleaner and more elegant:
>
> void fsstack_copy_inode_size(struct inode *dst, struct inode *src)
> {
> i_blocks_write(dst, i_blocks_read(src));
> i_size_write(dst, i_size_read(src));
> }
>
> And, if we ever wanted to ensure coherency b/t i_blocks and i_size, we'll
> need to create helpers that merge the functionality of i_size_read/write and
> i_blocks_read/write.
>
> What do you think?
>
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [2.6.26 PATCH, RESEND]: fs_stack/eCryptfs: fsstack_copy_* updates
2008-04-30 17:17 ` Andrew Morton
2008-04-30 21:09 ` Erez Zadok
2008-05-01 17:18 ` Erez Zadok
@ 2008-05-02 13:17 ` Hugh Dickins
2 siblings, 0 replies; 12+ messages in thread
From: Hugh Dickins @ 2008-05-02 13:17 UTC (permalink / raw)
To: Andrew Morton
Cc: Erez Zadok, linux-fsdevel, linux-kernel, viro, hch, mhalcrow
Sorry, I keep putting off a reply until I can append a patch,
but still won't get to do so today. Comments below.
On Wed, 30 Apr 2008, Andrew Morton wrote:
> On Mon, 21 Apr 2008 02:50:42 -0400
> Erez Zadok <ezk@cs.sunysb.edu> wrote:
>
> >
> > 1. remove the 3rd arg to fsstack_copy_attr_all. There are no users for it:
> > ecryptfs never used the 3rd arg; unionfs stopped using it a long time
> > ago. Halcrow ok'ed this patch some time ago.
> >
> > 2. add necessary locking for 32-bit smp systems in fsstack_copy_inode_size
> > (courtesy Hugh Dickins).
> >
> > 3. minor commenting style changes, and addition of copyrights which were
> > missing.
> >
> > Acked-by: Mike Halcrow <mhalcrow@us.ibm.com>
> > Signed-off-by: Erez Zadok <ezk@cs.sunysb.edu>
> >
> > ...
> >
> > void fsstack_copy_inode_size(struct inode *dst, const struct inode *src)
> > {
> > - i_size_write(dst, i_size_read((struct inode *)src));
> > +#if BITS_PER_LONG == 32 && defined(CONFIG_SMP)
> > + spin_lock(&dst->i_lock);
> > +#endif
>
> The defined(CONFIG_SMP) is wrong. The spinlock is here to protect
> dst->i_blocks, but it can be corrupted via preemption on uniprocessor as
> well. So a plain old
>
> #if BITS_PER_LONG == 32
>
> would fix that.
Some confusion here - which would have been avoided if I'd been
so good as to put much-needed comment in the code of the original
patch - sorry. There's two separate issues, i_size and i_blocks.
i_size is what I was addressing by adding the #ifdef'ed spin_lock,
i_blocks (in the CONFIG_LSF case more precisely than the 32-bit case)
is an arguable problem that nobody noticed until you did so now.
Here's my original patch comment to Erez:
+ LTP's iogen01 doio tests hang nicely on 32-bit SMP when /tmp is a unionfs
+ mount of a tmpfs. See the comment on i_size_write in linux/fs.h: it needs
+ to be locked, otherwise i_size_read can spin forever waiting for a lost
+ seqcount update.
+
+ Most filesystems are already holding i_mutex for this, but unionfs calls
+ fsstack_copy_inode_size from many places, not necessarily holding i_mutex.
+ Use the low-level i_lock within fsstack_copy_inode_size when 32-bit SMP.
So far as that goes, the CONFIG_SMP is correct, because i_size_write
does preempt_disable/preempt_enable in the 32-bit PREEMPT but not SMP
case. The ifdef chosen reflects that in i_size_write, but I've no
objection to removing the CONFIG_SMP part of it if it ends up more
palatable that way - I imagine the cost of a double preempt_disable
where a single would do is just too minuscule to worry about.
But the new use of i_lock, together with the way I only unlock after
dealing with i_blocks (I thought, if we have to have preemption off,
let's do i_blocks too while it's still off), led you to think that I
was trying to keep the two halves of a CONFIG_LSF i_blocks together -
whereas I'd never even glanced at the typedef of i_blocks.
Personally I don't think it's worth worrying about i_blocks coherency
here. stat's generic_fillattr doesn't worry about it. unionfs (ah,
but here we're in general stackable filesystem territory) won't very
often be handling files where the top half of the u64 is set. Quotas
have a strong reason for keeping i_blocks coherent, but unionfs (and
any stacking filesystem?) won't get into quotas itself. And tmpfs
amongst non-quota others does not use i_lock to manipulate i_blocks.
In later mail, it looks like you're wondering whether it's worth
bothering about too: and perhaps you wouldn't have bothered about
it in the first place, if the patch comment provided had made it
clear what this change was for, i_size not i_blocks.
>
> > + i_size_write(dst, i_size_read(src));
> > dst->i_blocks = src->i_blocks;
> > +#if BITS_PER_LONG == 32 && defined(CONFIG_SMP)
> > + spin_unlock(&dst->i_lock);
> > +#endif
> > }
>
> However, what about src->i_blocks? It is protected by src->i_lock. The
> code as you have it here could read transient values.
In some cases it's protected by src->i_lock.
>
> Furthermore, i_lock is defined as an innermost lock, for protection of
> inode internals. But here we're proposing "taking" inode->i_size_seqcount
> inside i_lock. Not necessarily a problem, but it broke the old rule.
>
> We're also doing a read_seqlock of a _different_ inode inside this inode's
> i_lock. Again, this is not necessarily a problem (but it might be!) but it
> adds complexity and needs thought.
Good points, I didn't think of those at all (I guess I don't properly
think of the seqcount as a lock). There's no good reason to do that
i_size_read within the i_lock, you're quite right to take it outside.
>
> Can we avoid having to think?
Not such a good idea...
>
> void fsstack_copy_inode_size(struct inode *dst, const struct inode *src)
> {
> blkcnt_t i_blocks;
> loff_t i_size;
>
> i_size = i_size_read(src);
> spin_lock_32bit(&src->i_lock);
> i_blocks = src->i_blocks;
> spin_unlock_32bit(&src->i_lock);
>
> i_size_write(dst, i_size);
> spin_lock_32bit(&dst->i_lock)
> dst->i_blocks = i_blocks;
> spin_unlock_32bit(&dst->i_lock)
> }
No. I see Erez has followed this up, and reduced it to a nice looking
i_blocks_write(dst, i_blocks_read(src));
i_size_write(dst, i_size_read(src));
but I'm afraid that doesn't address the hang my patch was to fix.
My inclination is to go for something like:
i_size = i_size_read(src);
spin_lock_32bit(&dst->i_lock)
i_size_write(dst, i_size);
dst->i_blocks = src->i_blocks;
spin_unlock_32bit(&dst->i_lock)
with spin_(un)lock_32bit declared as inlines in the very same file,
unless you really want to embark on some i_blocks coherency cleanup.
With comments in the code as well as in the patch description.
Hugh
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [2.6.26 PATCH, RESEND]: fs_stack/eCryptfs: fsstack_copy_* updates
2008-05-02 5:58 ` Erez Zadok
2008-05-02 6:11 ` Andrew Morton
@ 2008-05-12 0:44 ` hooanon05
1 sibling, 0 replies; 12+ messages in thread
From: hooanon05 @ 2008-05-12 0:44 UTC (permalink / raw)
To: Erez Zadok
Cc: Andrew Morton, linux-fsdevel, linux-kernel, viro, hch, mhalcrow,
hugh
Erez Zadok:
> The nice thing about these two helpers is fsstack_copy_inode_size becomes a
> lot cleaner and more elegant:
>
> void fsstack_copy_inode_size(struct inode *dst, struct inode *src)
> {
> i_blocks_write(dst, i_blocks_read(src));
> i_size_write(dst, i_size_read(src));
> }
While this function looks simple, I think it is more generic and better
that the caller of fsstack_copy_inode_size() holds i_lock instead of
lock it in the callee.
Junjiro Okajima
^ permalink raw reply [flat|nested] 12+ messages in thread
end of thread, other threads:[~2008-05-12 0:49 UTC | newest]
Thread overview: 12+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2008-04-21 6:50 [2.6.26 PATCH, RESEND]: fs_stack/eCryptfs: fsstack_copy_* updates Erez Zadok
2008-04-30 17:17 ` Andrew Morton
2008-04-30 21:09 ` Erez Zadok
2008-04-30 21:25 ` Andrew Morton
2008-05-01 17:18 ` Erez Zadok
2008-05-01 19:21 ` Andrew Morton
2008-05-01 23:44 ` Erez Zadok
2008-05-02 0:08 ` Andrew Morton
2008-05-02 5:58 ` Erez Zadok
2008-05-02 6:11 ` Andrew Morton
2008-05-12 0:44 ` hooanon05
2008-05-02 13:17 ` Hugh Dickins
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).