* [Patch] BME, noatime and nodiratime
@ 2004-04-06 14:55 Herbert Poetzl
2004-04-06 20:48 ` viro
0 siblings, 1 reply; 12+ messages in thread
From: Herbert Poetzl @ 2004-04-06 14:55 UTC (permalink / raw)
To: Andrew Morton; +Cc: torvalds, linux-kernel, viro
[-- Attachment #1: Type: text/plain, Size: 278 bytes --]
Hi Andrew!
according to todays vfs strategy (hope it hasn't changed
again), here is the first patch, which adds the mount
flags propagation, fixes the /proc display, and implements
noatime and nodiratime per mountpoint ...
please consider for inclusion ...
best,
Herbert
[-- Attachment #2: patch-2.6.5-atime.diff --]
[-- Type: text/plain, Size: 5549 bytes --]
diff -NurpP --minimal linux-2.6.5/fs/namespace.c linux-2.6.5-atime/fs/namespace.c
--- linux-2.6.5/fs/namespace.c 2004-04-04 18:03:09.000000000 +0200
+++ linux-2.6.5-atime/fs/namespace.c 2004-04-04 23:46:28.000000000 +0200
@@ -206,37 +206,40 @@ static int show_vfsmnt(struct seq_file *
struct vfsmount *mnt = v;
int err = 0;
static struct proc_fs_info {
- int flag;
- char *str;
+ int s_flag;
+ int mnt_flag;
+ char *set_str;
+ char *unset_str;
} fs_info[] = {
- { MS_SYNCHRONOUS, ",sync" },
- { MS_DIRSYNC, ",dirsync" },
- { MS_MANDLOCK, ",mand" },
- { MS_NOATIME, ",noatime" },
- { MS_NODIRATIME, ",nodiratime" },
- { 0, NULL }
- };
- static struct proc_fs_info mnt_info[] = {
- { MNT_NOSUID, ",nosuid" },
- { MNT_NODEV, ",nodev" },
- { MNT_NOEXEC, ",noexec" },
- { 0, NULL }
+ { MS_RDONLY, 0, "ro", "rw" },
+ { MS_SYNCHRONOUS, 0, ",sync", NULL },
+ { MS_DIRSYNC, 0, ",dirsync", NULL },
+ { MS_MANDLOCK, 0, ",mand", NULL },
+ { MS_NOATIME, MNT_NOATIME, ",noatime", NULL },
+ { MS_NODIRATIME, MNT_NODIRATIME, ",nodiratime", NULL },
+ { 0, MNT_NOSUID, ",nosuid", NULL },
+ { 0, MNT_NODEV, ",nodev", NULL },
+ { 0, MNT_NOEXEC, ",noexec", NULL },
+ { 0, 0, NULL, NULL }
};
- struct proc_fs_info *fs_infop;
+ struct proc_fs_info *p;
+ unsigned long s_flags = mnt->mnt_sb->s_flags;
+ int mnt_flags = mnt->mnt_flags;
mangle(m, mnt->mnt_devname ? mnt->mnt_devname : "none");
seq_putc(m, ' ');
seq_path(m, mnt, mnt->mnt_root, " \t\n\\");
seq_putc(m, ' ');
mangle(m, mnt->mnt_sb->s_type->name);
- seq_puts(m, mnt->mnt_sb->s_flags & MS_RDONLY ? " ro" : " rw");
- for (fs_infop = fs_info; fs_infop->flag; fs_infop++) {
- if (mnt->mnt_sb->s_flags & fs_infop->flag)
- seq_puts(m, fs_infop->str);
- }
- for (fs_infop = mnt_info; fs_infop->flag; fs_infop++) {
- if (mnt->mnt_flags & fs_infop->flag)
- seq_puts(m, fs_infop->str);
+ seq_putc(m, ' ');
+ for (p = fs_info; (p->s_flag | p->mnt_flag) ; p++) {
+ if ((s_flags & p->s_flag) || (mnt_flags & p->mnt_flag)) {
+ if (p->set_str)
+ seq_puts(m, p->set_str);
+ } else {
+ if (p->unset_str)
+ seq_puts(m, p->unset_str);
+ }
}
if (mnt->mnt_sb->s_op->show_options)
err = mnt->mnt_sb->s_op->show_options(m, mnt);
@@ -522,11 +525,13 @@ out_unlock:
/*
* do loopback mount.
*/
-static int do_loopback(struct nameidata *nd, char *old_name, int recurse)
+static int do_loopback(struct nameidata *nd, char *old_name, unsigned long flags, int mnt_flags)
{
struct nameidata old_nd;
struct vfsmount *mnt = NULL;
+ int recurse = flags & MS_REC;
int err = mount_is_safe(nd);
+
if (err)
return err;
if (!old_name || !*old_name)
@@ -553,6 +558,7 @@ static int do_loopback(struct nameidata
spin_unlock(&vfsmount_lock);
} else
mntput(mnt);
+ mnt->mnt_flags = mnt_flags;
}
up_write(¤t->namespace->sem);
@@ -763,6 +769,8 @@ long do_mount(char * dev_name, char * di
((char *)data_page)[PAGE_SIZE - 1] = 0;
/* Separate the per-mountpoint flags */
+ if (flags & MS_RDONLY)
+ mnt_flags |= MNT_RDONLY;
if (flags & MS_NOSUID)
mnt_flags |= MNT_NOSUID;
if (flags & MS_NODEV)
@@ -769,6 +777,10 @@ long do_mount(char * dev_name, char * di
mnt_flags |= MNT_NODEV;
if (flags & MS_NOEXEC)
mnt_flags |= MNT_NOEXEC;
+ if (flags & MS_NOATIME)
+ mnt_flags |= MNT_NOATIME;
+ if (flags & MS_NODIRATIME)
+ mnt_flags |= MNT_NODIRATIME;
flags &= ~(MS_NOSUID|MS_NOEXEC|MS_NODEV);
/* ... and get the mountpoint */
@@ -784,7 +796,7 @@ long do_mount(char * dev_name, char * di
retval = do_remount(&nd, flags & ~MS_REMOUNT, mnt_flags,
data_page);
else if (flags & MS_BIND)
- retval = do_loopback(&nd, dev_name, flags & MS_REC);
+ retval = do_loopback(&nd, dev_name, flags, mnt_flags);
else if (flags & MS_MOVE)
retval = do_move_mount(&nd, dev_name);
else
diff -NurpP --minimal linux-2.6.5/include/linux/fs.h linux-2.6.5-atime/include/linux/fs.h
--- linux-2.6.5/include/linux/fs.h 2004-04-04 18:03:13.000000000 +0200
+++ linux-2.6.5-atime/include/linux/fs.h 2004-04-04 23:54:10.000000000 +0200
@@ -19,6 +19,7 @@
#include <linux/cache.h>
#include <linux/radix-tree.h>
#include <linux/kobject.h>
+#include <linux/mount.h>
#include <asm/atomic.h>
struct iovec;
@@ -916,8 +933,14 @@ static inline void mark_inode_dirty_sync
static inline void touch_atime(struct vfsmount *mnt, struct dentry *dentry)
{
- /* per-mountpoint checks will go here */
- update_atime(dentry->d_inode);
+ struct inode *inode = dentry->d_inode;
+
+ if (MNT_IS_NOATIME(mnt))
+ return;
+ if (S_ISDIR(inode->i_mode) && MNT_IS_NODIRATIME(mnt))
+ return;
+
+ update_atime(inode);
}
static inline void file_accessed(struct file *file)
diff -NurpP --minimal linux-2.6.5/include/linux/mount.h linux-2.6.5-atime/include/linux/mount.h
--- linux-2.6.5/include/linux/mount.h 2004-03-11 03:55:22.000000000 +0100
+++ linux-2.6.5-atime/include/linux/mount.h 2004-04-04 23:46:28.000000000 +0200
@@ -14,9 +14,12 @@
#include <linux/list.h>
-#define MNT_NOSUID 1
-#define MNT_NODEV 2
-#define MNT_NOEXEC 4
+#define MNT_RDONLY 1
+#define MNT_NOSUID 2
+#define MNT_NODEV 4
+#define MNT_NOEXEC 8
+#define MNT_NOATIME 16
+#define MNT_NODIRATIME 32
struct vfsmount
{
@@ -33,6 +36,10 @@ struct vfsmount
struct list_head mnt_list;
};
+#define MNT_IS_RDONLY(m) ((m) && ((m)->mnt_flags & MNT_RDONLY))
+#define MNT_IS_NOATIME(m) ((m) && ((m)->mnt_flags & MNT_NOATIME))
+#define MNT_IS_NODIRATIME(m) ((m) && ((m)->mnt_flags & MNT_NODIRATIME))
+
static inline struct vfsmount *mntget(struct vfsmount *mnt)
{
if (mnt)
^ permalink raw reply [flat|nested] 12+ messages in thread* Re: [Patch] BME, noatime and nodiratime
2004-04-06 14:55 [Patch] BME, noatime and nodiratime Herbert Poetzl
@ 2004-04-06 20:48 ` viro
2004-04-06 23:11 ` viro
2004-04-07 6:46 ` Herbert Poetzl
0 siblings, 2 replies; 12+ messages in thread
From: viro @ 2004-04-06 20:48 UTC (permalink / raw)
To: Andrew Morton, torvalds, linux-kernel
On Tue, Apr 06, 2004 at 04:55:44PM +0200, Herbert Poetzl wrote:
>
> Hi Andrew!
>
> according to todays vfs strategy (hope it hasn't changed
> again), here is the first patch, which adds the mount
> flags propagation, fixes the /proc display, and implements
> noatime and nodiratime per mountpoint ...
>
> please consider for inclusion ...
noatime/nodiratime: OK, but we still have direct modifications of i_atime
that need to be taken care of.
massage of ->show(): more or less OK. However, we don't need to keep
MS_NOATIME and MS_NODIRATIME in flags at all -
> + if (flags & MS_NOATIME)
> + mnt_flags |= MNT_NOATIME;
> + if (flags & MS_NODIRATIME)
> + mnt_flags |= MNT_NODIRATIME;
> flags &= ~(MS_NOSUID|MS_NOEXEC|MS_NODEV);
should remove them from flags in the last line, same way we do that for
nosuid/noexec/nodev, with obvious consequences for ->show().
Note that we don't need to keep MS_NOATIME check in update_atime() - that
animal is purely per-mountpoint now.
> + if (MNT_IS_NOATIME(mnt))
> + return;
> + if (S_ISDIR(inode->i_mode) && MNT_IS_NODIRATIME(mnt))
> + return;
Do we need those to be macros? AFAICS, this is the only place where we
do such checks and we shouldn't get new callers. IOW, keeping them
separate doesn't buy us anything and only obfuscates the code.
> -#define MNT_NOSUID 1
> -#define MNT_NODEV 2
> -#define MNT_NOEXEC 4
> +#define MNT_RDONLY 1
> +#define MNT_NOSUID 2
> +#define MNT_NODEV 4
> +#define MNT_NOEXEC 8
> +#define MNT_NOATIME 16
> +#define MNT_NODIRATIME 32
*ugh*
a) what's the point of reordering them (rdonly shifting the existing ones)?
b) since MNT_RDONLY doesn't do anything at that point, why introduce it
(and associated confusion) now? As it is, your /proc/mounts will pretend
that per-mountpoint r/o works right now. Since it doesn't...
> +#define MNT_IS_RDONLY(m) ((m) && ((m)->mnt_flags & MNT_RDONLY))
> +#define MNT_IS_NOATIME(m) ((m) && ((m)->mnt_flags & MNT_NOATIME))
> +#define MNT_IS_NODIRATIME(m) ((m) && ((m)->mnt_flags & MNT_NODIRATIME))
See above. Besides, are we ever planning to pass NULL to these guys?
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [Patch] BME, noatime and nodiratime
2004-04-06 20:48 ` viro
@ 2004-04-06 23:11 ` viro
2004-04-06 23:35 ` Russell King
2004-04-14 15:14 ` Linus Torvalds
2004-04-07 6:46 ` Herbert Poetzl
1 sibling, 2 replies; 12+ messages in thread
From: viro @ 2004-04-06 23:11 UTC (permalink / raw)
To: Andrew Morton, torvalds, linux-kernel
On Tue, Apr 06, 2004 at 09:48:44PM +0100, viro@parcelfarce.linux.theplanet.co.uk wrote:
> noatime/nodiratime: OK, but we still have direct modifications of i_atime
> that need to be taken care of.
Particulary interesting one is in tty_io.c. There we
1) unconditionally touch i_atime on read()
2) do not touch it on write()
3) never mark the inode dirty.
Note that the last one means that doing stat() in a loop will sometimes
give atime going backwards. We also completely ignore noatime here.
There are similar places in some other char drivers. Obvious step would
be to have them do file_accessed() instead; however, I'd really like to
hear the rationale for existing behaviour. Comments?
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [Patch] BME, noatime and nodiratime
2004-04-06 23:11 ` viro
@ 2004-04-06 23:35 ` Russell King
2004-04-07 6:44 ` viro
2004-04-14 15:14 ` Linus Torvalds
1 sibling, 1 reply; 12+ messages in thread
From: Russell King @ 2004-04-06 23:35 UTC (permalink / raw)
To: viro; +Cc: Andrew Morton, torvalds, linux-kernel
On Wed, Apr 07, 2004 at 12:11:36AM +0100, viro@parcelfarce.linux.theplanet.co.uk wrote:
> Note that the last one means that doing stat() in a loop will sometimes
> give atime going backwards. We also completely ignore noatime here.
>
> There are similar places in some other char drivers. Obvious step would
> be to have them do file_accessed() instead; however, I'd really like to
> hear the rationale for existing behaviour. Comments?
I believe its so that we update the data in the cache, and avoid writing
it back to disk unnecessarily - consider the case where you have a lot
of tty activity (which updates atime). You don't particularly want to
be committing atime updates to disk every, what, 5 seconds, or performing
the NFS operations for the same.
The above is my understanding of the situation, which comes from when I
looked into these issues back in 2.0.3x days on a root-NFS machine and
asked (iirc) Alan Cox about it. - in other words, don't attach too much
reliability on it. 8)
[And for those who don't know - why are tty atimes updated in the
first place? For 'w' 'finger' etc which report login idle times
( := now - tty atime ).]
--
Russell King
Linux kernel 2.6 ARM Linux - http://www.arm.linux.org.uk/
maintainer of: 2.6 PCMCIA - http://pcmcia.arm.linux.org.uk/
2.6 Serial core
^ permalink raw reply [flat|nested] 12+ messages in thread* Re: [Patch] BME, noatime and nodiratime
2004-04-06 23:35 ` Russell King
@ 2004-04-07 6:44 ` viro
0 siblings, 0 replies; 12+ messages in thread
From: viro @ 2004-04-07 6:44 UTC (permalink / raw)
To: Andrew Morton, torvalds, linux-kernel
On Wed, Apr 07, 2004 at 12:35:06AM +0100, Russell King wrote:
> I believe its so that we update the data in the cache, and avoid writing
> it back to disk unnecessarily - consider the case where you have a lot
> of tty activity (which updates atime). You don't particularly want to
> be committing atime updates to disk every, what, 5 seconds, or performing
> the NFS operations for the same.
OK, but at least we want to dirty the inode at some point (e.g. final
close), so that atime would be monotonous - as it is, we get it reset
when inode goes out of cache and is reread again...
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [Patch] BME, noatime and nodiratime
2004-04-06 23:11 ` viro
2004-04-06 23:35 ` Russell King
@ 2004-04-14 15:14 ` Linus Torvalds
2004-04-14 16:26 ` viro
1 sibling, 1 reply; 12+ messages in thread
From: Linus Torvalds @ 2004-04-14 15:14 UTC (permalink / raw)
To: viro; +Cc: Andrew Morton, linux-kernel
On Wed, 7 Apr 2004 viro@parcelfarce.linux.theplanet.co.uk wrote:
>
> On Tue, Apr 06, 2004 at 09:48:44PM +0100, viro@parcelfarce.linux.theplanet.co.uk wrote:
>
> > noatime/nodiratime: OK, but we still have direct modifications of i_atime
> > that need to be taken care of.
>
> Particulary interesting one is in tty_io.c. There we
> 1) unconditionally touch i_atime on read()
> 2) do not touch it on write()
> 3) never mark the inode dirty.
All of 1-3 are correct.
They all mean that:
- atime never gets written out for tty devices, because there is no
point, and we shouldn't cause disk activity (or worse, network
activity) just because somebody is typing at the keyboard. Thus #3.
- atime is maintained properly for "last effective read" purposes, which
is what "ps"/"w" and friends want to see for idle time reporting.
Thus both #1 and #2 are important.
- atime is only valid as long as the tty is open (again, think "idle
time" - nobody actually cares what the atime is after the device has
been closed). Thus "atime potentially going backwards" due to #3 is a
non-issue.
So yes, tty atime updates are strange, but they are strange on PURPOSE.
> Note that the last one means that doing stat() in a loop will sometimes
> give atime going backwards. We also completely ignore noatime here.
Ignoring noatime is potentially the only one we should look at, but since
tty's really _are_ "noatime" as far as the filesystem is concerned, I
think it makes sense in the situation we are in anyway. The real reason
for "noatime" is to avoid unnecessary filesystem activity, not that we
necessarily want a constant atime.
> There are similar places in some other char drivers. Obvious step would
> be to have them do file_accessed() instead; however, I'd really like to
> hear the rationale for existing behaviour. Comments?
I don't know about other char drivers, those may well be wrong. But for
tty's in particular, idle time calculations really do pretty much require
the behaviour (apart from #3 - and #3 is, I think, effectively required by
not wanting to touch the disk on keyboard accesses).
Doing effectively a update_atime() on final tty close might be ok just to
avoid the backwards-running time, but you'd have to open-code it to avoid
the "inode_times_differ()" check. Not worth it, I feel, since atime on a
tty that has been closed is irrelevant.
Linus
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [Patch] BME, noatime and nodiratime
2004-04-14 15:14 ` Linus Torvalds
@ 2004-04-14 16:26 ` viro
0 siblings, 0 replies; 12+ messages in thread
From: viro @ 2004-04-14 16:26 UTC (permalink / raw)
To: Linus Torvalds; +Cc: Andrew Morton, linux-kernel
On Wed, Apr 14, 2004 at 08:14:18AM -0700, Linus Torvalds wrote:
> Ignoring noatime is potentially the only one we should look at, but since
> tty's really _are_ "noatime" as far as the filesystem is concerned, I
> think it makes sense in the situation we are in anyway. The real reason
> for "noatime" is to avoid unnecessary filesystem activity, not that we
> necessarily want a constant atime.
Another thing we are ignoring is r/o. Oh, well - the same arguments apply.
> > There are similar places in some other char drivers. Obvious step would
> > be to have them do file_accessed() instead; however, I'd really like to
> > hear the rationale for existing behaviour. Comments?
>
> I don't know about other char drivers, those may well be wrong. But for
> tty's in particular, idle time calculations really do pretty much require
> the behaviour (apart from #3 - and #3 is, I think, effectively required by
> not wanting to touch the disk on keyboard accesses).
AFAICS, they simply copy what tty_io.c does. Out of these guys busmouse.c
might have a similar excuse; qtronix.c and sonypi.c AFAICS have no reason
to touch atime at all. No idea what should usb/core/devio.c do...
Anyway, I'm going down right now; expect a patchbomb tonight after I get
some sleep...
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [Patch] BME, noatime and nodiratime
2004-04-06 20:48 ` viro
2004-04-06 23:11 ` viro
@ 2004-04-07 6:46 ` Herbert Poetzl
2004-04-07 8:47 ` viro
1 sibling, 1 reply; 12+ messages in thread
From: Herbert Poetzl @ 2004-04-07 6:46 UTC (permalink / raw)
To: viro; +Cc: Andrew Morton, torvalds, linux-kernel
On Tue, Apr 06, 2004 at 09:48:44PM +0100, viro@parcelfarce.linux.theplanet.co.uk wrote:
> On Tue, Apr 06, 2004 at 04:55:44PM +0200, Herbert Poetzl wrote:
> >
> > Hi Andrew!
> >
> > according to todays vfs strategy (hope it hasn't changed
> > again), here is the first patch, which adds the mount
> > flags propagation, fixes the /proc display, and implements
> > noatime and nodiratime per mountpoint ...
> >
> > please consider for inclusion ...
>
> noatime/nodiratime: OK, but we still have direct modifications of i_atime
> that need to be taken care of.
okay, will look at it ...
> massage of ->show(): more or less OK. However, we don't need to keep
> MS_NOATIME and MS_NODIRATIME in flags at all -
> > + if (flags & MS_NOATIME)
> > + mnt_flags |= MNT_NOATIME;
> > + if (flags & MS_NODIRATIME)
> > + mnt_flags |= MNT_NODIRATIME;
> > flags &= ~(MS_NOSUID|MS_NOEXEC|MS_NODEV);
>
> should remove them from flags in the last line, same way we do that for
> nosuid/noexec/nodev, with obvious consequences for ->show().
>
> Note that we don't need to keep MS_NOATIME check in update_atime() - that
> animal is purely per-mountpoint now.
hmm, wasn't there a reason, for having them per inode
like for 'special' files, which I do not remember atm?
> > + if (MNT_IS_NOATIME(mnt))
> > + return;
> > + if (S_ISDIR(inode->i_mode) && MNT_IS_NODIRATIME(mnt))
> > + return;
>
> Do we need those to be macros? AFAICS, this is the only place where we
> do such checks and we shouldn't get new callers. IOW, keeping them
> separate doesn't buy us anything and only obfuscates the code.
okay, no problem with that ...
> > -#define MNT_NOSUID 1
> > -#define MNT_NODEV 2
> > -#define MNT_NOEXEC 4
> > +#define MNT_RDONLY 1
> > +#define MNT_NOSUID 2
> > +#define MNT_NODEV 4
> > +#define MNT_NOEXEC 8
> > +#define MNT_NOATIME 16
> > +#define MNT_NODIRATIME 32
>
> *ugh*
>
> a) what's the point of reordering them (rdonly shifting the existing ones)?
simple, to match the MS_* counterparts, something which
actually confused me in the first place (in the code)
> b) since MNT_RDONLY doesn't do anything at that point, why introduce it
> (and associated confusion) now? As it is, your /proc/mounts will pretend
> that per-mountpoint r/o works right now. Since it doesn't...
no, they won't. the required MNT_RDONLY flag in the proc
function isn't set, so only MS_RDONLY will be reported
(from the superblock) which is equivalent to what is
reported in the current version.
> > +#define MNT_IS_RDONLY(m) ((m) && ((m)->mnt_flags & MNT_RDONLY))
> > +#define MNT_IS_NOATIME(m) ((m) && ((m)->mnt_flags & MNT_NOATIME))
> > +#define MNT_IS_NODIRATIME(m) ((m) && ((m)->mnt_flags & MNT_NODIRATIME))
>
> See above. Besides, are we ever planning to pass NULL to these guys?
originally there was a 'comment' which said, the
(m) check can be removed, when we are sure that this
isn't called with mnt == NULL ...
so maybe a BUGON(!m) might be useful?
I'll add the 'updates' today ...
best,
Herbert
> -
> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at http://www.tux.org/lkml/
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [Patch] BME, noatime and nodiratime
2004-04-07 6:46 ` Herbert Poetzl
@ 2004-04-07 8:47 ` viro
2004-04-07 10:19 ` Herbert Poetzl
0 siblings, 1 reply; 12+ messages in thread
From: viro @ 2004-04-07 8:47 UTC (permalink / raw)
To: Andrew Morton, torvalds, linux-kernel
On Wed, Apr 07, 2004 at 08:46:37AM +0200, Herbert Poetzl wrote:
> originally there was a 'comment' which said, the
> (m) check can be removed, when we are sure that this
> isn't called with mnt == NULL ...
>
> so maybe a BUGON(!m) might be useful?
Accessing m->mnt_flags will do just fine ;-)
> > Note that we don't need to keep MS_NOATIME check in update_atime() - that
> > animal is purely per-mountpoint now.
>
> hmm, wasn't there a reason, for having them per inode
> like for 'special' files, which I do not remember atm?
Yes, but that's S_NOATIME in inode->i_flags, not MS_NOATIME in sb->s_flags.
Actually, I've been wrong here - we *do* need that check, since there are
filesystems that force noatime or nodiratime.
>From grepping for that stuff it appears that
a) a bunch of filesystems force nodiratime and do not allow to override it
on remount. Same for noatime (BTW, noatime implies nodiratime, so setting
both is pointless).
b) some filesystems force nodiratime at mount time, but do not care to preserve
it on remount. Most of those are my fault - I've missed the remount side of
things in readdir patch. They should have nodiratime always on to match
the original behaviour. IMO, both (a) and (b) should be handled by a new
field - sb->s_forced_flags. do_remount_sb() would set those in flags before
doing anything else. Note that assignment to ->s_flags in do_remount_sb()
is not safe - e.g. ext[23] can get forced r/o by fs error and if that
happens after return from ->remount_sb() but before assignement, we are
screwed. IOW, do_remount_sb() will need more work.
c) XFS has an odd wankfest around MS_NOATIME - grep for XFSMNT_NOATIME and
XFS_MOUNT_NOATIME. AFAICS the only use is in xfs_ichgtime() and it's
redundant - we check IS_NOATIME() right after checking for XFS_MOUNT_NOATIME
there. However, that will become very interesting when it gets to
per-mountpoint r/o
/*
* We're not supposed to change timestamps in readonly-mounted
* filesystems. Throw it away if anyone asks us.
*/
if (unlikely(vp->v_vfsp->vfs_flag & VFS_RDONLY))
return;
in xfs_ichgtime() is too damn deep in call chains, so...
d) nfs_getattr() is very odd - it overloads semantics of noatime and nodiratime
for NFS in a strange way.
^ permalink raw reply [flat|nested] 12+ messages in thread* Re: [Patch] BME, noatime and nodiratime
2004-04-07 8:47 ` viro
@ 2004-04-07 10:19 ` Herbert Poetzl
2004-04-07 12:46 ` viro
0 siblings, 1 reply; 12+ messages in thread
From: Herbert Poetzl @ 2004-04-07 10:19 UTC (permalink / raw)
To: viro; +Cc: Andrew Morton, torvalds, linux-kernel
On Wed, Apr 07, 2004 at 09:47:22AM +0100, viro@parcelfarce.linux.theplanet.co.uk wrote:
> On Wed, Apr 07, 2004 at 08:46:37AM +0200, Herbert Poetzl wrote:
> > originally there was a 'comment' which said, the
> > (m) check can be removed, when we are sure that this
> > isn't called with mnt == NULL ...
> >
> > so maybe a BUGON(!m) might be useful?
>
> Accessing m->mnt_flags will do just fine ;-)
right ;)
> > > Note that we don't need to keep MS_NOATIME check in update_atime() - that
> > > animal is purely per-mountpoint now.
> >
> > hmm, wasn't there a reason, for having them per inode
> > like for 'special' files, which I do not remember atm?
>
> Yes, but that's S_NOATIME in inode->i_flags, not MS_NOATIME in sb->s_flags.
> Actually, I've been wrong here - we *do* need that check, since there are
> filesystems that force noatime or nodiratime.
so we keep them?
> >From grepping for that stuff it appears that
>
> a) a bunch of filesystems force nodiratime and do not allow to override it
> on remount. Same for noatime (BTW, noatime implies nodiratime, so setting
> both is pointless).
>
> b) some filesystems force nodiratime at mount time, but do not care to preserve
> it on remount. Most of those are my fault - I've missed the remount side of
> things in readdir patch. They should have nodiratime always on to match
> the original behaviour. IMO, both (a) and (b) should be handled by a new
> field - sb->s_forced_flags. do_remount_sb() would set those in flags before
> doing anything else. Note that assignment to ->s_flags in do_remount_sb()
> is not safe - e.g. ext[23] can get forced r/o by fs error and if that
> happens after return from ->remount_sb() but before assignement, we are
> screwed. IOW, do_remount_sb() will need more work.
are you going to do this?
> c) XFS has an odd wankfest around MS_NOATIME - grep for XFSMNT_NOATIME and
> XFS_MOUNT_NOATIME. AFAICS the only use is in xfs_ichgtime() and it's
> redundant - we check IS_NOATIME() right after checking for XFS_MOUNT_NOATIME
> there. However, that will become very interesting when it gets to
> per-mountpoint r/o
> /*
> * We're not supposed to change timestamps in readonly-mounted
> * filesystems. Throw it away if anyone asks us.
> */
> if (unlikely(vp->v_vfsp->vfs_flag & VFS_RDONLY))
> return;
> in xfs_ichgtime() is too damn deep in call chains, so...
>
> d) nfs_getattr() is very odd - it overloads semantics of noatime and nodiratime
> for NFS in a strange way.
open issues:
>>> a) what's the point of reordering them (rdonly shifting the existing ones)?
>> simple, to match the MS_* counterparts, something which
>> actually confused me in the first place (in the code)
so is this okay? actually I'd prefer to use the same
values for the MNT_NOATIME and MNT_NODIRATIME too ...
(as in the previous version I did)
best,
Herbert
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [Patch] BME, noatime and nodiratime
2004-04-07 10:19 ` Herbert Poetzl
@ 2004-04-07 12:46 ` viro
2004-04-07 14:24 ` Herbert Poetzl
0 siblings, 1 reply; 12+ messages in thread
From: viro @ 2004-04-07 12:46 UTC (permalink / raw)
To: Andrew Morton, torvalds, linux-kernel
On Wed, Apr 07, 2004 at 12:19:12PM +0200, Herbert Poetzl wrote:
> > Yes, but that's S_NOATIME in inode->i_flags, not MS_NOATIME in sb->s_flags.
> > Actually, I've been wrong here - we *do* need that check, since there are
> > filesystems that force noatime or nodiratime.
>
> so we keep them?
yes.
> > it on remount. Most of those are my fault - I've missed the remount side of
> > things in readdir patch. They should have nodiratime always on to match
> > the original behaviour. IMO, both (a) and (b) should be handled by a new
> > field - sb->s_forced_flags. do_remount_sb() would set those in flags before
> > doing anything else. Note that assignment to ->s_flags in do_remount_sb()
> > is not safe - e.g. ext[23] can get forced r/o by fs error and if that
> > happens after return from ->remount_sb() but before assignement, we are
> > screwed. IOW, do_remount_sb() will need more work.
>
> are you going to do this?
Yes - for now I'll do a fix that doesn't change API (IOW, add/update
->remount_fs() to filesystems with forced flags, forced r/o has the
same problems); we'll see what should be done in the long run when
the things clean up a bit. I'd rather avoid struct super_block changes
that would have to be reverted soon.
> >> simple, to match the MS_* counterparts, something which
> >> actually confused me in the first place (in the code)
>
> so is this okay? actually I'd prefer to use the same
> values for the MNT_NOATIME and MNT_NODIRATIME too ...
> (as in the previous version I did)
Hmm... Potentially we are breaking ABI for no good reason, since these
defines are visible to out-of-tree code. I don't think that we should
care about matching MS_... stuff, simply because MS_... encoding is ugly
as hell and there's no reason to use MNT_... and MS_... in the same
context.
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [Patch] BME, noatime and nodiratime
2004-04-07 12:46 ` viro
@ 2004-04-07 14:24 ` Herbert Poetzl
0 siblings, 0 replies; 12+ messages in thread
From: Herbert Poetzl @ 2004-04-07 14:24 UTC (permalink / raw)
To: viro; +Cc: Andrew Morton, torvalds, linux-kernel
On Wed, Apr 07, 2004 at 01:46:41PM +0100, viro@parcelfarce.linux.theplanet.co.uk wrote:
> > >> simple, to match the MS_* counterparts, something which
> > >> actually confused me in the first place (in the code)
> >
> > so is this okay? actually I'd prefer to use the same
> > values for the MNT_NOATIME and MNT_NODIRATIME too ...
> > (as in the previous version I did)
>
> Hmm... Potentially we are breaking ABI for no good reason, since these
> defines are visible to out-of-tree code. I don't think that we should
> care about matching MS_... stuff, simply because MS_... encoding is ugly
> as hell and there's no reason to use MNT_... and MS_... in the same
> context.
hmm, well breaking ABI here should not hurt anywhere, as
they are 'just' defined flags, and no code should rely on the
actual values (or if it does, it is broken anyway, right?)
but if you 'think' that it will break something, it's okay
for me to keep the 'old' values ... and 'just' add new ones
for RDONLY, NOATIME, and NODIRATIME ...
best,
Herbert
^ permalink raw reply [flat|nested] 12+ messages in thread
end of thread, other threads:[~2004-04-14 16:27 UTC | newest]
Thread overview: 12+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2004-04-06 14:55 [Patch] BME, noatime and nodiratime Herbert Poetzl
2004-04-06 20:48 ` viro
2004-04-06 23:11 ` viro
2004-04-06 23:35 ` Russell King
2004-04-07 6:44 ` viro
2004-04-14 15:14 ` Linus Torvalds
2004-04-14 16:26 ` viro
2004-04-07 6:46 ` Herbert Poetzl
2004-04-07 8:47 ` viro
2004-04-07 10:19 ` Herbert Poetzl
2004-04-07 12:46 ` viro
2004-04-07 14:24 ` Herbert Poetzl
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).