linux-fsdevel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH 1/2] vfs: seq_file: add helpers for data filling
@ 2009-09-21 12:48 Miklos Szeredi
  2009-09-21 12:51 ` [PATCH 2/2] vfs: fix d_path() for unreachable paths Miklos Szeredi
  0 siblings, 1 reply; 9+ messages in thread
From: Miklos Szeredi @ 2009-09-21 12:48 UTC (permalink / raw)
  To: Al Viro
  Cc: akpm, linux-fsdevel, linux-kernel, Valdis.Kletnieks, agruen, hch,
	hugh.dickins, matthew

From: Miklos Szeredi <mszeredi@suse.cz>

Add two helpers that allow access to the seq_file's own buffer, but
hide the internal details of seq_files.

This allows easier implementation of special purpose filling
functions.  It also cleans up some existing functions which duplicated
the seq_file logic.

Make these inline functions in seq_file.h, as suggested by Al.

Signed-off-by: Miklos Szeredi <mszeredi@suse.cz>
Acked-by: Hugh Dickins <hugh.dickins@tiscali.co.uk>
---
 fs/seq_file.c            |   74 ++++++++++++++++++++++++-----------------------
 include/linux/seq_file.h |   38 ++++++++++++++++++++++++
 2 files changed, 77 insertions(+), 35 deletions(-)

Index: linux-2.6/fs/seq_file.c
===================================================================
--- linux-2.6.orig/fs/seq_file.c	2009-09-09 13:29:21.000000000 +0200
+++ linux-2.6/fs/seq_file.c	2009-09-21 14:17:10.000000000 +0200
@@ -429,20 +429,21 @@ EXPORT_SYMBOL(mangle_path);
  */
 int seq_path(struct seq_file *m, struct path *path, char *esc)
 {
-	if (m->count < m->size) {
-		char *s = m->buf + m->count;
-		char *p = d_path(path, s, m->size - m->count);
+	char *buf;
+	size_t size = seq_get_buf(m, &buf);
+	int res = -1;
+
+	if (size) {
+		char *p = d_path(path, buf, size);
 		if (!IS_ERR(p)) {
-			s = mangle_path(s, p, esc);
-			if (s) {
-				p = m->buf + m->count;
-				m->count = s - m->buf;
-				return s - p;
-			}
+			char *end = mangle_path(buf, p, esc);
+			if (end)
+				res = end - buf;
 		}
 	}
-	m->count = m->size;
-	return -1;
+	seq_commit(m, res);
+
+	return res;
 }
 EXPORT_SYMBOL(seq_path);
 
@@ -454,26 +455,28 @@ EXPORT_SYMBOL(seq_path);
 int seq_path_root(struct seq_file *m, struct path *path, struct path *root,
 		  char *esc)
 {
-	int err = -ENAMETOOLONG;
-	if (m->count < m->size) {
-		char *s = m->buf + m->count;
+	char *buf;
+	size_t size = seq_get_buf(m, &buf);
+	int res = -ENAMETOOLONG;
+
+	if (size) {
 		char *p;
 
 		spin_lock(&dcache_lock);
-		p = __d_path(path, root, s, m->size - m->count);
+		p = __d_path(path, root, buf, size);
 		spin_unlock(&dcache_lock);
-		err = PTR_ERR(p);
+		res = PTR_ERR(p);
 		if (!IS_ERR(p)) {
-			s = mangle_path(s, p, esc);
-			if (s) {
-				p = m->buf + m->count;
-				m->count = s - m->buf;
-				return 0;
-			}
+			char *end = mangle_path(buf, p, esc);
+			if (end)
+				res = end - buf;
+			else
+				res = -ENAMETOOLONG;
 		}
 	}
-	m->count = m->size;
-	return err;
+	seq_commit(m, res);
+
+	return res < 0 ? res : 0;
 }
 
 /*
@@ -481,20 +484,21 @@ int seq_path_root(struct seq_file *m, st
  */
 int seq_dentry(struct seq_file *m, struct dentry *dentry, char *esc)
 {
-	if (m->count < m->size) {
-		char *s = m->buf + m->count;
-		char *p = dentry_path(dentry, s, m->size - m->count);
+	char *buf;
+	size_t size = seq_get_buf(m, &buf);
+	int res = -1;
+
+	if (size) {
+		char *p = dentry_path(dentry, buf, size);
 		if (!IS_ERR(p)) {
-			s = mangle_path(s, p, esc);
-			if (s) {
-				p = m->buf + m->count;
-				m->count = s - m->buf;
-				return s - p;
-			}
+			char *end = mangle_path(buf, p, esc);
+			if (end)
+				res = end - buf;
 		}
 	}
-	m->count = m->size;
-	return -1;
+	seq_commit(m, res);
+
+	return res;
 }
 
 int seq_bitmap(struct seq_file *m, const unsigned long *bits,
Index: linux-2.6/include/linux/seq_file.h
===================================================================
--- linux-2.6.orig/include/linux/seq_file.h	2009-09-09 13:29:21.000000000 +0200
+++ linux-2.6/include/linux/seq_file.h	2009-09-21 14:15:40.000000000 +0200
@@ -35,6 +35,44 @@ struct seq_operations {
 
 #define SEQ_SKIP 1
 
+/**
+ * seq_get_buf - get buffer to write arbitrary data to
+ * @m: the seq_file handle
+ * @bufp: the beginning of the buffer is stored here
+ *
+ * Return the number of bytes available in the buffer, or zero if
+ * there's no space.
+ */
+static inline size_t seq_get_buf(struct seq_file *m, char **bufp)
+{
+	BUG_ON(m->count > m->size);
+	if (m->count < m->size)
+		*bufp = m->buf + m->count;
+	else
+		*bufp = NULL;
+
+	return m->size - m->count;
+}
+
+/**
+ * seq_commit - commit data to the buffer
+ * @m: the seq_file handle
+ * @num: the number of bytes to commit
+ *
+ * Commit @num bytes of data written to a buffer previously acquired
+ * by seq_buf_get.  To signal an error condition, or that the data
+ * didn't fit in the available space, pass a negative @num value.
+ */
+static inline void seq_commit(struct seq_file *m, int num)
+{
+	if (num < 0) {
+		m->count = m->size;
+	} else {
+		BUG_ON(m->count + num > m->size);
+		m->count += num;
+	}
+}
+
 char *mangle_path(char *s, char *p, char *esc);
 int seq_open(struct file *, const struct seq_operations *);
 ssize_t seq_read(struct file *, char __user *, size_t, loff_t *);

^ permalink raw reply	[flat|nested] 9+ messages in thread

* [PATCH 2/2] vfs: fix d_path() for unreachable paths
  2009-09-21 12:48 [PATCH 1/2] vfs: seq_file: add helpers for data filling Miklos Szeredi
@ 2009-09-21 12:51 ` Miklos Szeredi
  2009-09-21 14:02   ` Al Viro
  0 siblings, 1 reply; 9+ messages in thread
From: Miklos Szeredi @ 2009-09-21 12:51 UTC (permalink / raw)
  To: Al Viro
  Cc: akpm, linux-fsdevel, linux-kernel, Valdis.Kletnieks, agruen, hch,
	hugh.dickins, matthew

(The two fixes have been folded into this one)

----
From: Miklos Szeredi <mszeredi@suse.cz>

John Johansen pointed out, that getcwd(2) will give a garbled result
if a bind mount of a non-filesystem-root directory is detached:

   > mkdir /mnt/foo
   > mount --bind /etc /mnt/foo
   > cd /mnt/foo/skel
   > umount -l /mnt/foo
   > /bin/pwd
   etcskel

If it was the root of the filesystem which was detached, it will give
a saner looking result, but it still won't be a valid absolute path by
which the CWD can be reached (assuming the process's root is not also
on the detached mount).

A similar issue happens if the CWD is outside the process's root or in
a different namespace.  These problems are relevant to symlinks under
/proc/<pid>/ and /proc/<pid>/fd/ as well.

This patch addresses all these issues, by prefixing such unreachable
paths with "(unreachable)".  This isn't perfect since the returned
path may still be a valid _relative_ path, and applications may not
check the result of getcwd() for starting with a '/' before using it.

For this reason Andreas Gruenbacher thinks getcwd(2) should return
ENOENT in these cases, but that breaks /bin/pwd and bash in the above
cases.

Hugh Dickins reported that an old version of gnome-vfs-daemon crashes
because it finds an entry in /proc/mounts where the mountpoint is
unreachable.  So revert /proc/mounts to the old behavior (or rather a
less crazy version of the old behavior).

Also revert the effect on /proc/${PID}/maps for memory maps set up
with shmem_file_setup() or hugetlb_file_setup().  These functions set
up unlinked files under a kernel-private vfsmount.  Since this
vfsmount is unreachable from userspace, these maps will be reported
with the "(unreachable)" prefix, which is undesirable, because it
changes the kernel ABI and might break applications for no good
reason.

Reported-by: John Johansen <jjohansen@suse.de>
CC: Matthew Wilcox <matthew@wil.cx>
CC: Andreas Gruenbacher <agruen@suse.de>
Signed-off-by: Miklos Szeredi <mszeredi@suse.cz>

---
 fs/dcache.c          |   20 ++++++++++++++++-
 fs/hugetlbfs/inode.c |   17 +++++++++++++++
 fs/namespace.c       |   57 ++++++++++++++++++++++++++++++++++++++++++++++++++-
 mm/shmem.c           |   17 +++++++++++++++
 4 files changed, 108 insertions(+), 3 deletions(-)

Index: linux-2.6/fs/dcache.c
===================================================================
--- linux-2.6.orig/fs/dcache.c	2009-09-21 14:31:35.000000000 +0200
+++ linux-2.6/fs/dcache.c	2009-09-21 14:31:37.000000000 +0200
@@ -1883,6 +1883,12 @@ static int prepend_name(char **buffer, i
 	return prepend(buffer, buflen, name->name, name->len);
 }
 
+static bool is_pseudo_root(struct dentry *dentry)
+{
+	return IS_ROOT(dentry) &&
+		(dentry->d_name.len != 1 || dentry->d_name.name[0] != '/');
+}
+
 /**
  * __d_path - return the path of a dentry
  * @path: the dentry/vfsmount to report
@@ -1950,8 +1956,18 @@ out:
 
 global_root:
 	retval += 1;	/* hit the slash */
-	if (prepend_name(&retval, &buflen, &dentry->d_name) != 0)
-		goto Elong;
+
+	if (is_pseudo_root(dentry)) {
+		/* Pseudo filesystem with "foo:" prefix */
+		if (prepend_name(&retval, &buflen, &dentry->d_name) != 0)
+			goto Elong;
+	} else {
+		/*
+		 * Unreachable (detached or outside root or outside namespace)
+		 */
+		if (prepend(&retval, &buflen, "(unreachable)/", 14) != 0)
+			goto Elong;
+	}
 	root->mnt = vfsmnt;
 	root->dentry = dentry;
 	goto out;
Index: linux-2.6/fs/hugetlbfs/inode.c
===================================================================
--- linux-2.6.orig/fs/hugetlbfs/inode.c	2009-09-21 14:31:35.000000000 +0200
+++ linux-2.6/fs/hugetlbfs/inode.c	2009-09-21 14:31:37.000000000 +0200
@@ -931,6 +931,21 @@ static struct file_system_type hugetlbfs
 
 static struct vfsmount *hugetlbfs_vfsmount;
 
+/*
+ * Do a special d_dname function so that these are not prefixed by
+ * "(unreachable)".
+ */
+static char *hugetlb_unlinked_d_dname(struct dentry *dentry, char *buf,
+				      int buflen)
+{
+	return dynamic_dname(dentry, buf, buflen, "/%s (deleted)",
+			     dentry->d_name.name);
+}
+
+static struct dentry_operations hugetlb_unlinked_dentry_operations = {
+	.d_dname = hugetlb_unlinked_d_dname,
+};
+
 static int can_do_hugetlb_shm(void)
 {
 	return capable(CAP_IPC_LOCK) || in_group_p(sysctl_hugetlb_shm_group);
@@ -968,6 +983,8 @@ struct file *hugetlb_file_setup(const ch
 	if (!dentry)
 		goto out_shm_unlock;
 
+	dentry->d_op = &hugetlb_unlinked_dentry_operations;
+
 	error = -ENOSPC;
 	inode = hugetlbfs_get_inode(root->d_sb, current_fsuid(),
 				current_fsgid(), S_IFREG | S_IRWXUGO, 0);
Index: linux-2.6/fs/namespace.c
===================================================================
--- linux-2.6.orig/fs/namespace.c	2009-09-21 14:31:35.000000000 +0200
+++ linux-2.6/fs/namespace.c	2009-09-21 14:31:37.000000000 +0200
@@ -789,6 +789,61 @@ static void show_type(struct seq_file *m
 	}
 }
 
+/*
+ * Same as d_path() except it doesn't stick "(unreachable)" in front
+ * of unreachable paths.
+ */
+static char *d_path_compat(struct path *path, char *buf, int buflen)
+{
+	char *res;
+	struct path root;
+	struct path tmp;
+
+	read_lock(&current->fs->lock);
+	root = current->fs->root;
+	path_get(&root);
+	read_unlock(&current->fs->lock);
+	spin_lock(&dcache_lock);
+	tmp = root;
+	res = __d_path(path, &tmp, buf, buflen);
+	if (!IS_ERR(res) &&
+	    (tmp.mnt != root.mnt || tmp.dentry != root.dentry)) {
+		/*
+		 * Unreachable path found, redo with the global root
+		 * so we get a normal looking path.
+		 */
+		res = __d_path(path, &tmp, buf, buflen);
+	}
+	spin_unlock(&dcache_lock);
+	path_put(&root);
+
+	return res;
+}
+
+/*
+ * Some old programs break if /proc/mounts contains a mountpoint
+ * beginning with "(unreachable)".  Revert this back to the old way of
+ * displaying the path from the global root instead.
+ */
+static int show_path_old(struct seq_file *m, struct path *path, char *esc)
+{
+	char *buf;
+	size_t size = seq_get_buf(m, &buf);
+	int res = -1;
+
+	if (size) {
+		char *p = d_path_compat(path, buf, size);
+		if (!IS_ERR(p)) {
+			char *end = mangle_path(buf, p, esc);
+			if (end)
+				res = end - buf;
+		}
+	}
+	seq_commit(m, res);
+
+	return res;
+}
+
 static int show_vfsmnt(struct seq_file *m, void *v)
 {
 	struct vfsmount *mnt = list_entry(v, struct vfsmount, mnt_list);
@@ -797,7 +852,7 @@ static int show_vfsmnt(struct seq_file *
 
 	mangle(m, mnt->mnt_devname ? mnt->mnt_devname : "none");
 	seq_putc(m, ' ');
-	seq_path(m, &mnt_path, " \t\n\\");
+	show_path_old(m, &mnt_path, " \t\n\\");
 	seq_putc(m, ' ');
 	show_type(m, mnt->mnt_sb);
 	seq_puts(m, __mnt_is_readonly(mnt) ? " ro" : " rw");
Index: linux-2.6/mm/shmem.c
===================================================================
--- linux-2.6.orig/mm/shmem.c	2009-09-21 14:31:35.000000000 +0200
+++ linux-2.6/mm/shmem.c	2009-09-21 14:31:37.000000000 +0200
@@ -2601,6 +2601,21 @@ int shmem_unuse(swp_entry_t entry, struc
 
 /* common code */
 
+/*
+ * Do a special d_dname function so that these are not prefixed by
+ * "(unreachable)".
+ */
+static char *shmem_unlinked_d_dname(struct dentry *dentry, char *buf,
+				    int buflen)
+{
+	return dynamic_dname(dentry, buf, buflen, "/%s (deleted)",
+			     dentry->d_name.name);
+}
+
+static struct dentry_operations shmem_unlinked_dentry_operations = {
+	.d_dname = shmem_unlinked_d_dname,
+};
+
 /**
  * shmem_file_setup - get an unlinked file living in tmpfs
  * @name: name for dentry (to be seen in /proc/<pid>/maps
@@ -2633,6 +2648,8 @@ struct file *shmem_file_setup(const char
 	if (!dentry)
 		goto put_memory;
 
+	dentry->d_op = &shmem_unlinked_dentry_operations;
+
 	error = -ENFILE;
 	file = get_empty_filp();
 	if (!file)

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH 2/2] vfs: fix d_path() for unreachable paths
  2009-09-21 12:51 ` [PATCH 2/2] vfs: fix d_path() for unreachable paths Miklos Szeredi
@ 2009-09-21 14:02   ` Al Viro
  2009-09-21 14:10     ` Mike Frysinger
  2009-09-21 15:03     ` Miklos Szeredi
  0 siblings, 2 replies; 9+ messages in thread
From: Al Viro @ 2009-09-21 14:02 UTC (permalink / raw)
  To: Miklos Szeredi
  Cc: akpm, linux-fsdevel, linux-kernel, Valdis.Kletnieks, agruen, hch,
	hugh.dickins, matthew

On Mon, Sep 21, 2009 at 02:51:42PM +0200, Miklos Szeredi wrote:
> Hugh Dickins reported that an old version of gnome-vfs-daemon crashes
> because it finds an entry in /proc/mounts where the mountpoint is
> unreachable.  So revert /proc/mounts to the old behavior (or rather a
> less crazy version of the old behavior).
> 
> Also revert the effect on /proc/${PID}/maps for memory maps set up
> with shmem_file_setup() or hugetlb_file_setup().  These functions set
> up unlinked files under a kernel-private vfsmount.  Since this
> vfsmount is unreachable from userspace, these maps will be reported
> with the "(unreachable)" prefix, which is undesirable, because it
> changes the kernel ABI and might break applications for no good
> reason.

Ho-hum...
	a) d_op you've got there look like a candidate for libfs, if
we go for that at all
	b) I *really* don't like changing the behaviour of d_path() instead
of doing new behaviour in a new function and deciding where to use it on
case-by-case basis
	c) having just grepped for d_path()... oh, man
* blackfin cplbinfo: utter crap; it's used to decide which procfs file
is being opened - by dumping full pathname into a (on-stack) buffer
and then parsing it.  Stupid *and* broken.
* blackfin traps.c:decode_address(): for one thing, pathnames has been
known to be longer than 256 bytes.  For another... locking in that loop
over processes and VMAs of each looks very suspicios, while we are at it.
* pohmelfs_construct_path_string(): just what will happen if we are called
from process chrooted into that bugger?  d_path() is badly abused there.
* ext4_file_open(): d_path() per se is probably OK, but initializing
path.mnt/path.dentry isn't.  mount --move racing with that thing => loads
of fun.
* sysfs_open_file(): racy in an obvious way, but probably won't do anything
worse than garbage in oopsen.

I'm very uncomfortable with the silent change of behaviour, especially since
existing callers seem to be split between "part of user-visible ABI" and
"generally bogus, waiting for a gnat to fart".  At the very least it needs
a serious audit of callers.

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH 2/2] vfs: fix d_path() for unreachable paths
  2009-09-21 14:02   ` Al Viro
@ 2009-09-21 14:10     ` Mike Frysinger
  2009-09-21 14:38       ` Al Viro
  2009-09-21 15:03     ` Miklos Szeredi
  1 sibling, 1 reply; 9+ messages in thread
From: Mike Frysinger @ 2009-09-21 14:10 UTC (permalink / raw)
  To: Al Viro
  Cc: Miklos Szeredi, akpm, linux-fsdevel, linux-kernel,
	Valdis.Kletnieks, agruen, hch, hugh.dickins, matthew

On Mon, Sep 21, 2009 at 10:02, Al Viro wrote:
> * blackfin cplbinfo: utter crap; it's used to decide which procfs file
> is being opened - by dumping full pathname into a (on-stack) buffer
> and then parsing it.  Stupid *and* broken.

it works without having to copy & paste the same exact structures over
and over.  a suggestion as how to do it cleanly without bloating the
code is certainly welcome.  it doesnt really matter that it's on the
stack as the usage is small and d_path() is given the size of the
buffer, so it isnt going to overflow.

> * blackfin traps.c:decode_address(): for one thing, pathnames has been
> known to be longer than 256 bytes.

we know the paths are longer than 256 bytes.  the output is to give a
reasonable idea of what is crashing.  realistically, nothing
executable resides in a 256+ byte path on an embedded system.

>  For another... locking in that loop
> over processes and VMAs of each looks very suspicios, while we are at it.

we've reviewed it several times and exercised it in multiple ways.  so
unless you have a real idea of something being wrong, the code has
been vetted heavily.
-mike

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH 2/2] vfs: fix d_path() for unreachable paths
  2009-09-21 14:10     ` Mike Frysinger
@ 2009-09-21 14:38       ` Al Viro
  2009-09-21 14:43         ` Al Viro
  2009-09-21 15:30         ` Mike Frysinger
  0 siblings, 2 replies; 9+ messages in thread
From: Al Viro @ 2009-09-21 14:38 UTC (permalink / raw)
  To: Mike Frysinger
  Cc: Miklos Szeredi, akpm, linux-fsdevel, linux-kernel,
	Valdis.Kletnieks, agruen, hch, hugh.dickins, matthew

On Mon, Sep 21, 2009 at 10:10:17AM -0400, Mike Frysinger wrote:

> it works without having to copy & paste the same exact structures over
> and over.  a suggestion as how to do it cleanly without bloating the
> code is certainly welcome.  it doesnt really matter that it's on the
> stack as the usage is small and d_path() is given the size of the
> buffer, so it isnt going to overflow.

Umm...  Surely, you can put a CPU number + one bit into PDE->data?  As in
	proc_create_data("icplb", ....., (void *)(cpu * 2))
	proc_create_data("dcplb", ....., (void *)(cpu * 2 + 1))
and
	struct proc_dir_entry *pde = PDE(file->f_path.dentry->d_inode);
	unsigned long n = (unsigned long) pde->data;
	...
	cpu = n / 2;
	is_D = n & 1;

> > ??For another... locking in that loop
> > over processes and VMAs of each looks very suspicios, while we are at it.
> 
> we've reviewed it several times and exercised it in multiple ways.  so
> unless you have a real idea of something being wrong, the code has
> been vetted heavily.

write_lock_irq tasklist_lock
loop over processes
	get_task_mm [now VM is pinned down, but not locked]
	walk the VMA list without any locking

Something on another CPU (you have SMP systems, judging by the previously
discussed code, right?) does munmap().

Boom.

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH 2/2] vfs: fix d_path() for unreachable paths
  2009-09-21 14:38       ` Al Viro
@ 2009-09-21 14:43         ` Al Viro
  2009-09-21 15:31           ` Mike Frysinger
  2009-09-21 15:30         ` Mike Frysinger
  1 sibling, 1 reply; 9+ messages in thread
From: Al Viro @ 2009-09-21 14:43 UTC (permalink / raw)
  To: Mike Frysinger
  Cc: Miklos Szeredi, akpm, linux-fsdevel, linux-kernel,
	Valdis.Kletnieks, agruen, hch, hugh.dickins, matthew

On Mon, Sep 21, 2009 at 03:38:57PM +0100, Al Viro wrote:
> On Mon, Sep 21, 2009 at 10:10:17AM -0400, Mike Frysinger wrote:
> 
> > it works without having to copy & paste the same exact structures over
> > and over.  a suggestion as how to do it cleanly without bloating the
> > code is certainly welcome.  it doesnt really matter that it's on the
> > stack as the usage is small and d_path() is given the size of the
> > buffer, so it isnt going to overflow.
> 
> Umm...  Surely, you can put a CPU number + one bit into PDE->data?  As in
> 	proc_create_data("icplb", ....., (void *)(cpu * 2))
> 	proc_create_data("dcplb", ....., (void *)(cpu * 2 + 1))
> and
> 	struct proc_dir_entry *pde = PDE(file->f_path.dentry->d_inode);
> 	unsigned long n = (unsigned long) pde->data;
> 	...
> 	cpu = n / 2;
> 	is_D = n & 1;

PS: as to why it is broken...  Consider e.g.
	mount --bind /proc/cplbinfo/cpu0 /mnt
	cat /mnt/icplb
Or, better yet,
	mount -t proc none /mnt/cpu
	cat /mnt/cpu/cplbinfo/cpu0/icplb

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH 2/2] vfs: fix d_path() for unreachable paths
  2009-09-21 14:02   ` Al Viro
  2009-09-21 14:10     ` Mike Frysinger
@ 2009-09-21 15:03     ` Miklos Szeredi
  1 sibling, 0 replies; 9+ messages in thread
From: Miklos Szeredi @ 2009-09-21 15:03 UTC (permalink / raw)
  To: Al Viro
  Cc: miklos, akpm, linux-fsdevel, linux-kernel, Valdis.Kletnieks,
	agruen, hch, hugh.dickins, matthew

On Mon, 21 Sep 2009, Al Viro wrote:
> On Mon, Sep 21, 2009 at 02:51:42PM +0200, Miklos Szeredi wrote:
> > Hugh Dickins reported that an old version of gnome-vfs-daemon crashes
> > because it finds an entry in /proc/mounts where the mountpoint is
> > unreachable.  So revert /proc/mounts to the old behavior (or rather a
> > less crazy version of the old behavior).
> > 
> > Also revert the effect on /proc/${PID}/maps for memory maps set up
> > with shmem_file_setup() or hugetlb_file_setup().  These functions set
> > up unlinked files under a kernel-private vfsmount.  Since this
> > vfsmount is unreachable from userspace, these maps will be reported
> > with the "(unreachable)" prefix, which is undesirable, because it
> > changes the kernel ABI and might break applications for no good
> > reason.
> 
> Ho-hum...
> 	a) d_op you've got there look like a candidate for libfs, if
> we go for that at all
> 	b) I *really* don't like changing the behaviour of d_path() instead
> of doing new behaviour in a new function and deciding where to use it on
> case-by-case basis
> 	c) having just grepped for d_path()... oh, man
> * blackfin cplbinfo: utter crap; it's used to decide which procfs file
> is being opened - by dumping full pathname into a (on-stack) buffer
> and then parsing it.  Stupid *and* broken.
> * blackfin traps.c:decode_address(): for one thing, pathnames has been
> known to be longer than 256 bytes.  For another... locking in that loop
> over processes and VMAs of each looks very suspicios, while we are at it.
> * pohmelfs_construct_path_string(): just what will happen if we are called
> from process chrooted into that bugger?  d_path() is badly abused there.
> * ext4_file_open(): d_path() per se is probably OK, but initializing
> path.mnt/path.dentry isn't.  mount --move racing with that thing => loads
> of fun.
> * sysfs_open_file(): racy in an obvious way, but probably won't do anything
> worse than garbage in oopsen.
> 
> I'm very uncomfortable with the silent change of behaviour, especially since
> existing callers seem to be split between "part of user-visible ABI" and
> "generally bogus, waiting for a gnat to fart".  At the very least it needs
> a serious audit of callers.


Fair enough, I should have done a review of internal callers...  Will
do that now.

The big question is, however, if we accept the unknown risk of
changing the user-visible ABI from "broken, but path at least *looks*
normal" to "paths don't necessarily begin with a slash anymore".

Hmm?

Thanks,
Miklos


^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH 2/2] vfs: fix d_path() for unreachable paths
  2009-09-21 14:38       ` Al Viro
  2009-09-21 14:43         ` Al Viro
@ 2009-09-21 15:30         ` Mike Frysinger
  1 sibling, 0 replies; 9+ messages in thread
From: Mike Frysinger @ 2009-09-21 15:30 UTC (permalink / raw)
  To: Al Viro
  Cc: Miklos Szeredi, akpm, linux-fsdevel, linux-kernel,
	Valdis.Kletnieks, agruen, hch, hugh.dickins, matthew

On Mon, Sep 21, 2009 at 10:38, Al Viro wrote:
> On Mon, Sep 21, 2009 at 10:10:17AM -0400, Mike Frysinger wrote:
>> it works without having to copy & paste the same exact structures over
>> and over.  a suggestion as how to do it cleanly without bloating the
>> code is certainly welcome.  it doesnt really matter that it's on the
>> stack as the usage is small and d_path() is given the size of the
>> buffer, so it isnt going to overflow.
>
> Umm...  Surely, you can put a CPU number + one bit into PDE->data?  As in
>        proc_create_data("icplb", ....., (void *)(cpu * 2))
>        proc_create_data("dcplb", ....., (void *)(cpu * 2 + 1))
> and
>        struct proc_dir_entry *pde = PDE(file->f_path.dentry->d_inode);
>        unsigned long n = (unsigned long) pde->data;
>        ...
>        cpu = n / 2;
>        is_D = n & 1;

if i'd known this route, i'd have used it ;).  i'll look into changing
it, thanks.

>> > ??For another... locking in that loop
>> > over processes and VMAs of each looks very suspicios, while we are at it.
>>
>> we've reviewed it several times and exercised it in multiple ways.  so
>> unless you have a real idea of something being wrong, the code has
>> been vetted heavily.
>
> write_lock_irq tasklist_lock
> loop over processes
>        get_task_mm [now VM is pinned down, but not locked]
>        walk the VMA list without any locking
>
> Something on another CPU (you have SMP systems, judging by the previously
> discussed code, right?) does munmap().

the SMP port is limited due to lack of hardware cache coherency, but
we'll check out this call path
-mike
--
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH 2/2] vfs: fix d_path() for unreachable paths
  2009-09-21 14:43         ` Al Viro
@ 2009-09-21 15:31           ` Mike Frysinger
  0 siblings, 0 replies; 9+ messages in thread
From: Mike Frysinger @ 2009-09-21 15:31 UTC (permalink / raw)
  To: Al Viro
  Cc: Miklos Szeredi, akpm, linux-fsdevel, linux-kernel,
	Valdis.Kletnieks, agruen, hch, hugh.dickins, matthew

On Mon, Sep 21, 2009 at 10:43, Al Viro wrote:
> On Mon, Sep 21, 2009 at 03:38:57PM +0100, Al Viro wrote:
>> On Mon, Sep 21, 2009 at 10:10:17AM -0400, Mike Frysinger wrote:
>> > it works without having to copy & paste the same exact structures over
>> > and over.  a suggestion as how to do it cleanly without bloating the
>> > code is certainly welcome.  it doesnt really matter that it's on the
>> > stack as the usage is small and d_path() is given the size of the
>> > buffer, so it isnt going to overflow.
>
> PS: as to why it is broken...  Consider e.g.
>        mount --bind /proc/cplbinfo/cpu0 /mnt
>        cat /mnt/icplb
> Or, better yet,
>        mount -t proc none /mnt/cpu
>        cat /mnt/cpu/cplbinfo/cpu0/icplb

i'm not disagreeing that it doesnt work under all random VFS
scenarios.  just that the realistic ones all work.
-mike

^ permalink raw reply	[flat|nested] 9+ messages in thread

end of thread, other threads:[~2009-09-21 15:31 UTC | newest]

Thread overview: 9+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2009-09-21 12:48 [PATCH 1/2] vfs: seq_file: add helpers for data filling Miklos Szeredi
2009-09-21 12:51 ` [PATCH 2/2] vfs: fix d_path() for unreachable paths Miklos Szeredi
2009-09-21 14:02   ` Al Viro
2009-09-21 14:10     ` Mike Frysinger
2009-09-21 14:38       ` Al Viro
2009-09-21 14:43         ` Al Viro
2009-09-21 15:31           ` Mike Frysinger
2009-09-21 15:30         ` Mike Frysinger
2009-09-21 15:03     ` Miklos Szeredi

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).