Re: Race between "mount" uevent and /proc/mounts?

linux-hotplug.vger.kernel.org archive mirror
 help / color / mirror / Atom feed

From: Sergey Vlasov <vsu@altlinux.ru>
To: Kay Sievers <kay.sievers@vrfy.org>
Cc: Al Viro <viro@ftp.linux.org.uk>,
	Roderich.Schupp.extern@mch.siemens.de,
	linux-kernel@vger.kernel.org,
	linux-hotplug-devel@lists.sourceforge.net,
	Greg KH <greg@kroah.com>
Subject: Re: Race between "mount" uevent and /proc/mounts?
Date: Wed, 02 Nov 2005 13:01:18 +0000	[thread overview]
Message-ID: <20051102130118.GA23142@master.mivlgu.local> (raw)
In-Reply-To: <20051101213525.GA17207@vrfy.org>

[-- Attachment #1: Type: text/plain, Size: 9008 bytes --]

On Tue, Nov 01, 2005 at 10:35:25PM +0100, Kay Sievers wrote:
> On Tue, Nov 01, 2005 at 10:54:49PM +0300, Sergey Vlasov wrote:
> > On Tue, Nov 01, 2005 at 04:58:16AM +0100, Kay Sievers wrote:
> > > On Tue, Nov 01, 2005 at 01:28:46AM +0100, Kay Sievers wrote:
> > > > Ok, makes sense. The attached seems to work for me. If we can get
> > > > something like this, we can remove the superblock claim/release events
> > > > completely and just read the events from the /proc/mounts file itself.
> > 
> > No, we need both events.  When you need to tell the user when it is
> > safe to disconnect the storage device, the event from detach_mnt() is
> > useless - it happens too early.  In fact, even the current way of
> > sending the event from kill_block_super() is broken, because the event
> > is generated before generic_shutdown_super() and sync_blockdev(), and
> > writing out cached data may take some time.
> > 
> > We could try to emit busy/free events from bd_claim() and
> > bd_release(); this would be triggered by most "interesting" users
> > (even opens with O_EXCL), but not by things like volume_id.
> 
> Hmm, HAL polls optical drives every 2 seconds with O_EXCL to detect media
> changes. You need to do it EXCL, cause otherwise some cd burners fail.

Crap...  Then we can only move the bdev_uevent() call in
kill_block_super() later, and other users of block devices will not be
noticed at all.

> > > New patch. Missed a check for namespace == NULL in detach_mnt().
[skip]
> > This assumes that there will be only one process per namespace which
> > will call poll() on /proc/mounts.  Even though someone may argue that
> > it is the right approach (have a single process which watches
> > /proc/mounts and broadcasts updates to other interested processes,
> > e.g., over dbus), with the above implementation any unprivileged user
> > can call poll() and interfere with the operation of that designated
> > process.
> 
> Sure, capable(CAP_SYS_ADMIN) could prevent this.

poll() protected by CAP_SYS_ADMIN?  Ewww.

/proc/bus/usb/devices uses event counters in its poll() implementation;
something like this could be done here too.  What about the following
patch (compile tested only):

-----------------------------------------------------------------------

Add poll support for /proc/.../mounts

Now poll() or select() system calls can be used to wait for mount tree
changes.

Signed-off-by: Sergey Vlasov <vsu@altlinux.ru>


---

 fs/namespace.c            |   19 ++++++++++-
 fs/proc/base.c            |   75 ++++++++++++++++++++++++++++++++++-----------
 include/linux/namespace.h |    9 +++++
 3 files changed, 82 insertions(+), 21 deletions(-)

applies-to: b87f06d928e0ea06ae6244c1aeecf3e745f39bb9
1704c384737e1cdbca3194f3471195c9ee4377a5
diff --git a/fs/namespace.c b/fs/namespace.c
index 2fa9fdf..4182210 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -39,6 +39,8 @@ static inline int sysfs_init(void)
 /* spinlock for vfsmount related operations, inplace of dcache_lock */
  __cacheline_aligned_in_smp DEFINE_SPINLOCK(vfsmount_lock);
 
+DECLARE_WAIT_QUEUE_HEAD(mounts_wait);
+
 static struct list_head *mount_hashtable;
 static int hash_mask __read_mostly, hash_bits __read_mostly;
 static kmem_cache_t *mnt_cache; 
@@ -120,6 +122,10 @@ static void detach_mnt(struct vfsmount *
 	list_del_init(&mnt->mnt_child);
 	list_del_init(&mnt->mnt_hash);
 	old_nd->dentry->d_mounted--;
+	if (current->namespace) {
+		current->namespace->event++;
+		wake_up_interruptible(&mounts_wait);
+	}
 }
 
 static void attach_mnt(struct vfsmount *mnt, struct nameidata *nd)
@@ -129,6 +135,8 @@ static void attach_mnt(struct vfsmount *
 	list_add(&mnt->mnt_hash, mount_hashtable+hash(nd->mnt, nd->dentry));
 	list_add_tail(&mnt->mnt_child, &nd->mnt->mnt_mounts);
 	nd->dentry->d_mounted++;
+	current->namespace->event++;
+	wake_up_interruptible(&mounts_wait);
 }
 
 static struct vfsmount *next_mnt(struct vfsmount *p, struct vfsmount *root)
@@ -185,7 +193,8 @@ EXPORT_SYMBOL(__mntput);
 /* iterator */
 static void *m_start(struct seq_file *m, loff_t *pos)
 {
-	struct namespace *n = m->private;
+	struct proc_mounts_private *private = m->private;
+	struct namespace *n = private->namespace;
 	struct list_head *p;
 	loff_t l = *pos;
 
@@ -198,7 +207,8 @@ static void *m_start(struct seq_file *m,
 
 static void *m_next(struct seq_file *m, void *v, loff_t *pos)
 {
-	struct namespace *n = m->private;
+	struct proc_mounts_private *private = m->private;
+	struct namespace *n = private->namespace;
 	struct list_head *p = ((struct vfsmount *)v)->mnt_list.next;
 	(*pos)++;
 	return p==&n->list ? NULL : list_entry(p, struct vfsmount, mnt_list);
@@ -206,7 +216,8 @@ static void *m_next(struct seq_file *m, 
 
 static void m_stop(struct seq_file *m, void *v)
 {
-	struct namespace *n = m->private;
+	struct proc_mounts_private *private = m->private;
+	struct namespace *n = private->namespace;
 	up_read(&n->sem);
 }
 
@@ -1093,6 +1104,7 @@ int copy_namespace(int flags, struct tas
 	atomic_set(&new_ns->count, 1);
 	init_rwsem(&new_ns->sem);
 	INIT_LIST_HEAD(&new_ns->list);
+	new_ns->event = 0;
 
 	down_write(&tsk->namespace->sem);
 	/* First pass: copy the tree topology */
@@ -1394,6 +1406,7 @@ static void __init init_mount_tree(void)
 	init_rwsem(&namespace->sem);
 	list_add(&mnt->mnt_list, &namespace->list);
 	namespace->root = mnt;
+	namespace->event = 0;
 	mnt->mnt_namespace = namespace;
 
 	init_task.namespace = namespace;
diff --git a/fs/proc/base.c b/fs/proc/base.c
index a170450..41462c0 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -57,6 +57,7 @@
 #include <linux/init.h>
 #include <linux/file.h>
 #include <linux/string.h>
+#include <linux/poll.h>
 #include <linux/seq_file.h>
 #include <linux/namei.h>
 #include <linux/namespace.h>
@@ -663,39 +664,77 @@ extern struct seq_operations mounts_op;
 static int mounts_open(struct inode *inode, struct file *file)
 {
 	struct task_struct *task = proc_task(inode);
-	int ret = seq_open(file, &mounts_op);
+	struct proc_mounts_private *private;
+	struct seq_file *m;
+	struct namespace *namespace;
+	int ret = -ENOMEM;
+
+	private = kmalloc(sizeof(*private), GFP_KERNEL);
+	if (!private)
+		goto out;
+	ret = seq_open(file, &mounts_op);
+	if (ret)
+		goto out_free_private;
+	m = file->private_data;
+	m->private = private;
 
-	if (!ret) {
-		struct seq_file *m = file->private_data;
-		struct namespace *namespace;
-		task_lock(task);
-		namespace = task->namespace;
-		if (namespace)
-			get_namespace(namespace);
-		task_unlock(task);
-
-		if (namespace)
-			m->private = namespace;
-		else {
-			seq_release(inode, file);
-			ret = -EINVAL;
-		}
+	task_lock(task);
+	namespace = task->namespace;
+	if (namespace)
+		get_namespace(namespace);
+	task_unlock(task);
+	if (!namespace) {
+		ret = -EINVAL;
+		goto out_release;
 	}
+
+	private->namespace = namespace;
+	down_read(&namespace->sem);
+	private->last_event = namespace->event;
+	up_read(&namespace->sem);
+out:
 	return ret;
+
+out_release:
+	seq_release(inode, file);
+out_free_private:
+	kfree(private);
+	goto out;
 }
 
 static int mounts_release(struct inode *inode, struct file *file)
 {
 	struct seq_file *m = file->private_data;
-	struct namespace *namespace = m->private;
-	put_namespace(namespace);
+	struct proc_mounts_private *private = m->private;
+	put_namespace(private->namespace);
+	kfree(private);
 	return seq_release(inode, file);
 }
 
+static unsigned int mounts_poll(struct file *file, poll_table *wait)
+{
+	struct seq_file *m = file->private_data;
+	struct proc_mounts_private *private = m->private;
+	struct namespace *namespace = private->namespace;
+	int ret = 0;
+
+	poll_wait(file, &mounts_wait, wait);
+
+	down_read(&namespace->sem);
+	if (private->last_event != namespace->event) {
+		private->last_event = namespace->event;
+		ret = POLLIN | POLLRDNORM;
+	}
+	up_read(&namespace->sem);
+
+	return ret;
+}
+
 static struct file_operations proc_mounts_operations = {
 	.open		= mounts_open,
 	.read		= seq_read,
 	.llseek		= seq_lseek,
+	.poll		= mounts_poll,
 	.release	= mounts_release,
 };
 
diff --git a/include/linux/namespace.h b/include/linux/namespace.h
index 0e5a86f..fe5840a 100644
--- a/include/linux/namespace.h
+++ b/include/linux/namespace.h
@@ -10,6 +10,7 @@ struct namespace {
 	struct vfsmount *	root;
 	struct list_head	list;
 	struct rw_semaphore	sem;
+	unsigned int		event;
 };
 
 extern int copy_namespace(int, struct task_struct *);
@@ -38,5 +39,13 @@ static inline void get_namespace(struct 
 	atomic_inc(&namespace->count);
 }
 
+/* /proc/.../mounts file descriptor state */
+struct proc_mounts_private {
+	struct namespace *	namespace;
+	unsigned int		last_event;
+};
+
+extern wait_queue_head_t mounts_wait;
+
 #endif
 #endif
---
0.99.8.GIT

[-- Attachment #2: Type: application/pgp-signature, Size: 189 bytes --]

next prev parent reply	other threads:[~2005-11-02 13:01 UTC|newest]

Thread overview: 13+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
     [not found] <0AD07C7729CA42458B22AFA9C72E7011C8EF@mhha22kc.mchh.siemens.de>
     [not found] ` <20051025140041.GO7992@ftp.linux.org.uk>
2005-10-26 10:27   ` Race between "mount" uevent and /proc/mounts? Sergey Vlasov
2005-10-26 11:15     ` Al Viro
2005-10-26 14:34       ` Kay Sievers
2005-10-26 14:45         ` Xavier Bestel
2005-10-26 19:28         ` Al Viro
2005-11-01  0:28           ` Kay Sievers
2005-11-01  3:58             ` Kay Sievers
2005-11-01 19:54               ` Sergey Vlasov
2005-11-01 21:35                 ` Kay Sievers
2005-11-02 13:01                   ` Sergey Vlasov [this message]
2005-11-03  8:07                     ` Al Viro
2005-11-03 10:52                       ` Sergey Vlasov
2005-11-03 11:30                         ` Al Viro

find likely ancestor, descendant, or conflicting patches for this message:
( dfblob:2fa9fdf dfblob:4182210 dfblob:a170450 dfblob:41462c0
dfblob:0e5a86f dfblob:fe5840a )
 OR (
bs:"Re: Race between "
bs:"mount"
bs:" uevent and /proc/mounts?" )
	(help)

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20051102130118.GA23142@master.mivlgu.local \
    --to=vsu@altlinux.ru \
    --cc=Roderich.Schupp.extern@mch.siemens.de \
    --cc=greg@kroah.com \
    --cc=kay.sievers@vrfy.org \
    --cc=linux-hotplug-devel@lists.sourceforge.net \
    --cc=linux-kernel@vger.kernel.org \
    --cc=viro@ftp.linux.org.uk \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).