* [PATCH 4/18] make /proc/mounts pollable
@ 2005-11-08 2:01 Al Viro
2005-11-08 3:02 ` Neil Brown
0 siblings, 1 reply; 4+ messages in thread
From: Al Viro @ 2005-11-08 2:01 UTC (permalink / raw)
To: torvalds; +Cc: linux-kernel, linux-fsdevel, linuxram
From: Al Viro <viro@zeniv.linux.org.uk>
Date: 1131401749 -0500
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
fs/namespace.c | 30 +++++++++++++++++++++-
fs/proc/base.c | 62 ++++++++++++++++++++++++++++++++++-----------
include/linux/namespace.h | 2 +
3 files changed, 78 insertions(+), 16 deletions(-)
c98fca13440a0bbf547987f418e36e2e486e842c
diff --git a/fs/namespace.c b/fs/namespace.c
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -37,7 +37,9 @@ static inline int sysfs_init(void)
#endif
/* spinlock for vfsmount related operations, inplace of dcache_lock */
- __cacheline_aligned_in_smp DEFINE_SPINLOCK(vfsmount_lock);
+__cacheline_aligned_in_smp DEFINE_SPINLOCK(vfsmount_lock);
+
+static int event;
static struct list_head *mount_hashtable;
static int hash_mask __read_mostly, hash_bits __read_mostly;
@@ -111,6 +113,22 @@ static inline int check_mnt(struct vfsmo
return mnt->mnt_namespace == current->namespace;
}
+static void touch_namespace(struct namespace *ns)
+{
+ if (ns) {
+ ns->event = ++event;
+ wake_up_interruptible(&ns->poll);
+ }
+}
+
+static void __touch_namespace(struct namespace *ns)
+{
+ if (ns && ns->event != event) {
+ ns->event = event;
+ wake_up_interruptible(&ns->poll);
+ }
+}
+
static void detach_mnt(struct vfsmount *mnt, struct nameidata *old_nd)
{
old_nd->dentry = mnt->mnt_mountpoint;
@@ -384,6 +402,7 @@ static void umount_tree(struct vfsmount
for (p = mnt; p; p = next_mnt(p, mnt)) {
list_del(&p->mnt_list);
list_add(&p->mnt_list, &kill);
+ __touch_namespace(p->mnt_namespace);
p->mnt_namespace = NULL;
}
@@ -473,6 +492,7 @@ static int do_umount(struct vfsmount *mn
down_write(¤t->namespace->sem);
spin_lock(&vfsmount_lock);
+ event++;
retval = -EBUSY;
if (atomic_read(&mnt->mnt_count) == 2 || flags & MNT_DETACH) {
@@ -634,6 +654,7 @@ static int graft_tree(struct vfsmount *m
list_splice(&head, current->namespace->list.prev);
mntget(mnt);
err = 0;
+ touch_namespace(current->namespace);
}
spin_unlock(&vfsmount_lock);
out_unlock:
@@ -771,6 +792,7 @@ static int do_move_mount(struct nameidat
detach_mnt(old_nd.mnt, &parent_nd);
attach_mnt(old_nd.mnt, nd);
+ touch_namespace(current->namespace);
/* if the mount is moved, it should no longer be expire
* automatically */
@@ -877,6 +899,7 @@ static void expire_mount(struct vfsmount
struct nameidata old_nd;
/* delete from the namespace */
+ touch_namespace(mnt->mnt_namespace);
list_del_init(&mnt->mnt_list);
mnt->mnt_namespace = NULL;
detach_mnt(mnt, &old_nd);
@@ -1114,6 +1137,8 @@ int copy_namespace(int flags, struct tas
atomic_set(&new_ns->count, 1);
init_rwsem(&new_ns->sem);
INIT_LIST_HEAD(&new_ns->list);
+ init_waitqueue_head(&new_ns->poll);
+ new_ns->event = 0;
down_write(&tsk->namespace->sem);
/* First pass: copy the tree topology */
@@ -1377,6 +1402,7 @@ asmlinkage long sys_pivot_root(const cha
detach_mnt(user_nd.mnt, &root_parent);
attach_mnt(user_nd.mnt, &old_nd); /* mount old root on put_old */
attach_mnt(new_nd.mnt, &root_parent); /* mount new_root on / */
+ touch_namespace(current->namespace);
spin_unlock(&vfsmount_lock);
chroot_fs_refs(&user_nd, &new_nd);
security_sb_post_pivotroot(&user_nd, &new_nd);
@@ -1413,6 +1439,8 @@ static void __init init_mount_tree(void)
atomic_set(&namespace->count, 1);
INIT_LIST_HEAD(&namespace->list);
init_rwsem(&namespace->sem);
+ init_waitqueue_head(&namespace->poll);
+ namespace->event = 0;
list_add(&mnt->mnt_list, &namespace->list);
namespace->root = mnt;
mnt->mnt_namespace = namespace;
diff --git a/fs/proc/base.c b/fs/proc/base.c
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -70,6 +70,7 @@
#include <linux/seccomp.h>
#include <linux/cpuset.h>
#include <linux/audit.h>
+#include <linux/poll.h>
#include "internal.h"
/*
@@ -660,26 +661,38 @@ static struct file_operations proc_smaps
#endif
extern struct seq_operations mounts_op;
+struct proc_mounts {
+ struct seq_file m;
+ int event;
+};
+
static int mounts_open(struct inode *inode, struct file *file)
{
struct task_struct *task = proc_task(inode);
- int ret = seq_open(file, &mounts_op);
+ struct namespace *namespace;
+ struct proc_mounts *p;
+ int ret = -EINVAL;
- if (!ret) {
- struct seq_file *m = file->private_data;
- struct namespace *namespace;
- task_lock(task);
- namespace = task->namespace;
- if (namespace)
- get_namespace(namespace);
- task_unlock(task);
-
- if (namespace)
- m->private = namespace;
- else {
- seq_release(inode, file);
- ret = -EINVAL;
+ task_lock(task);
+ namespace = task->namespace;
+ if (namespace)
+ get_namespace(namespace);
+ task_unlock(task);
+
+ if (namespace) {
+ ret = -ENOMEM;
+ p = kmalloc(sizeof(struct proc_mounts), GFP_KERNEL);
+ if (p) {
+ file->private_data = &p->m;
+ ret = seq_open(file, &mounts_op);
+ if (!ret) {
+ p->m.private = namespace;
+ p->event = namespace->event;
+ return 0;
+ }
+ kfree(p);
}
+ put_namespace(namespace);
}
return ret;
}
@@ -692,11 +705,30 @@ static int mounts_release(struct inode *
return seq_release(inode, file);
}
+static unsigned mounts_poll(struct file *file, poll_table *wait)
+{
+ struct proc_mounts *p = file->private_data;
+ struct namespace *ns = p->m.private;
+ unsigned res = 0;
+
+ poll_wait(file, &ns->poll, wait);
+
+ spin_lock(&vfsmount_lock);
+ if (p->event != ns->event) {
+ p->event = ns->event;
+ res = POLLERR;
+ }
+ spin_unlock(&vfsmount_lock);
+
+ return res;
+}
+
static struct file_operations proc_mounts_operations = {
.open = mounts_open,
.read = seq_read,
.llseek = seq_lseek,
.release = mounts_release,
+ .poll = mounts_poll,
};
#define PROC_BLOCK_SIZE (3*1024) /* 4K page size but our output routines use some slack for overruns */
diff --git a/include/linux/namespace.h b/include/linux/namespace.h
--- a/include/linux/namespace.h
+++ b/include/linux/namespace.h
@@ -10,6 +10,8 @@ struct namespace {
struct vfsmount * root;
struct list_head list;
struct rw_semaphore sem;
+ wait_queue_head_t poll;
+ int event;
};
extern int copy_namespace(int, struct task_struct *);
^ permalink raw reply [flat|nested] 4+ messages in thread
* Re: [PATCH 4/18] make /proc/mounts pollable
2005-11-08 2:01 [PATCH 4/18] make /proc/mounts pollable Al Viro
@ 2005-11-08 3:02 ` Neil Brown
2005-11-08 3:10 ` Greg KH
0 siblings, 1 reply; 4+ messages in thread
From: Neil Brown @ 2005-11-08 3:02 UTC (permalink / raw)
To: Al Viro
Cc: torvalds, linux-kernel, linux-fsdevel, linuxram, Andrew Morton,
Christoph Hellwig
Ahh, now this is interesting.
I tried to make /proc/mdstat pollable some time ago and got howled down
as it was said to be the Wrong Thing(TM). (It was not, I hasten to
add, Al who howled me down).
I look forward to seeing the progress of this patch.
I wonder if there is any chance of attributes in sysfs being pollable
too??
NeilBrown
^ permalink raw reply [flat|nested] 4+ messages in thread
* Re: [PATCH 4/18] make /proc/mounts pollable
2005-11-08 3:02 ` Neil Brown
@ 2005-11-08 3:10 ` Greg KH
2005-11-08 5:18 ` Neil Brown
0 siblings, 1 reply; 4+ messages in thread
From: Greg KH @ 2005-11-08 3:10 UTC (permalink / raw)
To: Neil Brown
Cc: Al Viro, torvalds, linux-kernel, linux-fsdevel, linuxram,
Andrew Morton, Christoph Hellwig
On Tue, Nov 08, 2005 at 02:02:51PM +1100, Neil Brown wrote:
>
> Ahh, now this is interesting.
Yeah, if we do this, we get rid of the "mount"-like kernel uevents, as
they were in the wrong place.
> I wonder if there is any chance of attributes in sysfs being pollable
> too??
I haven't had anyone ask for this yet. It might be a bit harder, as we
would need to have a hook back to sysfs to let userspace know it had
changed. As long as it was optional and didn't cause any overhead for
everyone that does not need it, I don't see why it could not be added.
All we need now is a patch :)
thanks,
greg k-h
^ permalink raw reply [flat|nested] 4+ messages in thread
* Re: [PATCH 4/18] make /proc/mounts pollable
2005-11-08 3:10 ` Greg KH
@ 2005-11-08 5:18 ` Neil Brown
0 siblings, 0 replies; 4+ messages in thread
From: Neil Brown @ 2005-11-08 5:18 UTC (permalink / raw)
To: Greg KH
Cc: Al Viro, torvalds, linux-kernel, linux-fsdevel, linuxram,
Andrew Morton, Christoph Hellwig
On Monday November 7, greg@kroah.com wrote:
> On Tue, Nov 08, 2005 at 02:02:51PM +1100, Neil Brown wrote:
> >
> > Ahh, now this is interesting.
>
> Yeah, if we do this, we get rid of the "mount"-like kernel uevents, as
> they were in the wrong place.
>
> > I wonder if there is any chance of attributes in sysfs being pollable
> > too??
>
> I haven't had anyone ask for this yet. It might be a bit harder, as we
> would need to have a hook back to sysfs to let userspace know it had
> changed. As long as it was optional and didn't cause any overhead for
> everyone that does not need it, I don't see why it could not be added.
>
> All we need now is a patch :)
Here's one. Untested, but it compiles.
I don't actually have a need for this now, but I might soon, and would
like to know if it is an option.
Comments welcome.
NeilBrown
-------------------------------
Allow sysfs attribute files to be pollable.
This is untested 'strawman' code.
It allows an attribute file in sysfs to be polled for activity.
The 'poll' interface is based on that for /proc/mounts.
I think it works like this:
Open the file
Read all the contents.
Call poll requesting POLLERR
When poll returns, read from the file again (maybe you need to close
and reopen, maybe you need to lseek to the start ???)
Events are signaled by an object manager calling
sysfs_notify(kobj, dir, attr);
If the dir is non-NULL, it is used to find a subdirectory which
contains the attribute (presumably created by sysfs_create_group).
I have no idea if there is adequate locking anywhere here.
s_event should possibly be atomic_t.
This has a cost of one int and one wait_queue_head per attribute, and
one int per file.
We could probably reduce this cost by having only one wait_queue_head per
kobject.
The name "sysfs_notify" may be confused with the inotify
functionality. Maybe it would be nice to support inotify for sysfs
attributes as well?
Comments welcome.
Signed-off-by: Neil Brown <neilb@suse.de>
### Diffstat output
./fs/sysfs/dir.c | 1 +
./fs/sysfs/file.c | 34 ++++++++++++++++++++++++++++++++++
./fs/sysfs/inode.c | 20 ++++++++++++++++++++
./fs/sysfs/sysfs.h | 1 +
./include/linux/sysfs.h | 3 +++
5 files changed, 59 insertions(+)
diff ./fs/sysfs/dir.c~current~ ./fs/sysfs/dir.c
--- ./fs/sysfs/dir.c~current~ 2005-11-08 15:44:16.000000000 +1100
+++ ./fs/sysfs/dir.c 2005-11-08 16:08:37.000000000 +1100
@@ -45,6 +45,7 @@ static struct sysfs_dirent * sysfs_new_d
atomic_set(&sd->s_count, 1);
INIT_LIST_HEAD(&sd->s_children);
list_add(&sd->s_sibling, &parent_sd->s_children);
+ init_waitqueue_head(&sd->s_poll);
sd->s_element = element;
return sd;
diff ./fs/sysfs/file.c~current~ ./fs/sysfs/file.c
--- ./fs/sysfs/file.c~current~ 2005-11-08 15:20:34.000000000 +1100
+++ ./fs/sysfs/file.c 2005-11-08 16:09:16.000000000 +1100
@@ -7,6 +7,7 @@
#include <linux/kobject.h>
#include <linux/namei.h>
#include <linux/limits.h>
+#include <linux/poll.h>
#include <asm/uaccess.h>
#include <asm/semaphore.h>
@@ -59,6 +60,7 @@ struct sysfs_buffer {
struct sysfs_ops * ops;
struct semaphore sem;
int needs_read_fill;
+ int event;
};
@@ -305,10 +307,12 @@ static int check_perm(struct inode * ino
*/
buffer = kmalloc(sizeof(struct sysfs_buffer),GFP_KERNEL);
if (buffer) {
+ struct sysfs_dirent * sd = file->f_dentry->d_fsdata;
memset(buffer,0,sizeof(struct sysfs_buffer));
init_MUTEX(&buffer->sem);
buffer->needs_read_fill = 1;
buffer->ops = ops;
+ buffer->event = sd->s_event;
file->private_data = buffer;
} else
error = -ENOMEM;
@@ -357,12 +361,42 @@ static int sysfs_release(struct inode *
return 0;
}
+static unsigned int sysfs_poll(struct file *filp, poll_table *wait)
+{
+	struct sysfs_buffer * buffer = filp->private_data;
+	struct sysfs_dirent * sd = filp->f_dentry->d_fsdata;
+	unsigned int res = 0;
+	/* Queue on the attribute's wait queue; sysfs_notify() wakes it. */
+	poll_wait(filp, &sd->s_poll, wait);
+	/* Report POLLERR when s_event moved since this open file last looked. */
+	if (buffer->event != sd->s_event) {
+		buffer->event = sd->s_event;	/* update our snapshot, never the shared counter */
+		res = POLLERR;
+	}
+	return res;
+}
+
+void sysfs_notify(struct kobject * k, char *dir, char *attr)
+{
+ struct sysfs_dirent *sd = k->dentry->d_fsdata;
+ if (sd && dir)
+ sd = sysfs_find(sd, dir);
+ if (sd && attr)
+ sd = sysfs_find(sd, attr);
+ if (sd) {
+ sd->s_event++;
+ wake_up_interruptible(&sd->s_poll);
+ }
+}
+EXPORT_SYMBOL_GPL(sysfs_notify);
+
struct file_operations sysfs_file_operations = {
.read = sysfs_read_file,
.write = sysfs_write_file,
.llseek = generic_file_llseek,
.open = sysfs_open_file,
.release = sysfs_release,
+ .poll = sysfs_poll,
};
diff ./fs/sysfs/inode.c~current~ ./fs/sysfs/inode.c
--- ./fs/sysfs/inode.c~current~ 2005-11-08 15:52:47.000000000 +1100
+++ ./fs/sysfs/inode.c 2005-11-08 16:05:07.000000000 +1100
@@ -247,3 +247,23 @@ void sysfs_hash_and_remove(struct dentry
}
+struct sysfs_dirent *sysfs_find(struct sysfs_dirent *dir, const char * name)
+{
+ struct sysfs_dirent * sd, * rv = NULL;
+
+ if (dir->s_dentry == NULL ||
+ dir->s_dentry->d_inode == NULL)
+ return NULL;
+
+ down(&dir->s_dentry->d_inode->i_sem);
+ list_for_each_entry(sd, &dir->s_children, s_sibling) {
+ if (!sd->s_element)
+ continue;
+ if (!strcmp(sysfs_get_name(sd), name)) {
+ rv = sd;
+ break;
+ }
+ }
+ up(&dir->s_dentry->d_inode->i_sem);
+ return rv;
+}
diff ./fs/sysfs/sysfs.h~current~ ./fs/sysfs/sysfs.h
--- ./fs/sysfs/sysfs.h~current~ 2005-11-08 16:07:23.000000000 +1100
+++ ./fs/sysfs/sysfs.h 2005-11-08 16:07:28.000000000 +1100
@@ -10,6 +10,7 @@ extern int sysfs_make_dirent(struct sysf
extern int sysfs_add_file(struct dentry *, const struct attribute *, int);
extern void sysfs_hash_and_remove(struct dentry * dir, const char * name);
+extern struct sysfs_dirent *sysfs_find(struct sysfs_dirent *dir, const char * name);
extern int sysfs_create_subdir(struct kobject *, const char *, struct dentry **);
extern void sysfs_remove_subdir(struct dentry *);
diff ./include/linux/sysfs.h~current~ ./include/linux/sysfs.h
--- ./include/linux/sysfs.h~current~ 2005-11-08 15:44:52.000000000 +1100
+++ ./include/linux/sysfs.h 2005-11-08 16:01:41.000000000 +1100
@@ -11,6 +11,7 @@
#define _SYSFS_H_
#include <asm/atomic.h>
+#include <linux/wait.h>
struct kobject;
struct module;
@@ -74,6 +75,8 @@ struct sysfs_dirent {
umode_t s_mode;
struct dentry * s_dentry;
struct iattr * s_iattr;
+ int s_event;
+ wait_queue_head_t s_poll;
};
#define SYSFS_ROOT 0x0001
^ permalink raw reply [flat|nested] 4+ messages in thread
end of thread, other threads:[~2005-11-08 5:18 UTC | newest]
Thread overview: 4+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2005-11-08 2:01 [PATCH 4/18] make /proc/mounts pollable Al Viro
2005-11-08 3:02 ` Neil Brown
2005-11-08 3:10 ` Greg KH
2005-11-08 5:18 ` Neil Brown
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).