* [PATCH - 2.6.15-rc5-mm3] Allow sysfs attribute files to be pollable.
@ 2005-12-20 23:14 Neil Brown
2005-12-20 23:28 ` Jesper Juhl
2005-12-21 13:49 ` Maneesh Soni
0 siblings, 2 replies; 9+ messages in thread
From: Neil Brown @ 2005-12-20 23:14 UTC (permalink / raw)
To: Greg KH, linux-kernel
I suggested an early of this patch some time ago to see if it was an
acceptable approach and got zero feedback, which presumably means it
is perfect:-)
I've now reviewed it, fixed up the bits I didn't like, and tested it.
It works and I am happy with in.
So: I would like to submit it for inclusion in a future kernel.
Comments, or acks, please :-)
NeilBrown
---------
This allows an attribute file in sysfs to be polled for activity.
It works like this:
Open the file
Read all the contents.
Call poll requesting POLLERR or POLLPRI (so select/exceptfds works)
When poll returns, close the file, and go to top of loop.
Events are signaled by an object manager calling
sysfs_notify(kobj, dir, attr);
If the dir is non-NULL, it is used to find a subdirectory which
contains the attribute (presumably created by sysfs_create_group).
This has a cost of one int per attribute, one wait_queuehead per kobject,
one int per open file.
The name "sysfs_notify" may be confused with the inotify
functionality. Maybe it would be nice to support inotify for sysfs
attributes as well?
This patch also uses sysfs_notify to allow /sys/block/md*/md/sync_action
to be pollable
Signed-off-by: Neil Brown <neilb@suse.de>
### Diffstat output
./drivers/md/md.c | 1
./fs/sysfs/dir.c | 1
./fs/sysfs/file.c | 47 ++++++++++++++++++++++++++++++++++++++++++++++
./fs/sysfs/inode.c | 20 +++++++++++++++++++
./fs/sysfs/sysfs.h | 1
./include/linux/kobject.h | 2 +
./include/linux/sysfs.h | 7 ++++++
./lib/kobject.c | 1
8 files changed, 80 insertions(+)
diff ./drivers/md/md.c~current~ ./drivers/md/md.c
--- ./drivers/md/md.c~current~ 2005-12-21 09:42:07.000000000 +1100
+++ ./drivers/md/md.c 2005-12-21 09:43:52.000000000 +1100
@@ -162,6 +162,7 @@ void md_new_event(mddev_t *mddev)
{
atomic_inc(&md_event_count);
wake_up(&md_event_waiters);
+ sysfs_notify(&mddev->kobj, NULL, "sync_action");
}
/*
diff ./fs/sysfs/dir.c~current~ ./fs/sysfs/dir.c
--- ./fs/sysfs/dir.c~current~ 2005-12-21 09:43:51.000000000 +1100
+++ ./fs/sysfs/dir.c 2005-12-21 09:43:52.000000000 +1100
@@ -43,6 +43,7 @@ static struct sysfs_dirent * sysfs_new_d
memset(sd, 0, sizeof(*sd));
atomic_set(&sd->s_count, 1);
+ atomic_set(&sd->s_event, 0);
INIT_LIST_HEAD(&sd->s_children);
list_add(&sd->s_sibling, &parent_sd->s_children);
sd->s_element = element;
diff ./fs/sysfs/file.c~current~ ./fs/sysfs/file.c
--- ./fs/sysfs/file.c~current~ 2005-12-21 09:43:51.000000000 +1100
+++ ./fs/sysfs/file.c 2005-12-21 09:44:28.000000000 +1100
@@ -7,6 +7,7 @@
#include <linux/kobject.h>
#include <linux/namei.h>
#include <linux/limits.h>
+#include <linux/poll.h>
#include <asm/uaccess.h>
#include <asm/semaphore.h>
@@ -59,6 +60,7 @@ struct sysfs_buffer {
struct sysfs_ops * ops;
struct semaphore sem;
int needs_read_fill;
+ int event;
};
@@ -74,6 +76,7 @@ struct sysfs_buffer {
*/
static int fill_read_buffer(struct dentry * dentry, struct sysfs_buffer * buffer)
{
+ struct sysfs_dirent * sd = dentry->d_fsdata;
struct attribute * attr = to_attr(dentry);
struct kobject * kobj = to_kobj(dentry->d_parent);
struct sysfs_ops * ops = buffer->ops;
@@ -85,6 +88,7 @@ static int fill_read_buffer(struct dentr
if (!buffer->page)
return -ENOMEM;
+ buffer->event = atomic_read(&sd->s_event);
count = ops->show(kobj,attr,buffer->page);
buffer->needs_read_fill = 0;
BUG_ON(count > (ssize_t)PAGE_SIZE);
@@ -357,12 +361,55 @@ static int sysfs_release(struct inode *
return 0;
}
+/* Sysfs attribute files are pollable. The idea is that you read
+ * the content and then you use 'poll' or 'select' to wait for
+ * the content to change. When the content changes (assuming the
+ * manager for the kobject supports notification), poll will
+ * return POLLERR|POLLPRI, and select will return the fd whether
+ * it is waiting for read, write, or exceptions.
+ * Once poll/select indicates that the value has changed, you
+ * need to close and re-open the file, as simply seeking and reading
+ * again will not get new data, or reset the state of 'poll'.
+ * Reminder: this only works for attributes which actively support
+ * it, and it is not possible to test an attribute from userspace
+ * to see if it supports poll.
+ */
+static unsigned int sysfs_poll(struct file *filp, poll_table *wait)
+{
+ struct sysfs_buffer * buffer = filp->private_data;
+ struct kobject * kobj = to_kobj(filp->f_dentry->d_parent);
+ struct sysfs_dirent * sd = filp->f_dentry->d_fsdata;
+ int res = 0;
+
+ poll_wait(filp, &kobj->poll, wait);
+
+ if (buffer->event != atomic_read(&sd->s_event))
+ res = POLLERR|POLLPRI;
+
+ return res;
+}
+
+void sysfs_notify(struct kobject * k, char *dir, char *attr)
+{
+ struct sysfs_dirent * sd = k->dentry->d_fsdata;
+ if (sd && dir)
+ sd = sysfs_find(sd, dir);
+ if (sd && attr)
+ sd = sysfs_find(sd, attr);
+ if (sd) {
+ atomic_inc(&sd->s_event);
+ wake_up_interruptible(&k->poll);
+ }
+}
+EXPORT_SYMBOL_GPL(sysfs_notify);
+
struct file_operations sysfs_file_operations = {
.read = sysfs_read_file,
.write = sysfs_write_file,
.llseek = generic_file_llseek,
.open = sysfs_open_file,
.release = sysfs_release,
+ .poll = sysfs_poll,
};
diff ./fs/sysfs/inode.c~current~ ./fs/sysfs/inode.c
--- ./fs/sysfs/inode.c~current~ 2005-12-21 09:43:51.000000000 +1100
+++ ./fs/sysfs/inode.c 2005-12-21 09:43:52.000000000 +1100
@@ -247,3 +247,23 @@ void sysfs_hash_and_remove(struct dentry
}
+struct sysfs_dirent *sysfs_find(struct sysfs_dirent *dir, const char * name)
+{
+ struct sysfs_dirent * sd, * rv = NULL;
+
+ if (dir->s_dentry == NULL ||
+ dir->s_dentry->d_inode == NULL)
+ return NULL;
+
+ down(&dir->s_dentry->d_inode->i_sem);
+ list_for_each_entry(sd, &dir->s_children, s_sibling) {
+ if (!sd->s_element)
+ continue;
+ if (!strcmp(sysfs_get_name(sd), name)) {
+ rv = sd;
+ break;
+ }
+ }
+ up(&dir->s_dentry->d_inode->i_sem);
+ return rv;
+}
diff ./fs/sysfs/sysfs.h~current~ ./fs/sysfs/sysfs.h
--- ./fs/sysfs/sysfs.h~current~ 2005-12-21 09:43:51.000000000 +1100
+++ ./fs/sysfs/sysfs.h 2005-12-21 09:43:52.000000000 +1100
@@ -10,6 +10,7 @@ extern int sysfs_make_dirent(struct sysf
extern int sysfs_add_file(struct dentry *, const struct attribute *, int);
extern void sysfs_hash_and_remove(struct dentry * dir, const char * name);
+extern struct sysfs_dirent *sysfs_find(struct sysfs_dirent *dir, const char * name);
extern int sysfs_create_subdir(struct kobject *, const char *, struct dentry **);
extern void sysfs_remove_subdir(struct dentry *);
diff ./include/linux/kobject.h~current~ ./include/linux/kobject.h
--- ./include/linux/kobject.h~current~ 2005-12-21 09:43:51.000000000 +1100
+++ ./include/linux/kobject.h 2005-12-21 09:43:52.000000000 +1100
@@ -24,6 +24,7 @@
#include <linux/rwsem.h>
#include <linux/kref.h>
#include <linux/kernel.h>
+#include <linux/wait.h>
#include <asm/atomic.h>
#define KOBJ_NAME_LEN 20
@@ -54,6 +55,7 @@ struct kobject {
struct kset * kset;
struct kobj_type * ktype;
struct dentry * dentry;
+ wait_queue_head_t poll;
};
extern int kobject_set_name(struct kobject *, const char *, ...)
diff ./include/linux/sysfs.h~current~ ./include/linux/sysfs.h
--- ./include/linux/sysfs.h~current~ 2005-12-21 09:43:52.000000000 +1100
+++ ./include/linux/sysfs.h 2005-12-21 09:43:52.000000000 +1100
@@ -74,6 +74,7 @@ struct sysfs_dirent {
umode_t s_mode;
struct dentry * s_dentry;
struct iattr * s_iattr;
+ atomic_t s_event;
};
#define SYSFS_ROOT 0x0001
@@ -118,6 +119,7 @@ int sysfs_remove_bin_file(struct kobject
int sysfs_create_group(struct kobject *, const struct attribute_group *);
void sysfs_remove_group(struct kobject *, const struct attribute_group *);
+void sysfs_notify(struct kobject * k, char *dir, char *attr);
#else /* CONFIG_SYSFS */
static inline int sysfs_create_dir(struct kobject * k)
@@ -185,6 +187,11 @@ static inline void sysfs_remove_group(st
;
}
+static inline void sysfs_notify(struct kobject * k, char *dir, char *attr)
+{
+ ;
+}
+
#endif /* CONFIG_SYSFS */
#endif /* _SYSFS_H_ */
diff ./lib/kobject.c~current~ ./lib/kobject.c
--- ./lib/kobject.c~current~ 2005-12-21 09:43:52.000000000 +1100
+++ ./lib/kobject.c 2005-12-21 09:43:52.000000000 +1100
@@ -124,6 +124,7 @@ void kobject_init(struct kobject * kobj)
{
kref_init(&kobj->kref);
INIT_LIST_HEAD(&kobj->entry);
+ init_waitqueue_head(&kobj->poll);
kobj->kset = kset_get(kobj->kset);
}
^ permalink raw reply [flat|nested] 9+ messages in thread* Re: [PATCH - 2.6.15-rc5-mm3] Allow sysfs attribute files to be pollable. 2005-12-20 23:14 [PATCH - 2.6.15-rc5-mm3] Allow sysfs attribute files to be pollable Neil Brown @ 2005-12-20 23:28 ` Jesper Juhl 2005-12-21 0:45 ` Neil Brown 2005-12-21 13:49 ` Maneesh Soni 1 sibling, 1 reply; 9+ messages in thread From: Jesper Juhl @ 2005-12-20 23:28 UTC (permalink / raw) To: Neil Brown; +Cc: Greg KH, linux-kernel On 12/21/05, Neil Brown <neilb@suse.de> wrote: > > I suggested an early of this patch some time ago to see if it was an > acceptable approach and got zero feedback, which presumably means it > is perfect:-) > > I've now reviewed it, fixed up the bits I didn't like, and tested it. > It works and I am happy with in. > > So: I would like to submit it for inclusion in a future kernel. > > Comments, or acks, please :-) > [snip] > +/* Sysfs attribute files are pollable. The idea is that you read > + * the content and then you use 'poll' or 'select' to wait for > + * the content to change. When the content changes (assuming the > + * manager for the kobject supports notification), poll will > + * return POLLERR|POLLPRI, and select will return the fd whether > + * it is waiting for read, write, or exceptions. > + * Once poll/select indicates that the value has changed, you > + * need to close and re-open the file, as simply seeking and reading > + * again will not get new data, or reset the state of 'poll'. What if the value changes again between me closing and re-opening the file? > + * Reminder: this only works for attributes which actively support > + * it, and it is not possible to test an attribute from userspace > + * to see if it supports poll. > + */ -- Jesper Juhl <jesper.juhl@gmail.com> Don't top-post http://www.catb.org/~esr/jargon/html/T/top-post.html Plain text mails only, please http://www.expita.com/nomime.html ^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH - 2.6.15-rc5-mm3] Allow sysfs attribute files to be pollable. 2005-12-20 23:28 ` Jesper Juhl @ 2005-12-21 0:45 ` Neil Brown 2005-12-21 8:46 ` Jesper Juhl 0 siblings, 1 reply; 9+ messages in thread From: Neil Brown @ 2005-12-21 0:45 UTC (permalink / raw) To: Jesper Juhl; +Cc: Greg KH, linux-kernel On Wednesday December 21, jesper.juhl@gmail.com wrote: > On 12/21/05, Neil Brown <neilb@suse.de> wrote: > > > > I suggested an early of this patch some time ago to see if it was an > > acceptable approach and got zero feedback, which presumably means it > > is perfect:-) > > > > I've now reviewed it, fixed up the bits I didn't like, and tested it. > > It works and I am happy with in. > > > > So: I would like to submit it for inclusion in a future kernel. > > > > Comments, or acks, please :-) > > > [snip] > > +/* Sysfs attribute files are pollable. The idea is that you read > > + * the content and then you use 'poll' or 'select' to wait for > > + * the content to change. When the content changes (assuming the > > + * manager for the kobject supports notification), poll will > > + * return POLLERR|POLLPRI, and select will return the fd whether > > + * it is waiting for read, write, or exceptions. > > + * Once poll/select indicates that the value has changed, you > > + * need to close and re-open the file, as simply seeking and reading > > + * again will not get new data, or reset the state of 'poll'. > > What if the value changes again between me closing and re-opening the file? You miss an intermediate event I guess. If you have a stream of events where you absolutely want to see every one of them, then you want something like a character-device (or one of several other alternative). That is not what this is for. However very often you don't need to see every single event. You just need to know when state has changed so you can respond to the new state. That is what this patch is for. Does that answer your question? Thanks, NeilBrown ^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH - 2.6.15-rc5-mm3] Allow sysfs attribute files to be pollable. 2005-12-21 0:45 ` Neil Brown @ 2005-12-21 8:46 ` Jesper Juhl 0 siblings, 0 replies; 9+ messages in thread From: Jesper Juhl @ 2005-12-21 8:46 UTC (permalink / raw) To: Neil Brown; +Cc: Greg KH, linux-kernel On 12/21/05, Neil Brown <neilb@suse.de> wrote: > On Wednesday December 21, jesper.juhl@gmail.com wrote: > > On 12/21/05, Neil Brown <neilb@suse.de> wrote: > > > > > > I suggested an early of this patch some time ago to see if it was an > > > acceptable approach and got zero feedback, which presumably means it > > > is perfect:-) > > > > > > I've now reviewed it, fixed up the bits I didn't like, and tested it. > > > It works and I am happy with in. > > > > > > So: I would like to submit it for inclusion in a future kernel. > > > > > > Comments, or acks, please :-) > > > > > [snip] > > > +/* Sysfs attribute files are pollable. The idea is that you read > > > + * the content and then you use 'poll' or 'select' to wait for > > > + * the content to change. When the content changes (assuming the > > > + * manager for the kobject supports notification), poll will > > > + * return POLLERR|POLLPRI, and select will return the fd whether > > > + * it is waiting for read, write, or exceptions. > > > + * Once poll/select indicates that the value has changed, you > > > + * need to close and re-open the file, as simply seeking and reading > > > + * again will not get new data, or reset the state of 'poll'. > > > > What if the value changes again between me closing and re-opening the file? > > You miss an intermediate event I guess. > > If you have a stream of events where you absolutely want to see every > one of them, then you want something like a character-device (or one > of several other alternative). > That is not what this is for. > > However very often you don't need to see every single event. You just > need to know when state has changed so you can respond to the new > state. > That is what this patch is for. > > Does that answer your question? > Yes it does. Thanks. -- Jesper Juhl <jesper.juhl@gmail.com> Don't top-post http://www.catb.org/~esr/jargon/html/T/top-post.html Plain text mails only, please http://www.expita.com/nomime.html ^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH - 2.6.15-rc5-mm3] Allow sysfs attribute files to be pollable. 2005-12-20 23:14 [PATCH - 2.6.15-rc5-mm3] Allow sysfs attribute files to be pollable Neil Brown 2005-12-20 23:28 ` Jesper Juhl @ 2005-12-21 13:49 ` Maneesh Soni 2005-12-22 3:43 ` Neil Brown 1 sibling, 1 reply; 9+ messages in thread From: Maneesh Soni @ 2005-12-21 13:49 UTC (permalink / raw) To: Neil Brown; +Cc: Greg KH, linux-kernel On Wed, Dec 21, 2005 at 10:14:29AM +1100, Neil Brown wrote: > > I suggested an early of this patch some time ago to see if it was an > acceptable approach and got zero feedback, which presumably means it > is perfect:-) > > I've now reviewed it, fixed up the bits I didn't like, and tested it. > It works and I am happy with in. > > So: I would like to submit it for inclusion in a future kernel. > > Comments, or acks, please :-) > > NeilBrown > > > --------- > This allows an attribute file in sysfs to be polled for activity. > > It works like this: > Open the file > Read all the contents. > Call poll requesting POLLERR or POLLPRI (so select/exceptfds works) > When poll returns, close the file, and go to top of loop. > I am no "poll/select" expert, but is reading the contents always a requirement for "poll"? If not then probably it is not a good idea to put such rules. > Events are signaled by an object manager calling > sysfs_notify(kobj, dir, attr); > > If the dir is non-NULL, it is used to find a subdirectory which > contains the attribute (presumably created by sysfs_create_group). > > This has a cost of one int per attribute, one wait_queuehead per kobject, > one int per open file. > So, all the attribute files for a given kobject will use the same wait queue? What happens if there are multiple attribute files polled for the same kobject. > The name "sysfs_notify" may be confused with the inotify > functionality. Maybe it would be nice to support inotify for sysfs > attributes as well? > > This patch also uses sysfs_notify to allow /sys/block/md*/md/sync_action > to be pollable > > Signed-off-by: Neil Brown <neilb@suse.de> > > ### Diffstat output > ./drivers/md/md.c | 1 > ./fs/sysfs/dir.c | 1 > ./fs/sysfs/file.c | 47 ++++++++++++++++++++++++++++++++++++++++++++++ > ./fs/sysfs/inode.c | 20 +++++++++++++++++++ > ./fs/sysfs/sysfs.h | 1 > ./include/linux/kobject.h | 2 + > ./include/linux/sysfs.h | 7 ++++++ > ./lib/kobject.c | 1 > 8 files changed, 80 insertions(+) > > diff ./drivers/md/md.c~current~ ./drivers/md/md.c > --- ./drivers/md/md.c~current~ 2005-12-21 09:42:07.000000000 +1100 > +++ ./drivers/md/md.c 2005-12-21 09:43:52.000000000 +1100 > @@ -162,6 +162,7 @@ void md_new_event(mddev_t *mddev) > { > atomic_inc(&md_event_count); > wake_up(&md_event_waiters); > + sysfs_notify(&mddev->kobj, NULL, "sync_action"); > } > > /* > > diff ./fs/sysfs/dir.c~current~ ./fs/sysfs/dir.c > --- ./fs/sysfs/dir.c~current~ 2005-12-21 09:43:51.000000000 +1100 > +++ ./fs/sysfs/dir.c 2005-12-21 09:43:52.000000000 +1100 > @@ -43,6 +43,7 @@ static struct sysfs_dirent * sysfs_new_d > > memset(sd, 0, sizeof(*sd)); > atomic_set(&sd->s_count, 1); > + atomic_set(&sd->s_event, 0); > INIT_LIST_HEAD(&sd->s_children); > list_add(&sd->s_sibling, &parent_sd->s_children); > sd->s_element = element; > > diff ./fs/sysfs/file.c~current~ ./fs/sysfs/file.c > --- ./fs/sysfs/file.c~current~ 2005-12-21 09:43:51.000000000 +1100 > +++ ./fs/sysfs/file.c 2005-12-21 09:44:28.000000000 +1100 > @@ -7,6 +7,7 @@ > #include <linux/kobject.h> > #include <linux/namei.h> > #include <linux/limits.h> > +#include <linux/poll.h> > > #include <asm/uaccess.h> > #include <asm/semaphore.h> > @@ -59,6 +60,7 @@ struct sysfs_buffer { > struct sysfs_ops * ops; > struct semaphore sem; > int needs_read_fill; > + int event; > }; > > > @@ -74,6 +76,7 @@ struct sysfs_buffer { > */ > static int fill_read_buffer(struct dentry * dentry, struct sysfs_buffer * buffer) > { > + struct sysfs_dirent * sd = dentry->d_fsdata; > struct attribute * attr = to_attr(dentry); > struct kobject * kobj = to_kobj(dentry->d_parent); > struct sysfs_ops * ops = buffer->ops; > @@ -85,6 +88,7 @@ static int fill_read_buffer(struct dentr > if (!buffer->page) > return -ENOMEM; > > + buffer->event = atomic_read(&sd->s_event); > count = ops->show(kobj,attr,buffer->page); > buffer->needs_read_fill = 0; > BUG_ON(count > (ssize_t)PAGE_SIZE); > @@ -357,12 +361,55 @@ static int sysfs_release(struct inode * > return 0; > } > > +/* Sysfs attribute files are pollable. The idea is that you read > + * the content and then you use 'poll' or 'select' to wait for > + * the content to change. When the content changes (assuming the > + * manager for the kobject supports notification), poll will > + * return POLLERR|POLLPRI, and select will return the fd whether > + * it is waiting for read, write, or exceptions. > + * Once poll/select indicates that the value has changed, you > + * need to close and re-open the file, as simply seeking and reading > + * again will not get new data, or reset the state of 'poll'. > + * Reminder: this only works for attributes which actively support > + * it, and it is not possible to test an attribute from userspace > + * to see if it supports poll. > + */ > +static unsigned int sysfs_poll(struct file *filp, poll_table *wait) > +{ > + struct sysfs_buffer * buffer = filp->private_data; > + struct kobject * kobj = to_kobj(filp->f_dentry->d_parent); > + struct sysfs_dirent * sd = filp->f_dentry->d_fsdata; > + int res = 0; > + > + poll_wait(filp, &kobj->poll, wait); > + > + if (buffer->event != atomic_read(&sd->s_event)) > + res = POLLERR|POLLPRI; > + > + return res; > +} > + > +void sysfs_notify(struct kobject * k, char *dir, char *attr) > +{ > + struct sysfs_dirent * sd = k->dentry->d_fsdata; > + if (sd && dir) > + sd = sysfs_find(sd, dir); > + if (sd && attr) > + sd = sysfs_find(sd, attr); > + if (sd) { > + atomic_inc(&sd->s_event); > + wake_up_interruptible(&k->poll); > + } > +} > +EXPORT_SYMBOL_GPL(sysfs_notify); > + > struct file_operations sysfs_file_operations = { > .read = sysfs_read_file, > .write = sysfs_write_file, > .llseek = generic_file_llseek, > .open = sysfs_open_file, > .release = sysfs_release, > + .poll = sysfs_poll, > }; > > > > diff ./fs/sysfs/inode.c~current~ ./fs/sysfs/inode.c > --- ./fs/sysfs/inode.c~current~ 2005-12-21 09:43:51.000000000 +1100 > +++ ./fs/sysfs/inode.c 2005-12-21 09:43:52.000000000 +1100 > @@ -247,3 +247,23 @@ void sysfs_hash_and_remove(struct dentry > } > > > +struct sysfs_dirent *sysfs_find(struct sysfs_dirent *dir, const char * name) > +{ > + struct sysfs_dirent * sd, * rv = NULL; > + > + if (dir->s_dentry == NULL || > + dir->s_dentry->d_inode == NULL) > + return NULL; > + > + down(&dir->s_dentry->d_inode->i_sem); > + list_for_each_entry(sd, &dir->s_children, s_sibling) { > + if (!sd->s_element) > + continue; > + if (!strcmp(sysfs_get_name(sd), name)) { > + rv = sd; > + break; > + } > + } > + up(&dir->s_dentry->d_inode->i_sem); > + return rv; > +} I think there might be some issues here, if some other thread wants to remove the kobject, while this thread is trying to notify. Testing with some parallel add/delete operations should tell. One can pin the sysfs_dirent corresponding to the attribute file by doing sysfs_get() but that might create/need more complication related to parent sysfs_dirent and so on. Because as of now, sysfs_dirents have 0, 1 or 2 as ref count. At the time of creation it is 1, and at the time of linking with dentry it is 2. Once it becomes 0, the sysfs_dirent is freed. This was just to keep the refcounting simple and use the already available VFS dentry refcounting. In this case IMO, the better option is to do just lookup_one_len(), get the dentry and then get the sysfs_dirent as dentry->d_fsdata. And once done, do dput() which will release the references taken. I think its time to queue a sysfs locking document in my todo list ;-) > > diff ./fs/sysfs/sysfs.h~current~ ./fs/sysfs/sysfs.h > --- ./fs/sysfs/sysfs.h~current~ 2005-12-21 09:43:51.000000000 +1100 > +++ ./fs/sysfs/sysfs.h 2005-12-21 09:43:52.000000000 +1100 > @@ -10,6 +10,7 @@ extern int sysfs_make_dirent(struct sysf > > extern int sysfs_add_file(struct dentry *, const struct attribute *, int); > extern void sysfs_hash_and_remove(struct dentry * dir, const char * name); > +extern struct sysfs_dirent *sysfs_find(struct sysfs_dirent *dir, const char * name); > > extern int sysfs_create_subdir(struct kobject *, const char *, struct dentry **); > extern void sysfs_remove_subdir(struct dentry *); > > diff ./include/linux/kobject.h~current~ ./include/linux/kobject.h > --- ./include/linux/kobject.h~current~ 2005-12-21 09:43:51.000000000 +1100 > +++ ./include/linux/kobject.h 2005-12-21 09:43:52.000000000 +1100 > @@ -24,6 +24,7 @@ > #include <linux/rwsem.h> > #include <linux/kref.h> > #include <linux/kernel.h> > +#include <linux/wait.h> > #include <asm/atomic.h> > > #define KOBJ_NAME_LEN 20 > @@ -54,6 +55,7 @@ struct kobject { > struct kset * kset; > struct kobj_type * ktype; > struct dentry * dentry; > + wait_queue_head_t poll; > }; > > extern int kobject_set_name(struct kobject *, const char *, ...) > > diff ./include/linux/sysfs.h~current~ ./include/linux/sysfs.h > --- ./include/linux/sysfs.h~current~ 2005-12-21 09:43:52.000000000 +1100 > +++ ./include/linux/sysfs.h 2005-12-21 09:43:52.000000000 +1100 > @@ -74,6 +74,7 @@ struct sysfs_dirent { > umode_t s_mode; > struct dentry * s_dentry; > struct iattr * s_iattr; > + atomic_t s_event; > }; > > #define SYSFS_ROOT 0x0001 > @@ -118,6 +119,7 @@ int sysfs_remove_bin_file(struct kobject > int sysfs_create_group(struct kobject *, const struct attribute_group *); > void sysfs_remove_group(struct kobject *, const struct attribute_group *); > > +void sysfs_notify(struct kobject * k, char *dir, char *attr); > #else /* CONFIG_SYSFS */ > > static inline int sysfs_create_dir(struct kobject * k) > @@ -185,6 +187,11 @@ static inline void sysfs_remove_group(st > ; > } > > +static inline void sysfs_notify(struct kobject * k, char *dir, char *attr) > +{ > + ; > +} > + > #endif /* CONFIG_SYSFS */ > > #endif /* _SYSFS_H_ */ > > diff ./lib/kobject.c~current~ ./lib/kobject.c > --- ./lib/kobject.c~current~ 2005-12-21 09:43:52.000000000 +1100 > +++ ./lib/kobject.c 2005-12-21 09:43:52.000000000 +1100 > @@ -124,6 +124,7 @@ void kobject_init(struct kobject * kobj) > { > kref_init(&kobj->kref); > INIT_LIST_HEAD(&kobj->entry); > + init_waitqueue_head(&kobj->poll); > kobj->kset = kset_get(kobj->kset); > } > > -- Maneesh Soni Linux Technology Center, IBM India Software Labs, Bangalore, India email: maneesh@in.ibm.com Phone: 91-80-25044990 ^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH - 2.6.15-rc5-mm3] Allow sysfs attribute files to be pollable. 2005-12-21 13:49 ` Maneesh Soni @ 2005-12-22 3:43 ` Neil Brown 2005-12-22 5:47 ` Maneesh Soni 0 siblings, 1 reply; 9+ messages in thread From: Neil Brown @ 2005-12-22 3:43 UTC (permalink / raw) To: maneesh; +Cc: Greg KH, linux-kernel On Wednesday December 21, maneesh@in.ibm.com wrote: > > > > It works like this: > > Open the file > > Read all the contents. > > Call poll requesting POLLERR or POLLPRI (so select/exceptfds works) > > When poll returns, close the file, and go to top of loop. > > > > I am no "poll/select" expert, but is reading the contents always a > requirement for "poll"? If not then probably it is not a good idea to > put such rules. You don't have to read the contents unless you want to know what is in the file. You could just open the file and call 'poll' and wait for it to tell you something has happened. However this isn't likely to be really useful. It isn't the 'something has happened' event that is particularly interesting. It is the 'the state is now X' information that is interesting. So you read the file to find out what the state is. If that isn't the state you were looking for (or if you have finished responding to that state), you poll/select, and then try again. > > > Events are signaled by an object manager calling > > sysfs_notify(kobj, dir, attr); > > > > If the dir is non-NULL, it is used to find a subdirectory which > > contains the attribute (presumably created by sysfs_create_group). > > > > This has a cost of one int per attribute, one wait_queuehead per kobject, > > one int per open file. > > > So, all the attribute files for a given kobject will use the same > wait queue? What happens if there are multiple attribute files > polled for the same kobject. wait_queues are sharable. Having one wait queue for a number of events can work quite well. Suppose a kobject has two (or more) attributes, and two processes poll on those attributes, one each. When either attribute gets changed and sysfs_notify is called, both of the processes will be woken up. They will check if their attribute of interest has changed. If it has, the poll syscall will return to userspace. If it hasn't the process will just go to sleep again. So, if a kobject has many attributes, and there are many concurrent processes listening on different attributes, then sharing a wait_queue may cause a lot of needly wakeup/return-to-sleep events. But it is fairly unlikely. wait_queues are not tiny, and having one per kobject is more economical than one per attribute. I hope that helps. > > diff ./fs/sysfs/inode.c~current~ ./fs/sysfs/inode.c > > --- ./fs/sysfs/inode.c~current~ 2005-12-21 09:43:51.000000000 +1100 > > +++ ./fs/sysfs/inode.c 2005-12-21 09:43:52.000000000 +1100 > > @@ -247,3 +247,23 @@ void sysfs_hash_and_remove(struct dentry > > } > > > > > > +struct sysfs_dirent *sysfs_find(struct sysfs_dirent *dir, const char * name) > > +{ > > + struct sysfs_dirent * sd, * rv = NULL; > > + > > + if (dir->s_dentry == NULL || > > + dir->s_dentry->d_inode == NULL) > > + return NULL; > > + > > + down(&dir->s_dentry->d_inode->i_sem); > > + list_for_each_entry(sd, &dir->s_children, s_sibling) { > > + if (!sd->s_element) > > + continue; > > + if (!strcmp(sysfs_get_name(sd), name)) { > > + rv = sd; > > + break; > > + } > > + } > > + up(&dir->s_dentry->d_inode->i_sem); > > + return rv; > > +} > > I think there might be some issues here, if some other thread wants to > remove the kobject, while this thread is trying to notify. Testing > with some parallel add/delete operations should tell. My idea - which I thought I had stuck in a comment, but apparently not - is that the owner of the kobject should have it's own locking to ensure that it doesn't go away while the sysfs_notify is being called (md does in the places where it calls sysfs_notify). I guess it makes sense to make sysfs_notify safe against object owners doing silly things, but it wasn't clear to me either how, or that it was necessary. > > In this case IMO, the better option is to do just lookup_one_len(), get > the dentry and then get the sysfs_dirent as dentry->d_fsdata. And once > done, do dput() which will release the references taken. You may well be right. I'll try and see what happens. > > I think its time to queue a sysfs locking document in my todo list ;-) > That would be nice :-) Thanks for your input. NeilBrown ^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH - 2.6.15-rc5-mm3] Allow sysfs attribute files to be pollable. 2005-12-22 3:43 ` Neil Brown @ 2005-12-22 5:47 ` Maneesh Soni 2005-12-22 6:02 ` Neil Brown 0 siblings, 1 reply; 9+ messages in thread From: Maneesh Soni @ 2005-12-22 5:47 UTC (permalink / raw) To: Neil Brown; +Cc: Greg KH, linux-kernel On Thu, Dec 22, 2005 at 02:43:12PM +1100, Neil Brown wrote: > On Wednesday December 21, maneesh@in.ibm.com wrote: > > > > > > It works like this: > > > Open the file > > > Read all the contents. > > > Call poll requesting POLLERR or POLLPRI (so select/exceptfds works) > > > When poll returns, close the file, and go to top of loop. > > > > > > > I am no "poll/select" expert, but is reading the contents always a > > requirement for "poll"? If not then probably it is not a good idea to > > put such rules. > > You don't have to read the contents unless you want to know what is in > the file. You could just open the file and call 'poll' and wait for > it to tell you something has happened. However this isn't likely to > be really useful. > It isn't the 'something has happened' event that is particularly > interesting. It is the 'the state is now X' information that is > interesting. > So you read the file to find out what the state is. If that isn't the > state you were looking for (or if you have finished responding to that > state), you poll/select, and then try again. > ok.. that makes sense. But in this case [open() and then poll()], should buffer->event() be initialized in sysfs_open()-->check_perm(), instead of fill_read_buffer() ? I think this scheme should work for [open(), read() and then poll()] also. But how about the other rule, ie once woken-up the user has to close, re-open and re-read the file. Can this also be avoided, as probably this is also not poll semantics? > > > > > Events are signaled by an object manager calling > > > sysfs_notify(kobj, dir, attr); > > > > > > If the dir is non-NULL, it is used to find a subdirectory which > > > contains the attribute (presumably created by sysfs_create_group). > > > > > > This has a cost of one int per attribute, one wait_queuehead per kobject, > > > one int per open file. > > > > > So, all the attribute files for a given kobject will use the same > > wait queue? What happens if there are multiple attribute files > > polled for the same kobject. > > wait_queues are sharable. Having one wait queue for a number of > events can work quite well. > > Suppose a kobject has two (or more) attributes, and two processes poll on > those attributes, one each. > When either attribute gets changed and sysfs_notify is called, both > of the processes will be woken up. They will check if their > attribute of interest has changed. If it has, the poll syscall will > return to userspace. If it hasn't the process will just go to sleep > again. > > So, if a kobject has many attributes, and there are many concurrent > processes listening on different attributes, then sharing a wait_queue > may cause a lot of needly wakeup/return-to-sleep events. But it is > fairly unlikely. > wait_queues are not tiny, and having one per kobject is more > economical than one per attribute. > > I hope that helps. > > > > diff ./fs/sysfs/inode.c~current~ ./fs/sysfs/inode.c > > > --- ./fs/sysfs/inode.c~current~ 2005-12-21 09:43:51.000000000 +1100 > > > +++ ./fs/sysfs/inode.c 2005-12-21 09:43:52.000000000 +1100 > > > @@ -247,3 +247,23 @@ void sysfs_hash_and_remove(struct dentry > > > } > > > > > > > > > +struct sysfs_dirent *sysfs_find(struct sysfs_dirent *dir, const char * name) > > > +{ > > > + struct sysfs_dirent * sd, * rv = NULL; > > > + > > > + if (dir->s_dentry == NULL || > > > + dir->s_dentry->d_inode == NULL) > > > + return NULL; > > > + > > > + down(&dir->s_dentry->d_inode->i_sem); > > > + list_for_each_entry(sd, &dir->s_children, s_sibling) { > > > + if (!sd->s_element) > > > + continue; > > > + if (!strcmp(sysfs_get_name(sd), name)) { > > > + rv = sd; > > > + break; > > > + } > > > + } > > > + up(&dir->s_dentry->d_inode->i_sem); > > > + return rv; > > > +} > > > > I think there might be some issues here, if some other thread wants to > > remove the kobject, while this thread is trying to notify. Testing > > with some parallel add/delete operations should tell. > > My idea - which I thought I had stuck in a comment, but apparently not > - is that the owner of the kobject should have it's own locking to > ensure that it doesn't go away while the sysfs_notify is being > called (md does in the places where it calls sysfs_notify). > I guess it makes sense to make sysfs_notify safe against object owners > doing silly things, but it wasn't clear to me either how, or that it > was necessary. > > That's right, some times these kobjects do strange things like the kobject is alive but it decides to get rid of attribute files. So, I think if the VFS/dentry locking is used, it would be safer. > > In this case IMO, the better option is to do just lookup_one_len(), get > > the dentry and then get the sysfs_dirent as dentry->d_fsdata. And once > > done, do dput() which will release the references taken. > > You may well be right. I'll try and see what happens. > Thanks Maneesh -- Maneesh Soni Linux Technology Center, IBM India Software Labs, Bangalore, India email: maneesh@in.ibm.com Phone: 91-80-25044990 ^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH - 2.6.15-rc5-mm3] Allow sysfs attribute files to be pollable. 2005-12-22 5:47 ` Maneesh Soni @ 2005-12-22 6:02 ` Neil Brown 2005-12-22 7:05 ` Maneesh Soni 0 siblings, 1 reply; 9+ messages in thread From: Neil Brown @ 2005-12-22 6:02 UTC (permalink / raw) To: maneesh; +Cc: Greg KH, linux-kernel On Thursday December 22, maneesh@in.ibm.com wrote: > On Thu, Dec 22, 2005 at 02:43:12PM +1100, Neil Brown wrote: > > You don't have to read the contents unless you want to know what is in > > the file. You could just open the file and call 'poll' and wait for > > it to tell you something has happened. However this isn't likely to > > be really useful. > > It isn't the 'something has happened' event that is particularly > > interesting. It is the 'the state is now X' information that is > > interesting. > > So you read the file to find out what the state is. If that isn't the > > state you were looking for (or if you have finished responding to that > > state), you poll/select, and then try again. > > > > ok.. that makes sense. But in this case [open() and then poll()], should > buffer->event() be initialized in sysfs_open()-->check_perm(), instead > of fill_read_buffer() ? I think this scheme should work for [open(), read() > and then poll()] also. We are definitely using poll in a non-standard way as it is generally for "you can read now" or "you can write now", and we are (ab)using it to say "there is new information". Note that this is essentially copying the semantics of 'poll' on /proc/mounts. I would see poll returning as meaning "there is state information that you haven't read". When you first open the file, you haven't read anything, so poll should return immediately - which it currently does. After you read something, poll won't return again until there is something new to be read. I think this is probably the best semantics, but if you try hard you might be able to convince me otherwise... > > But how about the other rule, ie once woken-up the user has to close, > re-open and re-read the file. Can this also be avoided, as probably this is also > not poll semantics? This semantic is part of sysfs. The way sysfs currently works, you open a file, and read it, and that is the only value you see. If you rewind and read again, you still get the old value, even if it "should" have changed. The value is cached and the cache is never refreshed. sysfs could be changed to flush the cache on rewind, but I don't know that it is worth it. If it was changed, the poll functionality would automatically do the right thing. Thanks again, NeilBrown ^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH - 2.6.15-rc5-mm3] Allow sysfs attribute files to be pollable. 2005-12-22 6:02 ` Neil Brown @ 2005-12-22 7:05 ` Maneesh Soni 0 siblings, 0 replies; 9+ messages in thread From: Maneesh Soni @ 2005-12-22 7:05 UTC (permalink / raw) To: Neil Brown; +Cc: Greg KH, linux-kernel On Thu, Dec 22, 2005 at 05:02:02PM +1100, Neil Brown wrote: > On Thursday December 22, maneesh@in.ibm.com wrote: > > On Thu, Dec 22, 2005 at 02:43:12PM +1100, Neil Brown wrote: > > > You don't have to read the contents unless you want to know what is in > > > the file. You could just open the file and call 'poll' and wait for > > > it to tell you something has happened. However this isn't likely to > > > be really useful. > > > It isn't the 'something has happened' event that is particularly > > > interesting. It is the 'the state is now X' information that is > > > interesting. > > > So you read the file to find out what the state is. If that isn't the > > > state you were looking for (or if you have finished responding to that > > > state), you poll/select, and then try again. > > > > > > > ok.. that makes sense. But in this case [open() and then poll()], should > > buffer->event() be initialized in sysfs_open()-->check_perm(), instead > > of fill_read_buffer() ? I think this scheme should work for [open(), read() > > and then poll()] also. > > We are definitely using poll in a non-standard way as it is generally > for "you can read now" or "you can write now", and we are (ab)using it > to say "there is new information". Note that this is essentially > copying the semantics of 'poll' on /proc/mounts. > > I would see poll returning as meaning "there is state information that > you haven't read". > > When you first open the file, you haven't read anything, so poll > should return immediately - which it currently does. If the current patch already follows the semantics you just described (ie if polled right after open, return immediately) then no problem. > After you read something, poll won't return again until there is > something new to be read. > > I think this is probably the best semantics, but if you try hard you > might be able to convince me otherwise... > > > > > But how about the other rule, ie once woken-up the user has to close, > > re-open and re-read the file. Can this also be avoided, as probably this is also > > not poll semantics? > > This semantic is part of sysfs. The way sysfs currently works, you > open a file, and read it, and that is the only value you see. If you > rewind and read again, you still get the old value, even if it > "should" have changed. The value is cached and the cache is never > refreshed. > > sysfs could be changed to flush the cache on rewind, but I don't know > that it is worth it. If it was changed, the poll functionality would > automatically do the right thing. > IMHO, it is worthy enough if it can allow "poll" to have usual semantics. Thanks Maneesh -- Maneesh Soni Linux Technology Center, IBM India Software Labs, Bangalore, India email: maneesh@in.ibm.com Phone: 91-80-25044990 ^ permalink raw reply [flat|nested] 9+ messages in thread
end of thread, other threads:[~2005-12-22 7:08 UTC | newest] Thread overview: 9+ messages (download: mbox.gz follow: Atom feed -- links below jump to the message on this page -- 2005-12-20 23:14 [PATCH - 2.6.15-rc5-mm3] Allow sysfs attribute files to be pollable Neil Brown 2005-12-20 23:28 ` Jesper Juhl 2005-12-21 0:45 ` Neil Brown 2005-12-21 8:46 ` Jesper Juhl 2005-12-21 13:49 ` Maneesh Soni 2005-12-22 3:43 ` Neil Brown 2005-12-22 5:47 ` Maneesh Soni 2005-12-22 6:02 ` Neil Brown 2005-12-22 7:05 ` Maneesh Soni
This is a public inbox, see mirroring instructions for how to clone and mirror all data and code used for this inbox