linux-fsdevel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [REVIEW][PATCH] Making poll generally useful for sysctls
       [not found]         ` <CAMOw1v4gszzV7F3z1m+RWEe9UDgR2Jrp9wX_w9z_1UkXT=FL5Q@mail.gmail.com>
@ 2012-03-24  0:25           ` Eric W. Biederman
  2012-03-24  6:20             ` Lucas De Marchi
  0 siblings, 1 reply; 6+ messages in thread
From: Eric W. Biederman @ 2012-03-24  0:25 UTC (permalink / raw)
  To: Lucas De Marchi; +Cc: Linux Kernel, linux-fsdevel


Lucas can you take a look at the patch below. I don't have a test
harness to test this but this should make poll generally useful
for all sysctls as well as fix the module removal case.

In particular if you could test to see if the code still works as
expected for the hostname and domainname cases that would be great.

I don't anticipate any problems but real world testing is always good.

 fs/proc/proc_sysctl.c   |   34 +++++++++++++++++-----------------
 include/linux/sysctl.h  |   22 +++-------------------
 kernel/utsname_sysctl.c |   14 ++++----------
 3 files changed, 24 insertions(+), 46 deletions(-)
---
diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c
index 47b474b..98fd5c9 100644
--- a/fs/proc/proc_sysctl.c
+++ b/fs/proc/proc_sysctl.c
@@ -16,13 +16,15 @@ static const struct inode_operations proc_sys_inode_operations;
 static const struct file_operations proc_sys_dir_file_operations;
 static const struct inode_operations proc_sys_dir_operations;
 
-void proc_sys_poll_notify(struct ctl_table_poll *poll)
+static inline void *proc_sys_poll_event(struct ctl_table *table)
 {
-	if (!poll)
-		return;
+	return (void *)(unsigned long)atomic_read(&table->event);
+}
 
-	atomic_inc(&poll->event);
-	wake_up_interruptible(&poll->wait);
+void proc_sys_poll_notify(struct ctl_table_header *head, struct ctl_table *table)
+{
+	atomic_inc(&table->event);
+	wake_up_interruptible(&head->wait);
 }
 
 static struct ctl_table root_table[] = {
@@ -169,6 +171,7 @@ static void init_header(struct ctl_table_header *head,
 		for (entry = table; entry->procname; entry++, node++) {
 			rb_init_node(&node->node);
 			node->header = head;
+			atomic_set(&entry->event, 1);
 		}
 	}
 }
@@ -240,6 +243,8 @@ static void start_unregistering(struct ctl_table_header *p)
 		/* anything non-NULL; we'll never dereference it */
 		p->unregistering = ERR_PTR(-EINVAL);
 	}
+	/* Don't let poll sleep forever on deleted entries */
+	wake_up_interruptible(&p->wait);
 	/*
 	 * do not remove from the list until nobody holds it; walking the
 	 * list in do_sysctl() relies on that.
@@ -505,6 +510,9 @@ static ssize_t proc_sys_call_handler(struct file *filp, void __user *buf,
 	error = table->proc_handler(table, write, buf, &res, ppos);
 	if (!error)
 		error = res;
+
+	if (write)
+		proc_sys_poll_notify(head, table);
 out:
 	sysctl_head_finish(head);
 
@@ -532,8 +540,7 @@ static int proc_sys_open(struct inode *inode, struct file *filp)
 	if (IS_ERR(head))
 		return PTR_ERR(head);
 
-	if (table->poll)
-		filp->private_data = proc_sys_poll_event(table->poll);
+	filp->private_data = proc_sys_poll_event(table);
 
 	sysctl_head_finish(head);
 
@@ -552,21 +559,14 @@ static unsigned int proc_sys_poll(struct file *filp, poll_table *wait)
 	if (IS_ERR(head))
 		return POLLERR | POLLHUP;
 
-	if (!table->proc_handler)
-		goto out;
-
-	if (!table->poll)
-		goto out;
-
 	event = (unsigned long)filp->private_data;
-	poll_wait(filp, &table->poll->wait, wait);
+	poll_wait(filp, &head->wait, wait);
 
-	if (event != atomic_read(&table->poll->event)) {
-		filp->private_data = proc_sys_poll_event(table->poll);
+	if (event != atomic_read(&table->event)) {
+		filp->private_data = proc_sys_poll_event(table);
 		ret = POLLIN | POLLRDNORM | POLLERR | POLLPRI;
 	}
 
-out:
 	sysctl_head_finish(head);
 
 	return ret;
diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h
index c34b4c8..8647ee0 100644
--- a/include/linux/sysctl.h
+++ b/include/linux/sysctl.h
@@ -992,34 +992,17 @@ extern int proc_do_large_bitmap(struct ctl_table *, int,
  * cover common cases.
  */
 
-/* Support for userspace poll() to watch for changes */
-struct ctl_table_poll {
-	atomic_t event;
-	wait_queue_head_t wait;
-};
-
-static inline void *proc_sys_poll_event(struct ctl_table_poll *poll)
-{
-	return (void *)(unsigned long)atomic_read(&poll->event);
-}
-
-#define __CTL_TABLE_POLL_INITIALIZER(name) {				\
-	.event = ATOMIC_INIT(0),					\
-	.wait = __WAIT_QUEUE_HEAD_INITIALIZER(name.wait) }
-
-#define DEFINE_CTL_TABLE_POLL(name)					\
-	struct ctl_table_poll name = __CTL_TABLE_POLL_INITIALIZER(name)
 
 /* A sysctl table is an array of struct ctl_table: */
 struct ctl_table 
 {
 	const char *procname;		/* Text ID for /proc/sys, or zero */
 	void *data;
+	atomic_t event;
 	int maxlen;
 	umode_t mode;
 	struct ctl_table *child;	/* Deprecated */
 	proc_handler *proc_handler;	/* Callback for text formatting */
-	struct ctl_table_poll *poll;
 	void *extra1;
 	void *extra2;
 };
@@ -1042,6 +1025,7 @@ struct ctl_table_header
 		};
 		struct rcu_head rcu;
 	};
+	wait_queue_head_t wait;
 	struct completion *unregistering;
 	struct ctl_table *ctl_table_arg;
 	struct ctl_table_root *root;
@@ -1076,7 +1060,7 @@ struct ctl_path {
 
 #ifdef CONFIG_SYSCTL
 
-void proc_sys_poll_notify(struct ctl_table_poll *poll);
+void proc_sys_poll_notify(struct ctl_table_header *head, struct ctl_table *table);
 
 extern void setup_sysctl_set(struct ctl_table_set *p,
 	struct ctl_table_root *root,
diff --git a/kernel/utsname_sysctl.c b/kernel/utsname_sysctl.c
index 63da38c..3e91a50 100644
--- a/kernel/utsname_sysctl.c
+++ b/kernel/utsname_sysctl.c
@@ -53,18 +53,12 @@ static int proc_do_uts_string(ctl_table *table, int write,
 	r = proc_dostring(&uts_table,write,buffer,lenp, ppos);
 	put_uts(table, write, uts_table.data);
 
-	if (write)
-		proc_sys_poll_notify(table->poll);
-
 	return r;
 }
 #else
 #define proc_do_uts_string NULL
 #endif
 
-static DEFINE_CTL_TABLE_POLL(hostname_poll);
-static DEFINE_CTL_TABLE_POLL(domainname_poll);
-
 static struct ctl_table uts_kern_table[] = {
 	{
 		.procname	= "ostype",
@@ -93,7 +87,6 @@ static struct ctl_table uts_kern_table[] = {
 		.maxlen		= sizeof(init_uts_ns.name.nodename),
 		.mode		= 0644,
 		.proc_handler	= proc_do_uts_string,
-		.poll		= &hostname_poll,
 	},
 	{
 		.procname	= "domainname",
@@ -101,7 +94,6 @@ static struct ctl_table uts_kern_table[] = {
 		.maxlen		= sizeof(init_uts_ns.name.domainname),
 		.mode		= 0644,
 		.proc_handler	= proc_do_uts_string,
-		.poll		= &domainname_poll,
 	},
 	{}
 };
@@ -115,6 +107,8 @@ static struct ctl_table uts_root_table[] = {
 	{}
 };
 
+static struct ctl_table_header *uts_header;
+
 #ifdef CONFIG_PROC_SYSCTL
 /*
  * Notify userspace about a change in a certain entry of uts_kern_table,
@@ -124,13 +118,13 @@ void uts_proc_notify(enum uts_proc proc)
 {
 	struct ctl_table *table = &uts_kern_table[proc];
 
-	proc_sys_poll_notify(table->poll);
+	proc_sys_poll_notify(uts_header, table);
 }
 #endif
 
 static int __init utsname_sysctl_init(void)
 {
-	register_sysctl_table(uts_root_table);
+	uts_header = register_sysctl_table(uts_root_table);
 	return 0;
 }
 

^ permalink raw reply related	[flat|nested] 6+ messages in thread

* Re: [REVIEW][PATCH] Making poll generally useful for sysctls
  2012-03-24  0:25           ` [REVIEW][PATCH] Making poll generally useful for sysctls Eric W. Biederman
@ 2012-03-24  6:20             ` Lucas De Marchi
  2012-03-24  7:58               ` Eric W. Biederman
  0 siblings, 1 reply; 6+ messages in thread
From: Lucas De Marchi @ 2012-03-24  6:20 UTC (permalink / raw)
  To: Eric W. Biederman; +Cc: Linux Kernel, linux-fsdevel

On Fri, Mar 23, 2012 at 9:25 PM, Eric W. Biederman
<ebiederm@xmission.com> wrote:
>
> Lucas can you take a look at the patch below. I don't have a test
> harness to test this but this should make poll generally useful
> for all sysctls as well as fix the module removal case.
>
> In particular if you could test to see if the code still works as
> expected for the hostname and domainname cases that would be great.
>
> I don't anticipate any problems but real world testing is always good.
>
>  fs/proc/proc_sysctl.c   |   34 +++++++++++++++++-----------------
>  include/linux/sysctl.h  |   22 +++-------------------
>  kernel/utsname_sysctl.c |   14 ++++----------
>  3 files changed, 24 insertions(+), 46 deletions(-)
> ---
> diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c
> index 47b474b..98fd5c9 100644
> --- a/fs/proc/proc_sysctl.c
> +++ b/fs/proc/proc_sysctl.c
> @@ -16,13 +16,15 @@ static const struct inode_operations proc_sys_inode_operations;
>  static const struct file_operations proc_sys_dir_file_operations;
>  static const struct inode_operations proc_sys_dir_operations;
>
> -void proc_sys_poll_notify(struct ctl_table_poll *poll)
> +static inline void *proc_sys_poll_event(struct ctl_table *table)
>  {
> -       if (!poll)
> -               return;
> +       return (void *)(unsigned long)atomic_read(&table->event);
> +}
>
> -       atomic_inc(&poll->event);
> -       wake_up_interruptible(&poll->wait);
> +void proc_sys_poll_notify(struct ctl_table_header *head, struct ctl_table *table)
> +{
> +       atomic_inc(&table->event);
> +       wake_up_interruptible(&head->wait);
>  }
>
>  static struct ctl_table root_table[] = {
> @@ -169,6 +171,7 @@ static void init_header(struct ctl_table_header *head,
>                for (entry = table; entry->procname; entry++, node++) {
>                        rb_init_node(&node->node);
>                        node->header = head;
> +                       atomic_set(&entry->event, 1);
>                }
>        }
>  }
> @@ -240,6 +243,8 @@ static void start_unregistering(struct ctl_table_header *p)
>                /* anything non-NULL; we'll never dereference it */
>                p->unregistering = ERR_PTR(-EINVAL);
>        }
> +       /* Don't let poll sleep forever on deleted entries */
> +       wake_up_interruptible(&p->wait);
>        /*
>         * do not remove from the list until nobody holds it; walking the
>         * list in do_sysctl() relies on that.
> @@ -505,6 +510,9 @@ static ssize_t proc_sys_call_handler(struct file *filp, void __user *buf,
>        error = table->proc_handler(table, write, buf, &res, ppos);
>        if (!error)
>                error = res;
> +
> +       if (write)
> +               proc_sys_poll_notify(head, table);
>  out:
>        sysctl_head_finish(head);
>
> @@ -532,8 +540,7 @@ static int proc_sys_open(struct inode *inode, struct file *filp)
>        if (IS_ERR(head))
>                return PTR_ERR(head);
>
> -       if (table->poll)
> -               filp->private_data = proc_sys_poll_event(table->poll);
> +       filp->private_data = proc_sys_poll_event(table);
>
>        sysctl_head_finish(head);
>
> @@ -552,21 +559,14 @@ static unsigned int proc_sys_poll(struct file *filp, poll_table *wait)
>        if (IS_ERR(head))
>                return POLLERR | POLLHUP;
>
> -       if (!table->proc_handler)
> -               goto out;
> -
> -       if (!table->poll)
> -               goto out;
> -
>        event = (unsigned long)filp->private_data;
> -       poll_wait(filp, &table->poll->wait, wait);
> +       poll_wait(filp, &head->wait, wait);
>
> -       if (event != atomic_read(&table->poll->event)) {
> -               filp->private_data = proc_sys_poll_event(table->poll);
> +       if (event != atomic_read(&table->event)) {
> +               filp->private_data = proc_sys_poll_event(table);
>                ret = POLLIN | POLLRDNORM | POLLERR | POLLPRI;
>        }
>
> -out:
>        sysctl_head_finish(head);
>
>        return ret;
> diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h
> index c34b4c8..8647ee0 100644
> --- a/include/linux/sysctl.h
> +++ b/include/linux/sysctl.h
> @@ -992,34 +992,17 @@ extern int proc_do_large_bitmap(struct ctl_table *, int,
>  * cover common cases.
>  */
>
> -/* Support for userspace poll() to watch for changes */
> -struct ctl_table_poll {
> -       atomic_t event;
> -       wait_queue_head_t wait;
> -};
> -
> -static inline void *proc_sys_poll_event(struct ctl_table_poll *poll)
> -{
> -       return (void *)(unsigned long)atomic_read(&poll->event);
> -}
> -
> -#define __CTL_TABLE_POLL_INITIALIZER(name) {                           \
> -       .event = ATOMIC_INIT(0),                                        \
> -       .wait = __WAIT_QUEUE_HEAD_INITIALIZER(name.wait) }
> -
> -#define DEFINE_CTL_TABLE_POLL(name)                                    \
> -       struct ctl_table_poll name = __CTL_TABLE_POLL_INITIALIZER(name)
>
>  /* A sysctl table is an array of struct ctl_table: */
>  struct ctl_table
>  {
>        const char *procname;           /* Text ID for /proc/sys, or zero */
>        void *data;
> +       atomic_t event;
>        int maxlen;
>        umode_t mode;
>        struct ctl_table *child;        /* Deprecated */
>        proc_handler *proc_handler;     /* Callback for text formatting */
> -       struct ctl_table_poll *poll;
>        void *extra1;
>        void *extra2;
>  };
> @@ -1042,6 +1025,7 @@ struct ctl_table_header
>                };
>                struct rcu_head rcu;
>        };
> +       wait_queue_head_t wait;

If you have a waitqueue per table instead of per entry you will get
spurious notifications when other entries change. The easiest way to
test this is to poll /proc/sys/kernel/hostname and change your
domainname.

I couldn't apply this patch to any tree (linus/master + my previous
patch, your master, 3.3 + my previous patch), so I couldn't test. On
top of your tree:

[lucas@vader kernel]$ git am /tmp/a.patch
Applying: Making poll generally useful for sysctls
error: patch failed: fs/proc/proc_sysctl.c:16
error: fs/proc/proc_sysctl.c: patch does not apply
error: patch failed: include/linux/sysctl.h:992
error: include/linux/sysctl.h: patch does not apply
Patch failed at 0001 Making poll generally useful for sysctls


Lucas De Marchi

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [REVIEW][PATCH] Making poll generally useful for sysctls
  2012-03-24  6:20             ` Lucas De Marchi
@ 2012-03-24  7:58               ` Eric W. Biederman
  2012-03-26 17:44                 ` Lucas De Marchi
  0 siblings, 1 reply; 6+ messages in thread
From: Eric W. Biederman @ 2012-03-24  7:58 UTC (permalink / raw)
  To: Lucas De Marchi; +Cc: Linux Kernel, linux-fsdevel

Lucas De Marchi <lucas.demarchi@profusion.mobi> writes:

>>  /* A sysctl table is an array of struct ctl_table: */
>>  struct ctl_table
>>  {
>>        const char *procname;           /* Text ID for /proc/sys, or zero */
>>        void *data;
>> +       atomic_t event;
>>        int maxlen;
>>        umode_t mode;
>>        struct ctl_table *child;        /* Deprecated */
>>        proc_handler *proc_handler;     /* Callback for text formatting */
>> -       struct ctl_table_poll *poll;
>>        void *extra1;
>>        void *extra2;
>>  };
>> @@ -1042,6 +1025,7 @@ struct ctl_table_header
>>                };
>>                struct rcu_head rcu;
>>        };
>> +       wait_queue_head_t wait;
>
> If you have a waitqueue per table instead of per entry you will get
> spurious notifications when other entries change. The easiest way to
> test this is to poll /proc/sys/kernel/hostname and change your
> domainname.

You will get spurious wakeups but not spurious notifications to
userspace since event is still per entry.

For my money that seemed a nice compromise of code simplicity, and
generality.  We could of course do something much closer to what
sysfs does and allocate and refcount something similar to your
ctl_table_poll when we have a ctl_table opened.  But that just looked
like a pain.

Of course we already have spurious notifications for hostname and
domainname when multiple uts namespaces are involved, but that
is a different problem.

> I couldn't apply this patch to any tree (linus/master + my previous
> patch, your master, 3.3 + my previous patch), so I couldn't test. On
> top of your tree:

How odd.  It should have applied cleanly to my tree and it applies
with just a two line offset top of Linus's latest with my tree merged
in.  Those two lines of offset coming from the two extra includes
that came in through the merge.

patch -p1 --dry-run <  ~/tmp/sysctl-poll-test.patch 
patching file fs/proc/proc_sysctl.c
Hunk #1 succeeded at 18 (offset 2 lines).
Hunk #2 succeeded at 173 (offset 2 lines).
Hunk #3 succeeded at 245 (offset 2 lines).
Hunk #4 succeeded at 512 (offset 2 lines).
Hunk #5 succeeded at 542 (offset 2 lines).
Hunk #6 succeeded at 561 (offset 2 lines).
patching file include/linux/sysctl.h
patching file kernel/utsname_sysctl.c

> [lucas@vader kernel]$ git am /tmp/a.patch
> Applying: Making poll generally useful for sysctls
> error: patch failed: fs/proc/proc_sysctl.c:16
> error: fs/proc/proc_sysctl.c: patch does not apply
> error: patch failed: include/linux/sysctl.h:992
> error: include/linux/sysctl.h: patch does not apply
> Patch failed at 0001 Making poll generally useful for sysctls

Here is rebased version of the patch just in case that helps.

---

diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c
index 21d836f..739615c 100644
--- a/fs/proc/proc_sysctl.c
+++ b/fs/proc/proc_sysctl.c
@@ -18,13 +18,15 @@ static const struct inode_operations proc_sys_inode_operations;
 static const struct file_operations proc_sys_dir_file_operations;
 static const struct inode_operations proc_sys_dir_operations;
 
-void proc_sys_poll_notify(struct ctl_table_poll *poll)
+static inline void *proc_sys_poll_event(struct ctl_table *table)
 {
-	if (!poll)
-		return;
+	return (void *)(unsigned long)atomic_read(&table->event);
+}
 
-	atomic_inc(&poll->event);
-	wake_up_interruptible(&poll->wait);
+void proc_sys_poll_notify(struct ctl_table_header *head, struct ctl_table *table)
+{
+	atomic_inc(&table->event);
+	wake_up_interruptible(&head->wait);
 }
 
 static struct ctl_table root_table[] = {
@@ -171,6 +173,7 @@ static void init_header(struct ctl_table_header *head,
 		for (entry = table; entry->procname; entry++, node++) {
 			rb_init_node(&node->node);
 			node->header = head;
+			atomic_set(&entry->event, 1);
 		}
 	}
 }
@@ -242,6 +245,8 @@ static void start_unregistering(struct ctl_table_header *p)
 		/* anything non-NULL; we'll never dereference it */
 		p->unregistering = ERR_PTR(-EINVAL);
 	}
+	/* Don't let poll sleep forever on deleted entries */
+	wake_up_interruptible(&p->wait);
 	/*
 	 * do not remove from the list until nobody holds it; walking the
 	 * list in do_sysctl() relies on that.
@@ -507,6 +512,9 @@ static ssize_t proc_sys_call_handler(struct file *filp, void __user *buf,
 	error = table->proc_handler(table, write, buf, &res, ppos);
 	if (!error)
 		error = res;
+
+	if (write)
+		proc_sys_poll_notify(head, table);
 out:
 	sysctl_head_finish(head);
 
@@ -534,8 +542,7 @@ static int proc_sys_open(struct inode *inode, struct file *filp)
 	if (IS_ERR(head))
 		return PTR_ERR(head);
 
-	if (table->poll)
-		filp->private_data = proc_sys_poll_event(table->poll);
+	filp->private_data = proc_sys_poll_event(table);
 
 	sysctl_head_finish(head);
 
@@ -554,21 +561,14 @@ static unsigned int proc_sys_poll(struct file *filp, poll_table *wait)
 	if (IS_ERR(head))
 		return POLLERR | POLLHUP;
 
-	if (!table->proc_handler)
-		goto out;
-
-	if (!table->poll)
-		goto out;
-
 	event = (unsigned long)filp->private_data;
-	poll_wait(filp, &table->poll->wait, wait);
+	poll_wait(filp, &head->wait, wait);
 
-	if (event != atomic_read(&table->poll->event)) {
-		filp->private_data = proc_sys_poll_event(table->poll);
+	if (event != atomic_read(&table->event)) {
+		filp->private_data = proc_sys_poll_event(table);
 		ret = POLLIN | POLLRDNORM | POLLERR | POLLPRI;
 	}
 
-out:
 	sysctl_head_finish(head);
 
 	return ret;
diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h
index c34b4c8..8647ee0 100644
--- a/include/linux/sysctl.h
+++ b/include/linux/sysctl.h
@@ -992,34 +992,17 @@ extern int proc_do_large_bitmap(struct ctl_table *, int,
  * cover common cases.
  */
 
-/* Support for userspace poll() to watch for changes */
-struct ctl_table_poll {
-	atomic_t event;
-	wait_queue_head_t wait;
-};
-
-static inline void *proc_sys_poll_event(struct ctl_table_poll *poll)
-{
-	return (void *)(unsigned long)atomic_read(&poll->event);
-}
-
-#define __CTL_TABLE_POLL_INITIALIZER(name) {				\
-	.event = ATOMIC_INIT(0),					\
-	.wait = __WAIT_QUEUE_HEAD_INITIALIZER(name.wait) }
-
-#define DEFINE_CTL_TABLE_POLL(name)					\
-	struct ctl_table_poll name = __CTL_TABLE_POLL_INITIALIZER(name)
 
 /* A sysctl table is an array of struct ctl_table: */
 struct ctl_table 
 {
 	const char *procname;		/* Text ID for /proc/sys, or zero */
 	void *data;
+	atomic_t event;
 	int maxlen;
 	umode_t mode;
 	struct ctl_table *child;	/* Deprecated */
 	proc_handler *proc_handler;	/* Callback for text formatting */
-	struct ctl_table_poll *poll;
 	void *extra1;
 	void *extra2;
 };
@@ -1042,6 +1025,7 @@ struct ctl_table_header
 		};
 		struct rcu_head rcu;
 	};
+	wait_queue_head_t wait;
 	struct completion *unregistering;
 	struct ctl_table *ctl_table_arg;
 	struct ctl_table_root *root;
@@ -1076,7 +1060,7 @@ struct ctl_path {
 
 #ifdef CONFIG_SYSCTL
 
-void proc_sys_poll_notify(struct ctl_table_poll *poll);
+void proc_sys_poll_notify(struct ctl_table_header *head, struct ctl_table *table);
 
 extern void setup_sysctl_set(struct ctl_table_set *p,
 	struct ctl_table_root *root,
diff --git a/kernel/utsname_sysctl.c b/kernel/utsname_sysctl.c
index 63da38c..3e91a50 100644
--- a/kernel/utsname_sysctl.c
+++ b/kernel/utsname_sysctl.c
@@ -53,18 +53,12 @@ static int proc_do_uts_string(ctl_table *table, int write,
 	r = proc_dostring(&uts_table,write,buffer,lenp, ppos);
 	put_uts(table, write, uts_table.data);
 
-	if (write)
-		proc_sys_poll_notify(table->poll);
-
 	return r;
 }
 #else
 #define proc_do_uts_string NULL
 #endif
 
-static DEFINE_CTL_TABLE_POLL(hostname_poll);
-static DEFINE_CTL_TABLE_POLL(domainname_poll);
-
 static struct ctl_table uts_kern_table[] = {
 	{
 		.procname	= "ostype",
@@ -93,7 +87,6 @@ static struct ctl_table uts_kern_table[] = {
 		.maxlen		= sizeof(init_uts_ns.name.nodename),
 		.mode		= 0644,
 		.proc_handler	= proc_do_uts_string,
-		.poll		= &hostname_poll,
 	},
 	{
 		.procname	= "domainname",
@@ -101,7 +94,6 @@ static struct ctl_table uts_kern_table[] = {
 		.maxlen		= sizeof(init_uts_ns.name.domainname),
 		.mode		= 0644,
 		.proc_handler	= proc_do_uts_string,
-		.poll		= &domainname_poll,
 	},
 	{}
 };
@@ -115,6 +107,8 @@ static struct ctl_table uts_root_table[] = {
 	{}
 };
 
+static struct ctl_table_header *uts_header;
+
 #ifdef CONFIG_PROC_SYSCTL
 /*
  * Notify userspace about a change in a certain entry of uts_kern_table,
@@ -124,13 +118,13 @@ void uts_proc_notify(enum uts_proc proc)
 {
 	struct ctl_table *table = &uts_kern_table[proc];
 
-	proc_sys_poll_notify(table->poll);
+	proc_sys_poll_notify(uts_header, table);
 }
 #endif
 
 static int __init utsname_sysctl_init(void)
 {
-	register_sysctl_table(uts_root_table);
+	uts_header = register_sysctl_table(uts_root_table);
 	return 0;
 }
 

^ permalink raw reply related	[flat|nested] 6+ messages in thread

* Re: [REVIEW][PATCH] Making poll generally useful for sysctls
  2012-03-24  7:58               ` Eric W. Biederman
@ 2012-03-26 17:44                 ` Lucas De Marchi
  2012-03-27  4:02                   ` Lucas De Marchi
  0 siblings, 1 reply; 6+ messages in thread
From: Lucas De Marchi @ 2012-03-26 17:44 UTC (permalink / raw)
  To: Eric W. Biederman; +Cc: Linux Kernel, linux-fsdevel

Hi Eric,

On Sat, Mar 24, 2012 at 4:58 AM, Eric W. Biederman
<ebiederm@xmission.com> wrote:
> Lucas De Marchi <lucas.demarchi@profusion.mobi> writes:
>
>>>  /* A sysctl table is an array of struct ctl_table: */
>>>  struct ctl_table
>>>  {
>>>        const char *procname;           /* Text ID for /proc/sys, or zero */
>>>        void *data;
>>> +       atomic_t event;
>>>        int maxlen;
>>>        umode_t mode;
>>>        struct ctl_table *child;        /* Deprecated */
>>>        proc_handler *proc_handler;     /* Callback for text formatting */
>>> -       struct ctl_table_poll *poll;
>>>        void *extra1;
>>>        void *extra2;
>>>  };
>>> @@ -1042,6 +1025,7 @@ struct ctl_table_header
>>>                };
>>>                struct rcu_head rcu;
>>>        };
>>> +       wait_queue_head_t wait;
>>
>> If you have a waitqueue per table instead of per entry you will get
>> spurious notifications when other entries change. The easiest way to
>> test this is to poll /proc/sys/kernel/hostname and change your
>> domainname.
>
> You will get spurious wakeups but not spurious notifications to
> userspace since event is still per entry.

Yeah, indeed.

> For my money that seemed a nice compromise of code simplicity, and
> generality.  We could of course do something much closer to what
> sysfs does and allocate and refcount something similar to your
> ctl_table_poll when we have a ctl_table opened.  But that just looked
> like a pain.

I don't think we want spurious wakeups in favor of a slightly simpler code.


>
> Of course we already have spurious notifications for hostname and
> domainname when multiple uts namespaces are involved, but that
> is a different problem.
>
>> I couldn't apply this patch to any tree (linus/master + my previous
>> patch, your master, 3.3 + my previous patch), so I couldn't test. On
>> top of your tree:
>
> How odd.  It should have applied cleanly to my tree and it applies
> with just a two line offset top of Linus's latest with my tree merged
> in.  Those two lines of offset coming from the two extra includes
> that came in through the merge.
>
> patch -p1 --dry-run <  ~/tmp/sysctl-poll-test.patch
> patching file fs/proc/proc_sysctl.c
> Hunk #1 succeeded at 18 (offset 2 lines).
> Hunk #2 succeeded at 173 (offset 2 lines).
> Hunk #3 succeeded at 245 (offset 2 lines).
> Hunk #4 succeeded at 512 (offset 2 lines).
> Hunk #5 succeeded at 542 (offset 2 lines).
> Hunk #6 succeeded at 561 (offset 2 lines).
> patching file include/linux/sysctl.h
> patching file kernel/utsname_sysctl.c
>
>> [lucas@vader kernel]$ git am /tmp/a.patch
>> Applying: Making poll generally useful for sysctls
>> error: patch failed: fs/proc/proc_sysctl.c:16
>> error: fs/proc/proc_sysctl.c: patch does not apply
>> error: patch failed: include/linux/sysctl.h:992
>> error: include/linux/sysctl.h: patch does not apply
>> Patch failed at 0001 Making poll generally useful for sysctls
>
> Here is rebased version of the patch just in case that helps.

Now I can apply, but I can't boot: we hit a NULL dereference in
__wake_up_common(), called by proc_sys_poll_notify(). It seems that
you forgot to initialize the waitqueue with
__WAIT_QUEUE_HEAD_INITIALIZER().


Lucas De Marchi
--
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [REVIEW][PATCH] Making poll generally useful for sysctls
  2012-03-26 17:44                 ` Lucas De Marchi
@ 2012-03-27  4:02                   ` Lucas De Marchi
  2012-03-28  2:00                     ` Eric W. Biederman
  0 siblings, 1 reply; 6+ messages in thread
From: Lucas De Marchi @ 2012-03-27  4:02 UTC (permalink / raw)
  To: Lucas De Marchi; +Cc: Eric W. Biederman, Linux Kernel, linux-fsdevel

On Mon, 26 Mar 2012 14:44:50 -0300
Lucas De Marchi <lucas.demarchi@profusion.mobi> wrote:

> Hi Eric,
> 
> On Sat, Mar 24, 2012 at 4:58 AM, Eric W. Biederman
> <ebiederm@xmission.com> wrote:
> > Lucas De Marchi <lucas.demarchi@profusion.mobi> writes:
> >
> >>>  /* A sysctl table is an array of struct ctl_table: */
> >>>  struct ctl_table
> >>>  {
> >>>        const char *procname;           /* Text ID for /proc/sys, or zero */
> >>>        void *data;
> >>> +       atomic_t event;
> >>>        int maxlen;
> >>>        umode_t mode;
> >>>        struct ctl_table *child;        /* Deprecated */
> >>>        proc_handler *proc_handler;     /* Callback for text formatting */
> >>> -       struct ctl_table_poll *poll;
> >>>        void *extra1;
> >>>        void *extra2;
> >>>  };
> >>> @@ -1042,6 +1025,7 @@ struct ctl_table_header
> >>>                };
> >>>                struct rcu_head rcu;
> >>>        };
> >>> +       wait_queue_head_t wait;
> >>
> >> If you have a waitqueue per table instead of per entry you will get
> >> spurious notifications when other entries change. The easiest way to
> >> test this is to poll /proc/sys/kernel/hostname and change your
> >> domainname.
> >
> > You will get spurious wakeups but not spurious notifications to
> > userspace since event is still per entry.
> 
> Yeah, indeed.
> 
> > For my money that seemed a nice compromise of code simplicity, and
> > generality.  We could of course do something much closer to what
> > sysfs does and allocate and refcount something similar to your
> > ctl_table_poll when we have a ctl_table opened.  But that just looked
> > like a pain.
> 
> I don't think we want spurious wakeups in favor of a slightly simpler code.
> 
> 
> >
> > Of course we already have spurious notifications for hostname and
> > domainname when multiple uts namespaces are involved, but that
> > is a different problem.
> >
> >> I couldn't apply this patch to any tree (linus/master + my previous
> >> patch, your master, 3.3 + my previous patch), so I couldn't test. On
> >> top of your tree:
> >
> > How odd.  It should have applied cleanly to my tree and it applies
> > with just a two line offset top of Linus's latest with my tree merged
> > in.  Those two lines of offset coming from the two extra includes
> > that came in through the merge.
> >
> > patch -p1 --dry-run <  ~/tmp/sysctl-poll-test.patch
> > patching file fs/proc/proc_sysctl.c
> > Hunk #1 succeeded at 18 (offset 2 lines).
> > Hunk #2 succeeded at 173 (offset 2 lines).
> > Hunk #3 succeeded at 245 (offset 2 lines).
> > Hunk #4 succeeded at 512 (offset 2 lines).
> > Hunk #5 succeeded at 542 (offset 2 lines).
> > Hunk #6 succeeded at 561 (offset 2 lines).
> > patching file include/linux/sysctl.h
> > patching file kernel/utsname_sysctl.c
> >
> >> [lucas@vader kernel]$ git am /tmp/a.patch
> >> Applying: Making poll generally useful for sysctls
> >> error: patch failed: fs/proc/proc_sysctl.c:16
> >> error: fs/proc/proc_sysctl.c: patch does not apply
> >> error: patch failed: include/linux/sysctl.h:992
> >> error: include/linux/sysctl.h: patch does not apply
> >> Patch failed at 0001 Making poll generally useful for sysctls
> >
> > Here is rebased version of the patch just in case that helps.
> 
> Now I can apply, but I can't boot: we hit a NULL dereference in
> __wake_up_common(), called by proc_sys_poll_notify(). It seems that
> you forgot to initialize the waitqueue with
> __WAIT_QUEUE_HEAD_INITIALIZER().

Trying again I came up with the following simple oneliner on top
of your patch. With it I can boot successfully and poll any file
under /proc/sys (I didn't try many, but there's no reason it would not
work).

The nice part of this patch is that suddenly all sysctl entries can be
monitored through poll() instead of having to add adhoc code. However
that spurious wake ups are not very nice. Eric, what if we keep the
waitqueue inside the entry and initialize it there, just like we did
for ->event? This would mean iterating through them on unregister
though.

Lucas De Marchi

--------------------------

diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c
index 739615c..85ae957 100644
--- a/fs/proc/proc_sysctl.c
+++ b/fs/proc/proc_sysctl.c
@@ -168,6 +168,7 @@ static void init_header(struct ctl_table_header *head,
 	head->set = set;
 	head->parent = NULL;
 	head->node = node;
+	init_waitqueue_head(&head->wait);
 	if (node) {
 		struct ctl_table *entry;
 		for (entry = table; entry->procname; entry++, node++) {
--
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply related	[flat|nested] 6+ messages in thread

* Re: [REVIEW][PATCH] Making poll generally useful for sysctls
  2012-03-27  4:02                   ` Lucas De Marchi
@ 2012-03-28  2:00                     ` Eric W. Biederman
  0 siblings, 0 replies; 6+ messages in thread
From: Eric W. Biederman @ 2012-03-28  2:00 UTC (permalink / raw)
  To: Lucas De Marchi; +Cc: Linux Kernel, linux-fsdevel

Lucas De Marchi <lucas.demarchi@profusion.mobi> writes:

> On Mon, 26 Mar 2012 14:44:50 -0300
> Lucas De Marchi <lucas.demarchi@profusion.mobi> wrote:
>
>> Hi Eric,
>> 
>> On Sat, Mar 24, 2012 at 4:58 AM, Eric W. Biederman
>> <ebiederm@xmission.com> wrote:
>>
>> > Here is rebased version of the patch just in case that helps.
>> 
>> Now I can apply, but I can't boot: we hit a NULL dereference in
>> __wake_up_common(), called by proc_sys_poll_notify(). It seems that
>> you forgot to initialize the waitqueue with
>> __WAIT_QUEUE_HEAD_INITIALIZER().
>
> Trying again I came up with the following simple oneliner on top
> of your patch. With it I can boot successfully and poll any file
> under /proc/sys (I didn't try many, but there's no reason it would not
> work).

Thanks. I feel silly for that pretty obvious oversight.

There is another bug I am seeing in the sysctl poll code.  It needs to
be .read that updates filp->private_data to event, and not .poll.
Otherwise we have what should be a level triggered interface acting like
an edge triggered interface.

Any chance I could get you to cook up a patch for that bug?

> The nice part of this patch is that suddenly all sysctl entries can be
> monitored through poll() instead of having to add adhoc code. However
> that spurious wake ups are not very nice. Eric, what if we keep the
> waitqueue inside the entry and initialize it there, just like we did
> for ->event? This would mean iterating through them on unregister
> though.

Iterating through the all of the table entries on unregister is
not a problem, some code paths for namespace support are doing that
already.  Putting the wait queue in struct ctl_table is something
we can't do.  struct ctl_table can be freed before the final fput
on a file descriptor and fs/select.c will try to remove freed
wait queue heads, which would get us back to where we came in.

What we can do is use struct ctl_node instead. Either bloating struct
ctl_node or adding putting a pointer to struct ctl_table_poll.  The
only tricky part is that I don't believe I have any size information
on how many ctl_node entries I have.  So that information would have
to be gathered and kept as well.

After having looked at how large wait_queue_head_t I am reluctant
to pay the price for keeping a wait queue for nodes that we are not
polling.  So I am thinking allocate in .poll and free in unregister,
but I don't think I am ambitious enough to code that up.

Eric

^ permalink raw reply	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2012-03-28  2:00 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
     [not found] <20120313005855.GA24639@redhat.com>
     [not found] ` <CA+55aFwb2WRRamWPVY-ETDG_LO0QR9C8epk8tSAJVZhPFSNzhA@mail.gmail.com>
     [not found]   ` <20120318192755.GB6589@ZenIV.linux.org.uk>
     [not found]     ` <CAMOw1v4qa_AxJgc+qAsY6M=K--2JDvO-+CNj8OwKuE7piRViGw@mail.gmail.com>
     [not found]       ` <m1vclw1fx8.fsf@fess.ebiederm.org>
     [not found]         ` <CAMOw1v4gszzV7F3z1m+RWEe9UDgR2Jrp9wX_w9z_1UkXT=FL5Q@mail.gmail.com>
2012-03-24  0:25           ` [REVIEW][PATCH] Making poll generally useful for sysctls Eric W. Biederman
2012-03-24  6:20             ` Lucas De Marchi
2012-03-24  7:58               ` Eric W. Biederman
2012-03-26 17:44                 ` Lucas De Marchi
2012-03-27  4:02                   ` Lucas De Marchi
2012-03-28  2:00                     ` Eric W. Biederman

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).