[patch] kernel events layer

public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed

* [patch] kernel events layer
@ 2004-07-23 17:41 Robert Love
  2004-07-23 18:25 ` Tim Hockin
                   ` (4 more replies)
  0 siblings, 5 replies; 41+ messages in thread
From: Robert Love @ 2004-07-23 17:41 UTC (permalink / raw)
  To: akpm, linux-kernel

Andrew, et al,

OK, Kernel Summit and my OLS talk are over, so here are the goods.

Following patch implements the kernel events layer, which is a simple
wrapper around netlink to allow asynchronous communication from the
kernel to user-space of events, errors, logging, and so on.

Current intention is to hook the kernel via this interface into D-BUS,
although the patch is intended to be agnostic to any of that and policy
free.

D-BUS can be found here:

	http://dbus.freedesktop.org/

Other user-space utilities (including code to utilize this) can be found
here:

	http://vrfy.org/projects/kdbusd/

This patch only implements a single event, processor temperature
detection.  Other useful events include md sync, filesystem mount,
driver errors, etc.  We can add those later, on a case-by-case basis.  I
would like to be more careful with adding events than we are with adding
printk's, with some aim toward a stable interface.

Usage of the new interface is simple:

	send_kmessage(group, interface, message, ...)

Credit to Arjan for the initial implementation, Kay Sievers for some
updates, and the netlink code.

	Robert Love


Kernel to user-space communication layer using netlink
X-Signed-Off-By: Robert Love <rml@ximian.com>

 arch/i386/kernel/cpu/mcheck/p4.c |    9 ++
 include/linux/kmessage.h         |   15 ++++
 include/linux/netlink.h          |    1 
 kernel/Makefile                  |    2 
 kernel/kmessage.c                |  134 +++++++++++++++++++++++++++++++++++++++
 5 files changed, 159 insertions(+), 2 deletions(-)

diff -urN linux-2.6.8-rc2/arch/i386/kernel/cpu/mcheck/p4.c linux/arch/i386/kernel/cpu/mcheck/p4.c
--- linux-2.6.8-rc2/arch/i386/kernel/cpu/mcheck/p4.c	2004-06-16 01:19:37.000000000 -0400
+++ linux/arch/i386/kernel/cpu/mcheck/p4.c	2004-07-23 11:56:43.443944768 -0400
@@ -9,6 +9,7 @@
 #include <linux/irq.h>
 #include <linux/interrupt.h>
 #include <linux/smp.h>
+#include <linux/kmessage.h>
 
 #include <asm/processor.h> 
 #include <asm/system.h>
@@ -59,9 +60,15 @@
 	if (l & 0x1) {
 		printk(KERN_EMERG "CPU%d: Temperature above threshold\n", cpu);
 		printk(KERN_EMERG "CPU%d: Running in modulated clock mode\n",
-				cpu);
+			cpu);
+		send_kmessage(KMSG_POWER,
+			"/org/kernel/devices/system/cpu/temperature", "high",
+			"Cpu: %d\n", cpu);
 	} else {
 		printk(KERN_INFO "CPU%d: Temperature/speed normal\n", cpu);
+		send_kmessage(KMSG_POWER,
+			"/org/kernel/devices/system/cpu/temperature", "normal",
+			"Cpu: %d\n", cpu);
 	}
 }
 
diff -urN linux-2.6.8-rc2/include/linux/kmessage.h linux/include/linux/kmessage.h
--- linux-2.6.8-rc2/include/linux/kmessage.h	1969-12-31 19:00:00.000000000 -0500
+++ linux/include/linux/kmessage.h	2004-07-23 11:56:43.443944768 -0400
@@ -0,0 +1,15 @@
+#ifndef _LINUX_KMESSAGE_H
+#define _LINUX_KMESSAGE_H
+
+void send_kmessage(int type, const char *object, const char *signal,
+		   const char *fmt, ...);
+
+/* kmessage types */
+
+#define KMSG_GENERAL	0
+#define KMSG_STORAGE	1
+#define KMSG_POWER	2
+#define KMSG_FS		3
+#define KMSG_HOTPLUG	4
+
+#endif	/* _LINUX_KMESSAGE_H */
diff -urN linux-2.6.8-rc2/include/linux/netlink.h linux/include/linux/netlink.h
--- linux-2.6.8-rc2/include/linux/netlink.h	2004-07-20 15:40:14.000000000 -0400
+++ linux/include/linux/netlink.h	2004-07-23 11:56:43.490937624 -0400
@@ -17,6 +17,7 @@
 #define NETLINK_ROUTE6		11	/* af_inet6 route comm channel */
 #define NETLINK_IP6_FW		13
 #define NETLINK_DNRTMSG		14	/* DECnet routing messages */
+#define NETLINK_KMESSAGE	15	/* Kernel messages to userspace */
 #define NETLINK_TAPBASE		16	/* 16 to 31 are ethertap */
 
 #define MAX_LINKS 32		
diff -urN linux-2.6.8-rc2/kernel/kmessage.c linux/kernel/kmessage.c
--- linux-2.6.8-rc2/kernel/kmessage.c	1969-12-31 19:00:00.000000000 -0500
+++ linux/kernel/kmessage.c	2004-07-23 11:58:25.704398816 -0400
@@ -0,0 +1,134 @@
+/*
+ * Kernel event delivery over a netlink socket
+ * 
+ * Copyright (C) 2004 Red Hat, Inc. All rights reserved.
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License version 2 as
+ *  published by the Free Software Foundation.
+ *  
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ *  Authors:
+ *	Arjan van de Ven	<arjanv@redhat.com>
+ *	Kay Sievers		<kay.sievers@vrfy.org>
+ *	Robert Love		<rml@novell.com>
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/spinlock.h>
+#include <linux/socket.h>
+#include <linux/skbuff.h>
+#include <linux/netlink.h>
+#include <linux/string.h>
+#include <linux/kmessage.h>
+#include <net/sock.h>
+
+/* There is one global netlink socket */
+static struct sock *kmessage_sock = NULL;
+
+static void netlink_receive(struct sock *sk, int len)
+{
+	struct sk_buff *skb;
+
+	 /* just drop them all */
+	while ((skb = skb_dequeue(&sk->sk_receive_queue)) != NULL)
+		kfree_skb(skb);
+}
+
+static int netlink_send(__u32 groups, const char *buffer, int len)
+{
+	struct sk_buff *skb;
+	char *data_start;
+
+	if (!kmessage_sock)
+		return -EIO;
+
+	if (!buffer)
+		return -EINVAL;
+
+	if (len > PAGE_SIZE)
+		return -EINVAL;
+
+	skb = alloc_skb(len, GFP_ATOMIC);
+	if (!skb)
+		return -ENOMEM;
+	data_start = skb_put(skb, len);
+	memcpy(data_start, buffer, len);
+
+	return netlink_broadcast(kmessage_sock, skb, 0, groups, GFP_ATOMIC);
+}
+
+void send_kmessage(int type, const char *object, const char *signal,
+		   const char *fmt, ...)
+{
+	char *buffer;
+	int len;
+	int ret;
+
+	if (!object)
+		return;
+
+	if (!signal)
+		return;
+
+	if (strlen(object) > PAGE_SIZE)
+		return;
+
+	buffer = (char *) get_zeroed_page(GFP_ATOMIC);
+	if (!buffer)
+		return;
+
+	len = sprintf(buffer, "From: %s\n", object);
+	len += sprintf(&buffer[len], "Signal: %s\n", signal);
+
+	/* possible auxiliary data */
+	if (fmt) {
+		va_list args;
+
+		va_start(args, fmt);
+		len += vscnprintf(&buffer[len], PAGE_SIZE-len-1, fmt, args);
+		va_end(args);
+	}
+	buffer[len++] = '\0';
+
+	ret = netlink_send((1 << type), buffer, len);
+	free_page((unsigned long) buffer);
+}
+
+EXPORT_SYMBOL_GPL(send_kmessage);
+
+static int kmessage_init(void)
+{
+	kmessage_sock = netlink_kernel_create(NETLINK_KMESSAGE,
+					      netlink_receive);
+
+	if (!kmessage_sock) {
+		printk(KERN_ERR "kmessage: "
+		       "unable to create netlink socket; aborting\n");
+		return -ENODEV;
+	}
+
+	return 0;
+}
+
+static void kmessage_exit(void)
+{
+	if (kmessage_sock)
+		sock_release(kmessage_sock->sk_socket);
+}
+
+MODULE_DESCRIPTION("Kernel message delivery to userspace");
+MODULE_AUTHOR("Arjan van de Ven <arjanv@redhat.com>");
+MODULE_LICENSE("GPL");
+
+module_init(kmessage_init);
+module_exit(kmessage_exit);
diff -urN linux-2.6.8-rc2/kernel/Makefile linux/kernel/Makefile
--- linux-2.6.8-rc2/kernel/Makefile	2004-07-20 15:40:14.000000000 -0400
+++ linux/kernel/Makefile	2004-07-23 11:56:43.490937624 -0400
@@ -3,7 +3,7 @@
 #
 
 obj-y     = sched.o fork.o exec_domain.o panic.o printk.o profile.o \
-	    exit.o itimer.o time.o softirq.o resource.o \
+	    exit.o itimer.o time.o softirq.o resource.o kmessage.o\
 	    sysctl.o capability.o ptrace.o timer.o user.o \
 	    signal.o sys.o kmod.o workqueue.o pid.o \
 	    rcupdate.o intermodule.o extable.o params.o posix-timers.o \



^ permalink raw reply	[flat|nested] 41+ messages in thread

* Re: [patch] kernel events layer
  2004-07-23 17:41 [patch] kernel events layer Robert Love
@ 2004-07-23 18:25 ` Tim Hockin
  2004-07-23 18:31 ` Muli Ben-Yehuda
                   ` (3 subsequent siblings)
  4 siblings, 0 replies; 41+ messages in thread
From: Tim Hockin @ 2004-07-23 18:25 UTC (permalink / raw)
  To: Robert Love

On Fri, Jul 23, 2004 at 01:41:57PM -0400, Robert Love wrote:
> OK, Kernel Summit and my OLS talk are over, so here are the goods.

It's good to see something concrete in this vein.  Is this interface going
to be intended for things like error states?  The first thing that jumps
to mind is all the evlog stuff that was argued about last year.

Is this interface intended to be used in the name of driver "hardening"
and fault handling?

> +		send_kmessage(KMSG_POWER,
> +			"/org/kernel/devices/system/cpu/temperature", "high",
> +			"Cpu: %d\n", cpu);

I have to ask why the path needs to include /org ?  It seems pretty much
like useless stuff.  In fact, why does it need to specify /org/kernel?
Userspace can safely assume that anything that comes out of the netlink
socket is from the kernel, no?

If userspace is going to use this "object" path as a globalish identifier,
it can prepend whatever it needs.  Really, it should prepend some sort of
network id, if this stuff is ever going to find a network, so eliminating
the /org/kernel might just be precedent.

At worst case, why type it in every call to send_kmessage?  If they ALL
start with /org/kernel, just add that inside the send_kmessage() guts.

Further, if you want to eliminate stupid typo errors, these paths can be
further macro-ized.

	send_kmessage(KMSG_POWER, KMSUBSYS_CPU, "temperature", "high",
		"Cpu: %d", cpu);

KMSUBSYS_CPU can be recognized and expanded to "/devices/system/cpu".
That way, no one ever misspells it, leaving you stuck with it.  Also note
that requiring the caller to pass a '\n' seems pretty dumb.

Just my initial thoughts.  I need to read the paper, still.

Tim

^ permalink raw reply	[flat|nested] 41+ messages in thread

* Re: [patch] kernel events layer
  2004-07-23 17:41 [patch] kernel events layer Robert Love
  2004-07-23 18:25 ` Tim Hockin
@ 2004-07-23 18:31 ` Muli Ben-Yehuda
  2004-07-23 18:35   ` Robert Love
  2004-07-23 21:32 ` Dan Aloni
                   ` (2 subsequent siblings)
  4 siblings, 1 reply; 41+ messages in thread
From: Muli Ben-Yehuda @ 2004-07-23 18:31 UTC (permalink / raw)
  To: Robert Love; +Cc: akpm, linux-kernel

[-- Attachment #1: Type: text/plain, Size: 1095 bytes --]

On Fri, Jul 23, 2004 at 01:41:57PM -0400, Robert Love wrote:
> +void send_kmessage(int type, const char *object, const char *signal,
> +		   const char *fmt, ...)
> +{
> +	char *buffer;
> +	int len;
> +	int ret;
> +
> +	if (!object)
> +		return;
> +
> +	if (!signal)
> +		return;
> +
> +	if (strlen(object) > PAGE_SIZE)
> +		return;
> +
> +	buffer = (char *) get_zeroed_page(GFP_ATOMIC);
> +	if (!buffer)
> +		return;
> +
> +	len = sprintf(buffer, "From: %s\n", object);
> +	len += sprintf(&buffer[len], "Signal: %s\n", signal);
> +
> +	/* possible auxiliary data */
> +	if (fmt) {
> +		va_list args;
> +
> +		va_start(args, fmt);
> +		len += vscnprintf(&buffer[len], PAGE_SIZE-len-1, fmt, args);
> +		va_end(args);
> +	}
> +	buffer[len++] = '\0';
> +
> +	ret = netlink_send((1 << type), buffer, len);

Should we be ignoring the return value of netlink_send here, or
propagating a possible error to the callers?

> +	free_page((unsigned long) buffer);
> +}

Cheers, 
Muli 
-- 
Muli Ben-Yehuda
http://www.mulix.org | http://mulix.livejournal.com/


[-- Attachment #2: Digital signature --]
[-- Type: application/pgp-signature, Size: 189 bytes --]

^ permalink raw reply	[flat|nested] 41+ messages in thread

* Re: [patch] kernel events layer
  2004-07-23 18:31 ` Muli Ben-Yehuda
@ 2004-07-23 18:35   ` Robert Love
  0 siblings, 0 replies; 41+ messages in thread
From: Robert Love @ 2004-07-23 18:35 UTC (permalink / raw)
  To: Muli Ben-Yehuda; +Cc: akpm, linux-kernel

On Fri, 2004-07-23 at 21:31 +0300, Muli Ben-Yehuda wrote:

> Should we be ignoring the return value of netlink_send here, or
> propagating a possible error to the callers?

If the callers want it, we can definitely return it.  Sure.

	Robert Love



^ permalink raw reply	[flat|nested] 41+ messages in thread

* Re: [patch] kernel events layer
  2004-07-23 17:41 [patch] kernel events layer Robert Love
  2004-07-23 18:25 ` Tim Hockin
  2004-07-23 18:31 ` Muli Ben-Yehuda
@ 2004-07-23 21:32 ` Dan Aloni
  2004-07-24  2:47   ` Robert Love
  2004-07-24  3:02 ` Michael Clark
  2004-07-24  3:03 ` Andrew Morton
  4 siblings, 1 reply; 41+ messages in thread
From: Dan Aloni @ 2004-07-23 21:32 UTC (permalink / raw)
  To: Robert Love; +Cc: akpm, linux-kernel

On Fri, Jul 23, 2004 at 01:41:57PM -0400, Robert Love wrote:

> +void send_kmessage(int type, const char *object, const char *signal,
> +		   const char *fmt, ...)
> +{
> +	char *buffer;
> +	int len;
> +	int ret;
> +
> +	if (!object)
> +		return;
> +
> +	if (!signal)
> +		return;
> +
> +	if (strlen(object) > PAGE_SIZE)
> +		return;
> +
> +	buffer = (char *) get_zeroed_page(GFP_ATOMIC);
> +	if (!buffer)
> +		return;
> +
> +	len = sprintf(buffer, "From: %s\n", object);
> +	len += sprintf(&buffer[len], "Signal: %s\n", signal);

IMHO you either should not assume anything about the length of the object 
string, _or_ do the complete safe string assembly e.g:

        len += snprintf(buffer, PAGE_SIZE, "From: %s\nSignal: %s\n", 
                        object, signal);


-- 
Dan Aloni
da-x@colinux.org

^ permalink raw reply	[flat|nested] 41+ messages in thread

* Re: [patch] kernel events layer
  2004-07-24  3:03 ` Andrew Morton
@ 2004-07-24  2:14   ` Robert Love
  2004-07-24  5:15     ` Chris Wedgwood
  2004-07-24  3:11   ` [patch] kernel events layer, updated Robert Love
  1 sibling, 1 reply; 41+ messages in thread
From: Robert Love @ 2004-07-24  2:14 UTC (permalink / raw)
  To: Andrew Morton; +Cc: linux-kernel

On Fri, 2004-07-23 at 20:03 -0700, Andrew Morton wrote:

> OK.  Can you give us a ballpark estimate of how many send_kmessage() calls
> we're likely to have in two years time?

Predicting the future is hard, but I suspect this number to be small.
Let's say 10 in core kernel code?

If this takes off as a solution for error reporting, that number will be
much larger in drivers.

> - The GFP_ATOMIC page allocation is unfortunate.  Please pass in the
>   gfp_flags, or change it to GFP_KERNEL and provide a separate
>   send_kmessage_atomic()?

I like the latter.

> - Methinks the kernel won't build if the user set CONFIG_NETLINK_DEV=n

I will test and fix.

> - When fixing that up, please add CONFIG_KERNEL_EVENTS or whatever,
>   provide the appropriate do-nothing stubs if it's disabled.  For the tiny
>   systems.

OK.

> - send_kmessage() is racy against kmessage_exit().  I'm not sure that's
>   worth fixing - if you agree then it would set minds at ease to simply
>   remove kmessage_exit().

The race is definitely not worth fixing.  If it bothers you, then
removing kmessage_exit() makes sense.  I will do that.

> - This code will never work as a module, so why include the
>   MODULE_AUTHOR/DESCRIPTION/etc?

Can be removed.

> - What led to the decision to export send_kmessage() to only GPL modules?

I am a fanatic about freedom?  Seriously, I will talk to Arjan about
changing it.  I do not care either way.

Updated patch forthcoming.

Thanks,

	Robert Love



^ permalink raw reply	[flat|nested] 41+ messages in thread

* Re: [patch] kernel events layer
  2004-07-23 21:32 ` Dan Aloni
@ 2004-07-24  2:47   ` Robert Love
  2004-07-24  4:42     ` Keith Owens
  0 siblings, 1 reply; 41+ messages in thread
From: Robert Love @ 2004-07-24  2:47 UTC (permalink / raw)
  To: Dan Aloni; +Cc: akpm, linux-kernel

On Sat, 2004-07-24 at 00:32 +0300, Dan Aloni wrote:

> IMHO you either should not assume anything about the length of the object 
> string, _or_ do the complete safe string assembly e.g:
> 
>         len += snprintf(buffer, PAGE_SIZE, "From: %s\nSignal: %s\n", 
>                         object, signal);
> 

Fair enough.  I guess what we want, exactly, is:

 len = snprintf(buffer, PAGE_SIZE, "From: %s\n", object);
 len += snprintf(&buffer[len], PAGE_SIZE - len "Signal: %s\n", signal);

I will add that to the next revision.

	Robert Love



^ permalink raw reply	[flat|nested] 41+ messages in thread

* Re: [patch] kernel events layer
  2004-07-23 17:41 [patch] kernel events layer Robert Love
                   ` (2 preceding siblings ...)
  2004-07-23 21:32 ` Dan Aloni
@ 2004-07-24  3:02 ` Michael Clark
  2004-07-24  3:14   ` Robert Love
  2004-07-24  3:03 ` Andrew Morton
  4 siblings, 1 reply; 41+ messages in thread
From: Michael Clark @ 2004-07-24  3:02 UTC (permalink / raw)
  To: Robert Love; +Cc: akpm, linux-kernel

On 07/24/04 01:41, Robert Love wrote:

> @@ -59,9 +60,15 @@
>  	if (l & 0x1) {
>  		printk(KERN_EMERG "CPU%d: Temperature above threshold\n", cpu);
>  		printk(KERN_EMERG "CPU%d: Running in modulated clock mode\n",
> -				cpu);
> +			cpu);
> +		send_kmessage(KMSG_POWER,
> +			"/org/kernel/devices/system/cpu/temperature", "high",
> +			"Cpu: %d\n", cpu);

Should there be some sharing with the device naming of sysfs, or
will we introduce a new one? ie sysfs uses:

devices/system/cpu/cpu0/<blah>

Would it be a better way to have a version that takes struct kobject
to enforce consistency in the device naming scheme. This also means
userspace would automatically know where to look in /sys if further
info was needed.

Question is does it make sense to use this infrastructure without sysfs
as hald, etc require it. ie depends CONFIG_SYSFS

Perhaps a send_kmessage_kobject ?

~mc

^ permalink raw reply	[flat|nested] 41+ messages in thread

* Re: [patch] kernel events layer
  2004-07-23 17:41 [patch] kernel events layer Robert Love
                   ` (3 preceding siblings ...)
  2004-07-24  3:02 ` Michael Clark
@ 2004-07-24  3:03 ` Andrew Morton
  2004-07-24  2:14   ` Robert Love
  2004-07-24  3:11   ` [patch] kernel events layer, updated Robert Love
  4 siblings, 2 replies; 41+ messages in thread
From: Andrew Morton @ 2004-07-24  3:03 UTC (permalink / raw)
  To: Robert Love; +Cc: linux-kernel

Robert Love <rml@ximian.com> wrote:
>
> Andrew, et al,
> 
> OK, Kernel Summit and my OLS talk are over, so here are the goods.
> 
> Following patch implements the kernel events layer, which is a simple
> wrapper around netlink to allow asynchronous communication from the
> kernel to user-space of events, errors, logging, and so on.
> 
> Current intention is to hook the kernel via this interface into D-BUS,
> although the patch is intended to be agnostic to any of that and policy
> free.
> 
> D-BUS can be found here:
> 
> 	http://dbus.freedesktop.org/
> 
> Other user-space utilities (including code to utilize this) can be found
> here:
> 
> 	http://vrfy.org/projects/kdbusd/
> 
> This patch only implements a single event, processor temperature
> detection.  Other useful events include md sync, filesystem mount,
> driver errors, etc.  We can add those later, on a case-by-case basis.  I
> would like to be more careful with adding events than we are with adding
> printk's, with some aim toward a stable interface.

OK.  Can you give us a ballpark estimate of how many send_kmessage() calls
we're likely to have in two years time?

> Usage of the new interface is simple:
> 
> 	send_kmessage(group, interface, message, ...)
> 
> Credit to Arjan for the initial implementation, Kay Sievers for some
> updates, and the netlink code.

- The GFP_ATOMIC page allocation is unfortunate.  Please pass in the
  gfp_flags, or change it to GFP_KERNEL and provide a separate
  send_kmessage_atomic()?

- Methinks the kernel won't build if the user set CONFIG_NETLINK_DEV=n

- When fixing that up, please add CONFIG_KERNEL_EVENTS or whatever,
  provide the appropriate do-nothing stubs if it's disabled.  For the tiny
  systems.

- send_kmessage() is racy against kmessage_exit().  I'm not sure that's
  worth fixing - if you agree then it would set minds at ease to simply
  remove kmessage_exit().

- This code will never work as a module, so why include the
  MODULE_AUTHOR/DESCRIPTION/etc?

- What led to the decision to export send_kmessage() to only GPL modules?


^ permalink raw reply	[flat|nested] 41+ messages in thread

* [patch] kernel events layer, updated
  2004-07-24  3:03 ` Andrew Morton
  2004-07-24  2:14   ` Robert Love
@ 2004-07-24  3:11   ` Robert Love
  2004-07-24  7:58     ` Deepak Saxena
  1 sibling, 1 reply; 41+ messages in thread
From: Robert Love @ 2004-07-24  3:11 UTC (permalink / raw)
  To: Andrew Morton; +Cc: linux-kernel, zaitcev

OK, Andrew, here we go again.  I met all the issues except the GPL-only
export.  Waiting to hear from Arjan.  I am all for it.

To rehash, the following patch implements the kernel events layer, which
is a simple wrapper around netlink to allow asynchronous communication
from the kernel to user-space of events, errors, logging, and so on.  A
D-BUS daemon is to link the kernel events into the D-BUS.

Changes since the last post:

	- rename everything to kevent.  it is shorter and sweeter.
	- use GFP_KERNEL
	- add send_kevent_atomic, which uses GFP_ATOMIC
	- add CONFIG_KERNEL_EVENTS
	- depend on CONFIG_NET (which implies netlink:
	  CONFIG_NETLINK_DEV is for the old device node interface,
	  Andrew)
	- remove MODULE_* cruft
	- remove kmessage_exit() to pacify concerns of race
	- make send_kevent and send_kevent_atomic return error code
	- use snprintf, not sprintf, for building the buffer

Patch is against 2.6.8-rc2.  Tested both with and without
CONFIG_KERNEL_EVENTS.

Feel the love,

	Robert Love


Kernel to user-space communication layer using netlink
X-Signed-Off-By: Robert Love <rml@ximian.com>

 arch/i386/kernel/cpu/mcheck/p4.c |    9 ++
 include/linux/kevent.h           |   37 ++++++++++
 include/linux/netlink.h          |    1 
 init/Kconfig                     |   14 +++
 kernel/Makefile                  |    1 
 kernel/kevent.c                  |  141 +++++++++++++++++++++++++++++++++++++++
 6 files changed, 202 insertions(+), 1 deletion(-)

diff -urN linux-2.6.8-rc2/arch/i386/kernel/cpu/mcheck/p4.c linux/arch/i386/kernel/cpu/mcheck/p4.c
--- linux-2.6.8-rc2/arch/i386/kernel/cpu/mcheck/p4.c	2004-06-16 01:19:37.000000000 -0400
+++ linux/arch/i386/kernel/cpu/mcheck/p4.c	2004-07-23 22:55:20.000000000 -0400
@@ -9,6 +9,7 @@
 #include <linux/irq.h>
 #include <linux/interrupt.h>
 #include <linux/smp.h>
+#include <linux/kevent.h>
 
 #include <asm/processor.h> 
 #include <asm/system.h>
@@ -59,9 +60,15 @@
 	if (l & 0x1) {
 		printk(KERN_EMERG "CPU%d: Temperature above threshold\n", cpu);
 		printk(KERN_EMERG "CPU%d: Running in modulated clock mode\n",
-				cpu);
+			cpu);
+		send_kevent(KMSG_POWER,
+			"/org/kernel/devices/system/cpu/temperature", "high",
+			"Cpu: %d\n", cpu);
 	} else {
 		printk(KERN_INFO "CPU%d: Temperature/speed normal\n", cpu);
+		send_kevent(KMSG_POWER,
+			"/org/kernel/devices/system/cpu/temperature", "normal",
+			"Cpu: %d\n", cpu);
 	}
 }
 
diff -urN linux-2.6.8-rc2/include/linux/kevent.h linux/include/linux/kevent.h
--- linux-2.6.8-rc2/include/linux/kevent.h	1969-12-31 19:00:00.000000000 -0500
+++ linux/include/linux/kevent.h	2004-07-23 22:53:45.000000000 -0400
@@ -0,0 +1,37 @@
+#ifndef _LINUX_KEVENT_H
+#define _LINUX_KEVENT_H
+
+#include <linux/config.h>
+
+/* kevent types - these are used as the multicast group */
+#define KMSG_GENERAL	0
+#define KMSG_STORAGE	1
+#define KMSG_POWER	2
+#define KMSG_FS		3
+#define KMSG_HOTPLUG	4
+
+#ifdef CONFIG_KERNEL_EVENTS
+
+int send_kevent(int type, const char *object, const char *signal,
+		const char *fmt, ...);
+
+int send_kevent_atomic(int type, const char *object, const char *signal,
+		const char *fmt, ...);
+
+#else
+
+static inline int send_kevent(int type, const char *object, const char *signal,
+		const char *fmt, ...)
+{
+	return 0;
+}
+
+static inline int send_kevent_atomic(int type, const char *object,
+		const char *signal, const char *fmt, ...)
+{
+	return 0;
+}
+
+#endif /* ! CONFIG_KERNEL_EVENTS */
+
+#endif	/* _LINUX_KEVENT_H */
diff -urN linux-2.6.8-rc2/include/linux/netlink.h linux/include/linux/netlink.h
--- linux-2.6.8-rc2/include/linux/netlink.h	2004-07-23 22:18:04.000000000 -0400
+++ linux/include/linux/netlink.h	2004-07-23 22:21:18.000000000 -0400
@@ -17,6 +17,7 @@
 #define NETLINK_ROUTE6		11	/* af_inet6 route comm channel */
 #define NETLINK_IP6_FW		13
 #define NETLINK_DNRTMSG		14	/* DECnet routing messages */
+#define NETLINK_KMESSAGE	15	/* Kernel messages to userspace */
 #define NETLINK_TAPBASE		16	/* 16 to 31 are ethertap */
 
 #define MAX_LINKS 32		
diff -urN linux-2.6.8-rc2/init/Kconfig linux/init/Kconfig
--- linux-2.6.8-rc2/init/Kconfig	2004-07-23 22:18:04.000000000 -0400
+++ linux/init/Kconfig	2004-07-23 22:44:26.000000000 -0400
@@ -160,6 +160,20 @@
 	  logging of avc messages output).  Does not do system-call
 	  auditing without CONFIG_AUDITSYSCALL.
 
+config KERNEL_EVENTS
+	bool "Kernel Events Layer"
+	depends on NET
+	default y
+	help
+	  This option enables the kernel events layer, which is a simple
+	  mechanism for kernel-to-user communication over a netlink socket.
+	  The goal of the kernel events layer is to provide a simple and
+	  efficient logging, error, and events system.  Specifically, code
+	  is available to link the events into D-BUS.  Say Y, unless you
+	  are building a system requiring minimal memory consumption.
+
+	  D-BUS is available at http://dbus.freedesktop.org/
+
 config AUDITSYSCALL
 	bool "Enable system-call auditing support"
 	depends on AUDIT && (X86 || PPC64 || ARCH_S390 || IA64)
diff -urN linux-2.6.8-rc2/kernel/kevent.c linux/kernel/kevent.c
--- linux-2.6.8-rc2/kernel/kevent.c	1969-12-31 19:00:00.000000000 -0500
+++ linux/kernel/kevent.c	2004-07-23 22:49:21.000000000 -0400
@@ -0,0 +1,141 @@
+/*
+ * kernel/kevent.c - kernel event delivery over a netlink socket
+ * 
+ * Copyright (C) 2004 Red Hat, Inc.  All rights reserved.
+ *
+ * Licensed under the GNU GPL v2.
+ *
+ * Authors:
+ *	Arjan van de Ven	<arjanv@redhat.com>
+ *	Kay Sievers		<kay.sievers@vrfy.org>
+ *	Robert Love		<rml@novell.com>
+ */
+
+#include <linux/module.h>
+#include <linux/spinlock.h>
+#include <linux/socket.h>
+#include <linux/skbuff.h>
+#include <linux/netlink.h>
+#include <linux/string.h>
+#include <linux/kevent.h>
+#include <net/sock.h>
+
+/* There is one global netlink socket */
+static struct sock *kevent_sock = NULL;
+
+static void netlink_receive(struct sock *sk, int len)
+{
+	struct sk_buff *skb;
+
+	 /* just drop them all */
+	while ((skb = skb_dequeue(&sk->sk_receive_queue)) != NULL)
+		kfree_skb(skb);
+}
+
+static int netlink_send(__u32 groups, int gfp_mask, const char *buffer, int len)
+{
+	struct sk_buff *skb;
+	char *data_start;
+
+	if (!kevent_sock)
+		return -EIO;
+
+	if (!buffer)
+		return -EINVAL;
+
+	if (len > PAGE_SIZE)
+		return -EINVAL;
+
+	skb = alloc_skb(len, gfp_mask);
+	if (!skb)
+		return -ENOMEM;
+	data_start = skb_put(skb, len);
+	memcpy(data_start, buffer, len);
+
+	return netlink_broadcast(kevent_sock, skb, 0, groups, gfp_mask);
+}
+
+static int do_send_kevent(int type, int gfp_mask, const char *object,
+			  const char *signal, const char *fmt, va_list args)
+{
+	char *buffer;
+	int len;
+	int ret;
+
+	if (!object)
+		return -EINVAL;
+
+	if (!signal)
+		return -EINVAL;
+
+	if (strlen(object) > PAGE_SIZE)
+		return -EINVAL;
+
+	buffer = (char *) get_zeroed_page(gfp_mask);
+	if (!buffer)
+		return -ENOMEM;
+
+	len = snprintf(buffer, PAGE_SIZE, "From: %s\n", object);
+	len += snprintf(&buffer[len], PAGE_SIZE - len, "Signal: %s\n", signal);
+
+	/* possible auxiliary data */
+	if (fmt)
+		len += vscnprintf(&buffer[len], PAGE_SIZE-len-1, fmt, args);
+	buffer[len++] = '\0';
+
+	ret = netlink_send((1 << type), gfp_mask, buffer, len);
+	free_page((unsigned long) buffer);
+
+	return ret;
+}
+
+/**
+ * send_kevent - send a message to user-space via the kernel events layer
+ */
+int send_kevent(int type, const char *object, const char *signal,
+		const char *fmt, ...)
+{
+	va_list args;
+	int ret;
+
+	va_start(args, fmt);
+	ret = do_send_kevent(type, GFP_KERNEL, object, signal, fmt, args);
+	va_end(args);
+
+	return ret;
+}
+
+EXPORT_SYMBOL_GPL(send_kevent);
+
+/**
+ * send_kevent_atomic - send a message to user-space via the kernel events layer
+ */
+int send_kevent_atomic(int type, const char *object, const char *signal,
+		       const char *fmt, ...)
+{
+	va_list args;
+	int ret;
+
+	va_start(args, fmt);
+	ret = do_send_kevent(type, GFP_ATOMIC, object, signal, fmt, args);
+	va_end(args);
+
+	return ret;
+}
+
+EXPORT_SYMBOL_GPL(send_kevent_atomic);
+
+static int kevent_init(void)
+{
+	kevent_sock = netlink_kernel_create(NETLINK_KMESSAGE, netlink_receive);
+
+	if (!kevent_sock) {
+		printk(KERN_ERR "kevent: "
+		       "unable to create netlink socket; aborting\n");
+		return -ENODEV;
+	}
+
+	return 0;
+}
+
+module_init(kevent_init);
diff -urN linux-2.6.8-rc2/kernel/Makefile linux/kernel/Makefile
--- linux-2.6.8-rc2/kernel/Makefile	2004-07-23 22:18:04.000000000 -0400
+++ linux/kernel/Makefile	2004-07-23 22:33:36.000000000 -0400
@@ -23,6 +23,7 @@
 obj-$(CONFIG_STOP_MACHINE) += stop_machine.o
 obj-$(CONFIG_AUDIT) += audit.o
 obj-$(CONFIG_AUDITSYSCALL) += auditsc.o
+obj-$(CONFIG_KERNEL_EVENTS) += kevent.o
 
 ifneq ($(CONFIG_IA64),y)
 # According to Alan Modra <alan@linuxcare.com.au>, the -fno-omit-frame-pointer is




^ permalink raw reply	[flat|nested] 41+ messages in thread

* Re: [patch] kernel events layer
  2004-07-24  3:02 ` Michael Clark
@ 2004-07-24  3:14   ` Robert Love
  2004-07-24  9:15     ` Michael Clark
                       ` (2 more replies)
  0 siblings, 3 replies; 41+ messages in thread
From: Robert Love @ 2004-07-24  3:14 UTC (permalink / raw)
  To: Michael Clark; +Cc: akpm, linux-kernel

On Sat, 2004-07-24 at 11:02 +0800, Michael Clark wrote:

> Should there be some sharing with the device naming of sysfs, or
> will we introduce a new one? ie sysfs uses:
>
> devices/system/cpu/cpu0/<blah>
>
> Would it be a better way to have a version that takes struct kobject
> to enforce consistency in the device naming scheme. This also means
> userspace would automatically know where to look in /sys if further
> info was needed.

No, we want to give an interface that matches the sort of provider URI
used by object systems such as CORBA, D-BUS, and DCOP.  We also do _not_
want to put policy in the kernel.

The easiest way to avoid that is simply to use a name similar to the
path name.

Passing the sysfs name would probably be a good potential argument to
the signal, though.  The temperature signal in the patch is just an
example.

> Question is does it make sense to use this infrastructure without sysfs
> as hald, etc require it. ie depends CONFIG_SYSFS

That sounds like policy to me.

Especially if drivers start using this for error logging, there are no
ties to sysfs.  Configuration dependencies tend to be hard build-time
deps anyhow.

	Robert Love

^ permalink raw reply	[flat|nested] 41+ messages in thread

* Re: [patch] kernel events layer
  2004-07-24  2:47   ` Robert Love
@ 2004-07-24  4:42     ` Keith Owens
  2004-07-24  5:00       ` Robert Love
                         ` (2 more replies)
  0 siblings, 3 replies; 41+ messages in thread
From: Keith Owens @ 2004-07-24  4:42 UTC (permalink / raw)
  To: Robert Love; +Cc: Dan Aloni, akpm, linux-kernel

On Fri, 23 Jul 2004 22:47:06 -0400, 
Robert Love <rml@ximian.com> wrote:
>On Sat, 2004-07-24 at 00:32 +0300, Dan Aloni wrote:
>
>> IMHO you either should not assume anything about the length of the object 
>> string, _or_ do the complete safe string assembly e.g:
>> 
>>         len += snprintf(buffer, PAGE_SIZE, "From: %s\nSignal: %s\n", 
>>                         object, signal);
>> 
>
>Fair enough.  I guess what we want, exactly, is:
>
> len = snprintf(buffer, PAGE_SIZE, "From: %s\n", object);
> len += snprintf(&buffer[len], PAGE_SIZE - len, "Signal: %s\n", signal);
>
>I will add that to the next revision.

man snprintf

  "If the output was truncated due to this limit then the return value
  is the number of characters (not including the trailing '\0') which
  would have been written to the final string if enough space had been
  available. Thus, a return value of size or more means that the output
  was truncated".

Never use the return value from snprintf to work out the next buffer
position, it is not reliable when the data is truncated.  The example
above uses a second call to snprintf which will generate a warning for
truncated data and fail safe, but not all code is that trustworthy.  I
always use strlen to get the real buffer length.

  snprintf(buffer, PAGE_SIZE, "From: %s\n", object);
  len = strlen(buffer);
  snprintf(buffer+len, PAGE_SIZE - len, "Signal: %s\n", signal);

^ permalink raw reply	[flat|nested] 41+ messages in thread

* Re: [patch] kernel events layer
  2004-07-24  4:42     ` Keith Owens
@ 2004-07-24  5:00       ` Robert Love
  2004-07-24  8:11         ` Andrew Morton
  2004-07-24  6:53       ` Paul Jackson
  2004-07-24 11:37       ` Bernd Petrovitsch
  2 siblings, 1 reply; 41+ messages in thread
From: Robert Love @ 2004-07-24  5:00 UTC (permalink / raw)
  To: Keith Owens; +Cc: Dan Aloni, akpm, linux-kernel

On Sat, 2004-07-24 at 14:42 +1000, Keith Owens wrote:

> Never use the return value from snprintf to work out the next buffer
> position, it is not reliable when the data is truncated.  The example
> above uses a second call to snprintf which will generate a warning for
> truncated data and fail safe, but not all code is that trustworthy.  I
> always use strlen to get the real buffer length.

If we are going to use snprintf, we might as well use it right.

Updated patch attached.  Thanks.

Thinking about it, though, the from and signal buffers are never ever
going to approach PAGE_SIZE so this check is really being anal.  We
certainly want to be safe later, when we construct the message, but
here ... oh well.  Safe is better than sorry.

	Robert Love


Kernel to user-space communication layer using netlink
X-Signed-Off-By: Robert Love <rml@ximian.com>

 arch/i386/kernel/cpu/mcheck/p4.c |    9 ++
 include/linux/kevent.h           |   37 ++++++++++
 include/linux/netlink.h          |    1 
 init/Kconfig                     |   14 +++
 kernel/Makefile                  |    1 
 kernel/kevent.c                  |  143 +++++++++++++++++++++++++++++++++++++++
 6 files changed, 204 insertions(+), 1 deletion(-)

diff -urN linux-2.6.8-rc2/arch/i386/kernel/cpu/mcheck/p4.c linux/arch/i386/kernel/cpu/mcheck/p4.c
--- linux-2.6.8-rc2/arch/i386/kernel/cpu/mcheck/p4.c	2004-06-16 01:19:37.000000000 -0400
+++ linux/arch/i386/kernel/cpu/mcheck/p4.c	2004-07-23 22:55:20.000000000 -0400
@@ -9,6 +9,7 @@
 #include <linux/irq.h>
 #include <linux/interrupt.h>
 #include <linux/smp.h>
+#include <linux/kevent.h>
 
 #include <asm/processor.h> 
 #include <asm/system.h>
@@ -59,9 +60,15 @@
 	if (l & 0x1) {
 		printk(KERN_EMERG "CPU%d: Temperature above threshold\n", cpu);
 		printk(KERN_EMERG "CPU%d: Running in modulated clock mode\n",
-				cpu);
+			cpu);
+		send_kevent(KMSG_POWER,
+			"/org/kernel/devices/system/cpu/temperature", "high",
+			"Cpu: %d\n", cpu);
 	} else {
 		printk(KERN_INFO "CPU%d: Temperature/speed normal\n", cpu);
+		send_kevent(KMSG_POWER,
+			"/org/kernel/devices/system/cpu/temperature", "normal",
+			"Cpu: %d\n", cpu);
 	}
 }
 
diff -urN linux-2.6.8-rc2/include/linux/kevent.h linux/include/linux/kevent.h
--- linux-2.6.8-rc2/include/linux/kevent.h	1969-12-31 19:00:00.000000000 -0500
+++ linux/include/linux/kevent.h	2004-07-23 22:53:45.000000000 -0400
@@ -0,0 +1,37 @@
+#ifndef _LINUX_KEVENT_H
+#define _LINUX_KEVENT_H
+
+#include <linux/config.h>
+
+/* kevent types - these are used as the multicast group */
+#define KMSG_GENERAL	0
+#define KMSG_STORAGE	1
+#define KMSG_POWER	2
+#define KMSG_FS		3
+#define KMSG_HOTPLUG	4
+
+#ifdef CONFIG_KERNEL_EVENTS
+
+int send_kevent(int type, const char *object, const char *signal,
+		const char *fmt, ...);
+
+int send_kevent_atomic(int type, const char *object, const char *signal,
+		const char *fmt, ...);
+
+#else
+
+static inline int send_kevent(int type, const char *object, const char *signal,
+		const char *fmt, ...)
+{
+	return 0;
+}
+
+static inline int send_kevent_atomic(int type, const char *object,
+		const char *signal, const char *fmt, ...)
+{
+	return 0;
+}
+
+#endif /* ! CONFIG_KERNEL_EVENTS */
+
+#endif	/* _LINUX_KEVENT_H */
diff -urN linux-2.6.8-rc2/include/linux/netlink.h linux/include/linux/netlink.h
--- linux-2.6.8-rc2/include/linux/netlink.h	2004-07-23 22:18:04.000000000 -0400
+++ linux/include/linux/netlink.h	2004-07-23 22:21:18.000000000 -0400
@@ -17,6 +17,7 @@
 #define NETLINK_ROUTE6		11	/* af_inet6 route comm channel */
 #define NETLINK_IP6_FW		13
 #define NETLINK_DNRTMSG		14	/* DECnet routing messages */
+#define NETLINK_KMESSAGE	15	/* Kernel messages to userspace */
 #define NETLINK_TAPBASE		16	/* 16 to 31 are ethertap */
 
 #define MAX_LINKS 32		
diff -urN linux-2.6.8-rc2/init/Kconfig linux/init/Kconfig
--- linux-2.6.8-rc2/init/Kconfig	2004-07-23 22:18:04.000000000 -0400
+++ linux/init/Kconfig	2004-07-23 22:44:26.000000000 -0400
@@ -160,6 +160,20 @@
 	  logging of avc messages output).  Does not do system-call
 	  auditing without CONFIG_AUDITSYSCALL.
 
+config KERNEL_EVENTS
+	bool "Kernel Events Layer"
+	depends on NET
+	default y
+	help
+	  This option enables the kernel events layer, which is a simple
+	  mechanism for kernel-to-user communication over a netlink socket.
+	  The goal of the kernel events layer is to provide a simple and
+	  efficient logging, error, and events system.  Specifically, code
+	  is available to link the events into D-BUS.  Say Y, unless you
+	  are building a system requiring minimal memory consumption.
+
+	  D-BUS is available at http://dbus.freedesktop.org/
+
 config AUDITSYSCALL
 	bool "Enable system-call auditing support"
 	depends on AUDIT && (X86 || PPC64 || ARCH_S390 || IA64)
diff -urN linux-2.6.8-rc2/kernel/kevent.c linux/kernel/kevent.c
--- linux-2.6.8-rc2/kernel/kevent.c	1969-12-31 19:00:00.000000000 -0500
+++ linux/kernel/kevent.c	2004-07-24 00:57:44.180662592 -0400
@@ -0,0 +1,143 @@
+/*
+ * kernel/kevent.c - kernel event delivery over a netlink socket
+ * 
+ * Copyright (C) 2004 Red Hat, Inc.  All rights reserved.
+ *
+ * Licensed under the GNU GPL v2.
+ *
+ * Authors:
+ *	Arjan van de Ven	<arjanv@redhat.com>
+ *	Kay Sievers		<kay.sievers@vrfy.org>
+ *	Robert Love		<rml@novell.com>
+ */
+
+#include <linux/module.h>
+#include <linux/spinlock.h>
+#include <linux/socket.h>
+#include <linux/skbuff.h>
+#include <linux/netlink.h>
+#include <linux/string.h>
+#include <linux/kevent.h>
+#include <net/sock.h>
+
+/* There is one global netlink socket */
+static struct sock *kevent_sock = NULL;
+
+static void netlink_receive(struct sock *sk, int len)
+{
+	struct sk_buff *skb;
+
+	 /* just drop them all */
+	while ((skb = skb_dequeue(&sk->sk_receive_queue)) != NULL)
+		kfree_skb(skb);
+}
+
+static int netlink_send(__u32 groups, int gfp_mask, const char *buffer, int len)
+{
+	struct sk_buff *skb;
+	char *data_start;
+
+	if (!kevent_sock)
+		return -EIO;
+
+	if (!buffer)
+		return -EINVAL;
+
+	if (len > PAGE_SIZE)
+		return -EINVAL;
+
+	skb = alloc_skb(len, gfp_mask);
+	if (!skb)
+		return -ENOMEM;
+	data_start = skb_put(skb, len);
+	memcpy(data_start, buffer, len);
+
+	return netlink_broadcast(kevent_sock, skb, 0, groups, gfp_mask);
+}
+
+static int do_send_kevent(int type, int gfp_mask, const char *object,
+			  const char *signal, const char *fmt, va_list args)
+{
+	char *buffer;
+	int len;
+	int ret;
+
+	if (!object)
+		return -EINVAL;
+
+	if (!signal)
+		return -EINVAL;
+
+	if (strlen(object) > PAGE_SIZE)
+		return -EINVAL;
+
+	buffer = (char *) get_zeroed_page(gfp_mask);
+	if (!buffer)
+		return -ENOMEM;
+
+	snprintf(buffer, PAGE_SIZE, "From: %s\n", object);
+	len = strlen(buffer);
+	snprintf(buffer + len, PAGE_SIZE - len, "Signal: %s\n", signal);
+	len = strlen(buffer);
+
+	/* possible auxiliary data */
+	if (fmt)
+		len += vscnprintf(&buffer[len], PAGE_SIZE-len-1, fmt, args);
+	buffer[len++] = '\0';
+
+	ret = netlink_send((1 << type), gfp_mask, buffer, len);
+	free_page((unsigned long) buffer);
+
+	return ret;
+}
+
+/**
+ * send_kevent - send a message to user-space via the kernel events layer
+ */
+int send_kevent(int type, const char *object, const char *signal,
+		const char *fmt, ...)
+{
+	va_list args;
+	int ret;
+
+	va_start(args, fmt);
+	ret = do_send_kevent(type, GFP_KERNEL, object, signal, fmt, args);
+	va_end(args);
+
+	return ret;
+}
+
+EXPORT_SYMBOL_GPL(send_kevent);
+
+/**
+ * send_kevent_atomic - send a message to user-space via the kernel events layer
+ */
+int send_kevent_atomic(int type, const char *object, const char *signal,
+		       const char *fmt, ...)
+{
+	va_list args;
+	int ret;
+
+	va_start(args, fmt);
+	ret = do_send_kevent(type, GFP_ATOMIC, object, signal, fmt, args);
+	va_end(args);
+
+	return ret;
+}
+
+EXPORT_SYMBOL_GPL(send_kevent_atomic);
+
+static int kevent_init(void)
+{
+	kevent_sock = netlink_kernel_create(NETLINK_KMESSAGE, netlink_receive);
+
+	if (!kevent_sock) {
+		printk(KERN_ERR "kevent: "
+		       "unable to create netlink socket; aborting\n");
+		return -ENODEV;
+	}
+
+	return 0;
+}
+
+module_init(kevent_init);
diff -urN linux-2.6.8-rc2/kernel/Makefile linux/kernel/Makefile
--- linux-2.6.8-rc2/kernel/Makefile	2004-07-23 22:18:04.000000000 -0400
+++ linux/kernel/Makefile	2004-07-23 22:33:36.000000000 -0400
@@ -23,6 +23,7 @@
 obj-$(CONFIG_STOP_MACHINE) += stop_machine.o
 obj-$(CONFIG_AUDIT) += audit.o
 obj-$(CONFIG_AUDITSYSCALL) += auditsc.o
+obj-$(CONFIG_KERNEL_EVENTS) += kevent.o
 
 ifneq ($(CONFIG_IA64),y)
 # According to Alan Modra <alan@linuxcare.com.au>, the -fno-omit-frame-pointer is



^ permalink raw reply	[flat|nested] 41+ messages in thread

* Re: [patch] kernel events layer
  2004-07-24  2:14   ` Robert Love
@ 2004-07-24  5:15     ` Chris Wedgwood
  2004-07-24  5:41       ` Robert Love
  0 siblings, 1 reply; 41+ messages in thread
From: Chris Wedgwood @ 2004-07-24  5:15 UTC (permalink / raw)
  To: Robert Love; +Cc: Andrew Morton, linux-kernel

On Fri, Jul 23, 2004 at 10:14:57PM -0400, Robert Love wrote:

> Predicting the future is hard, but I suspect this number to be
> small.  Let's say 10 in core kernel code?

Seems fair.

> If this takes off as a solution for error reporting, that number
> will be much larger in drivers.

This part worries me a lot.  I would almost rather all possible
messages get stuck somewhere common so driver writers can't add these
ad-hoc and we can avoid a proliferation of either similar or pointless
messages.

Forcing these into a common place lets people eyeball if a new
message really is necessary --- and it makes writing applications to
deal with these things easier (since you don't have to scan the entire
kernel tree).

    --cw

^ permalink raw reply	[flat|nested] 41+ messages in thread

* Re: [patch] kernel events layer
  2004-07-24  8:11         ` Andrew Morton
@ 2004-07-24  5:37           ` Robert Love
  2004-07-24  6:02             ` Robert Love
  0 siblings, 1 reply; 41+ messages in thread
From: Robert Love @ 2004-07-24  5:37 UTC (permalink / raw)
  To: Andrew Morton; +Cc: kaos, da-x, linux-kernel

On Sat, 2004-07-24 at 01:11 -0700, Andrew Morton wrote:
> Robert Love <rml@ximian.com> wrote:
> >
> > +	buffer = (char *) get_zeroed_page(gfp_mask);
> 
> Why zeroed?

Paranoia over a buffer we send to user-space.  But the skb is created
using only the 'len' bytes, so it should be safe.  Switched to
alloc_page().

> > +	if (!buffer)
> > +		return -ENOMEM;
> > +
> > +	snprintf(buffer, PAGE_SIZE, "From: %s\n", object);
> > +	len = strlen(buffer);
> > +	snprintf(buffer + len, PAGE_SIZE - len, "Signal: %s\n", signal);
> > +	len = strlen(buffer);
> 
> A single snprintf here would suit.

Doh!

Updated patch attached - also converts the 'type' value to an enum,
which is safer and something a debugger in user-space can resolve.

	Robert Love


Kernel to user-space communication layer using netlink
Signed-off-by: Robert Love <rml@ximian.com>

 arch/i386/kernel/cpu/mcheck/p4.c |    9 ++
 include/linux/kevent.h           |   39 ++++++++++
 include/linux/netlink.h          |    1 
 init/Kconfig                     |   14 +++
 kernel/Makefile                  |    1 
 kernel/kevent.c                  |  141 +++++++++++++++++++++++++++++++++++++++
 6 files changed, 204 insertions(+), 1 deletion(-)

diff -urN linux-2.6.8-rc2/arch/i386/kernel/cpu/mcheck/p4.c linux/arch/i386/kernel/cpu/mcheck/p4.c
--- linux-2.6.8-rc2/arch/i386/kernel/cpu/mcheck/p4.c	2004-06-16 01:19:37.000000000 -0400
+++ linux/arch/i386/kernel/cpu/mcheck/p4.c	2004-07-24 01:25:07.301870200 -0400
@@ -9,6 +9,7 @@
 #include <linux/irq.h>
 #include <linux/interrupt.h>
 #include <linux/smp.h>
+#include <linux/kevent.h>
 
 #include <asm/processor.h> 
 #include <asm/system.h>
@@ -59,9 +60,15 @@
 	if (l & 0x1) {
 		printk(KERN_EMERG "CPU%d: Temperature above threshold\n", cpu);
 		printk(KERN_EMERG "CPU%d: Running in modulated clock mode\n",
-				cpu);
+			cpu);
+		send_kevent(KEVENT_GENERAL,
+			"/org/kernel/arch/kernel/cpu/temperature", "high",
+			"Cpu: %d\n", cpu);
 	} else {
 		printk(KERN_INFO "CPU%d: Temperature/speed normal\n", cpu);
+		send_kevent(KEVENT_GENERAL,
+			"/org/kernel/arch/kernel/cpu/temperature", "normal",
+			"Cpu: %d\n", cpu);
 	}
 }
 
diff -urN linux-2.6.8-rc2/include/linux/kevent.h linux/include/linux/kevent.h
--- linux-2.6.8-rc2/include/linux/kevent.h	1969-12-31 19:00:00.000000000 -0500
+++ linux/include/linux/kevent.h	2004-07-24 01:31:33.077223432 -0400
@@ -0,0 +1,39 @@
+#ifndef _LINUX_KEVENT_H
+#define _LINUX_KEVENT_H
+
+#include <linux/config.h>
+
+/* kevent types - these are used as the multicast group */
+enum kevent {
+	KEVENT_GENERAL	=	0,
+	KEVENT_STORAGE	=	1,
+	KEVENT_POWER	=	2,
+	KEVENT_FS	= 	3,
+	KEVENT_HOTPLUG	=	4,
+};
+
+#ifdef CONFIG_KERNEL_EVENTS
+
+int send_kevent(enum kevent type, const char *object,
+		const char *signal, const char *fmt, ...);
+
+int send_kevent_atomic(enum kevent type, const char *object,
+		const char *signal, const char *fmt, ...);
+
+#else
+
+static inline int send_kevent(enum kevent type,  const char *object,
+		const char *signal, const char *fmt, ...)
+{
+	return 0;
+}
+
+static inline int send_kevent_atomic(int type, const char *object,
+		const char *signal, const char *fmt, ...)
+{
+	return 0;
+}
+
+#endif /* ! CONFIG_KERNEL_EVENTS */
+
+#endif	/* _LINUX_KEVENT_H */
diff -urN linux-2.6.8-rc2/include/linux/netlink.h linux/include/linux/netlink.h
--- linux-2.6.8-rc2/include/linux/netlink.h	2004-07-23 22:18:04.000000000 -0400
+++ linux/include/linux/netlink.h	2004-07-24 01:25:07.302870048 -0400
@@ -17,6 +17,7 @@
 #define NETLINK_ROUTE6		11	/* af_inet6 route comm channel */
 #define NETLINK_IP6_FW		13
 #define NETLINK_DNRTMSG		14	/* DECnet routing messages */
+#define NETLINK_KEVENT		15	/* Kernel messages to userspace */
 #define NETLINK_TAPBASE		16	/* 16 to 31 are ethertap */
 
 #define MAX_LINKS 32		
diff -urN linux-2.6.8-rc2/init/Kconfig linux/init/Kconfig
--- linux-2.6.8-rc2/init/Kconfig	2004-07-23 22:18:04.000000000 -0400
+++ linux/init/Kconfig	2004-07-24 01:25:07.302870048 -0400
@@ -160,6 +160,20 @@
 	  logging of avc messages output).  Does not do system-call
 	  auditing without CONFIG_AUDITSYSCALL.
 
+config KERNEL_EVENTS
+	bool "Kernel Events Layer"
+	depends on NET
+	default y
+	help
+	  This option enables the kernel events layer, which is a simple
+	  mechanism for kernel-to-user communication over a netlink socket.
+	  The goal of the kernel events layer is to provide a simple and
+	  efficient logging, error, and events system.  Specifically, code
+	  is available to link the events into D-BUS.  Say Y, unless you
+	  are building a system requiring minimal memory consumption.
+
+	  D-BUS is available at http://dbus.freedesktop.org/
+
 config AUDITSYSCALL
 	bool "Enable system-call auditing support"
 	depends on AUDIT && (X86 || PPC64 || ARCH_S390 || IA64)
diff -urN linux-2.6.8-rc2/kernel/kevent.c linux/kernel/kevent.c
--- linux-2.6.8-rc2/kernel/kevent.c	1969-12-31 19:00:00.000000000 -0500
+++ linux/kernel/kevent.c	2004-07-24 01:31:38.561389712 -0400
@@ -0,0 +1,141 @@
+/*
+ * kernel/kevent.c - kernel event delivery over a netlink socket
+ * 
+ * Copyright (C) 2004 Red Hat, Inc.  All rights reserved.
+ *
+ * Licensed under the GNU GPL v2.
+ *
+ * Authors:
+ *	Arjan van de Ven	<arjanv@redhat.com>
+ *	Kay Sievers		<kay.sievers@vrfy.org>
+ *	Robert Love		<rml@novell.com>
+ */
+
+#include <linux/module.h>
+#include <linux/spinlock.h>
+#include <linux/socket.h>
+#include <linux/skbuff.h>
+#include <linux/netlink.h>
+#include <linux/string.h>
+#include <linux/kevent.h>
+#include <net/sock.h>
+
+/* There is one global netlink socket */
+static struct sock *kevent_sock = NULL;
+
+static void netlink_receive(struct sock *sk, int len)
+{
+	struct sk_buff *skb;
+
+	 /* just drop them all */
+	while ((skb = skb_dequeue(&sk->sk_receive_queue)) != NULL)
+		kfree_skb(skb);
+}
+
+static int netlink_send(__u32 groups, int gfp_mask, const char *buffer, int len)
+{
+	struct sk_buff *skb;
+	char *data_start;
+
+	if (!kevent_sock)
+		return -EIO;
+
+	if (!buffer)
+		return -EINVAL;
+
+	if (len > PAGE_SIZE)
+		return -EINVAL;
+
+	skb = alloc_skb(len, gfp_mask);
+	if (!skb)
+		return -ENOMEM;
+	data_start = skb_put(skb, len);
+	memcpy(data_start, buffer, len);
+
+	return netlink_broadcast(kevent_sock, skb, 0, groups, gfp_mask);
+}
+
+static int do_send_kevent(enum kevent type, int gfp_mask, const char *object,
+			  const char *signal, const char *fmt, va_list args)
+{
+	char *buffer;
+	int len;
+	int ret;
+
+	if (!object)
+		return -EINVAL;
+
+	if (!signal)
+		return -EINVAL;
+
+	if (strlen(object) > PAGE_SIZE)
+		return -EINVAL;
+
+	buffer = (char *) alloc_page(gfp_mask);
+	if (!buffer)
+		return -ENOMEM;
+
+	snprintf(buffer, PAGE_SIZE, "From: %s\nSignal: %s\n", object, signal);
+	len = strlen(buffer);
+
+	/* possible auxiliary data */
+	if (fmt)
+		len += vscnprintf(buffer+len, PAGE_SIZE-len-1, fmt, args);
+	buffer[len++] = '\0';
+
+	ret = netlink_send((1 << type), gfp_mask, buffer, len);
+	free_page((unsigned long) buffer);
+
+	return ret;
+}
+
+/**
+ * send_kevent - send a message to user-space via the kernel events layer
+ */
+int send_kevent(enum kevent type, const char *object,
+		const char *signal, const char *fmt, ...)
+{
+	va_list args;
+	int ret;
+
+	va_start(args, fmt);
+	ret = do_send_kevent(type, GFP_KERNEL, object, signal, fmt, args);
+	va_end(args);
+
+	return ret;
+}
+
+EXPORT_SYMBOL_GPL(send_kevent);
+
+/**
+ * send_kevent_atomic - send a message to user-space via the kernel events layer
+ */
+int send_kevent_atomic(enum kevent type, const char *object,
+		const char *signal, const char *fmt, ...)
+{
+	va_list args;
+	int ret;
+
+	va_start(args, fmt);
+	ret = do_send_kevent(type, GFP_ATOMIC, object, signal, fmt, args);
+	va_end(args);
+
+	return ret;
+}
+
+EXPORT_SYMBOL_GPL(send_kevent_atomic);
+
+static int kevent_init(void)
+{
+	kevent_sock = netlink_kernel_create(NETLINK_KEVENT, netlink_receive);
+
+	if (!kevent_sock) {
+		printk(KERN_ERR "kevent: "
+		       "unable to create netlink socket; aborting\n");
+		return -ENODEV;
+	}
+
+	return 0;
+}
+
+module_init(kevent_init);
diff -urN linux-2.6.8-rc2/kernel/Makefile linux/kernel/Makefile
--- linux-2.6.8-rc2/kernel/Makefile	2004-07-23 22:18:04.000000000 -0400
+++ linux/kernel/Makefile	2004-07-24 01:25:07.303869896 -0400
@@ -23,6 +23,7 @@
 obj-$(CONFIG_STOP_MACHINE) += stop_machine.o
 obj-$(CONFIG_AUDIT) += audit.o
 obj-$(CONFIG_AUDITSYSCALL) += auditsc.o
+obj-$(CONFIG_KERNEL_EVENTS) += kevent.o
 
 ifneq ($(CONFIG_IA64),y)
 # According to Alan Modra <alan@linuxcare.com.au>, the -fno-omit-frame-pointer is



^ permalink raw reply	[flat|nested] 41+ messages in thread

* Re: [patch] kernel events layer
  2004-07-24  5:15     ` Chris Wedgwood
@ 2004-07-24  5:41       ` Robert Love
  2004-07-24  5:45         ` Chris Wedgwood
  0 siblings, 1 reply; 41+ messages in thread
From: Robert Love @ 2004-07-24  5:41 UTC (permalink / raw)
  To: Chris Wedgwood; +Cc: Andrew Morton, linux-kernel

On Fri, 2004-07-23 at 22:15 -0700, Chris Wedgwood wrote:

> This part worries me a lot.  I would almost rather all possible
> messages get stuck somewhere common so driver writers can't add these
> ad-hoc and we can avoid a proliferation of either similar or pointless
> messages.

I would be for this, although the situation is really no different than
today with printk()'s, which I would hope could be replaced in some
cases with the events (an either-or kind of deal).  Dunno.

> Forcing these into a common place lets people eyeball if a new
> message really is necessary --- and it makes writing applications to
> deal with these things easier (since you don't have to scan the entire
> kernel tree).

This is a good idea for other reasons, too: the common base of errors
could be certified as supported by the error daemon, translated, etc.
etc.

I am not sure how realistic this goal is, but I do like it, at least for
the general case of the usual errors in drivers.

	Robert Love

^ permalink raw reply	[flat|nested] 41+ messages in thread

* Re: [patch] kernel events layer
  2004-07-24  5:41       ` Robert Love
@ 2004-07-24  5:45         ` Chris Wedgwood
  0 siblings, 0 replies; 41+ messages in thread
From: Chris Wedgwood @ 2004-07-24  5:45 UTC (permalink / raw)
  To: Robert Love; +Cc: Andrew Morton, linux-kernel

On Sat, Jul 24, 2004 at 01:41:42AM -0400, Robert Love wrote:

> I would be for this, although the situation is really no different
> than today with printk()'s, which I would hope could be replaced in
> some cases with the events (an either-or kind of deal).  Dunno.

except we don't (usually) have daemons listening for printk strings
and doing something very specific based upon them such as scanning new
media

> This is a good idea for other reasons, too: the common base of
> errors could be certified as supported by the error daemon,
> translated, etc.  etc.

my guess is most driver errors will belong to a small common subset

> I am not sure how realistic this goal is, but I do like it, at least
> for the general case of the usual errors in drivers.

all the more reason why they should be placed somewhere non-trivial so
we can discuss what exactly is required and suitable on a case-by-case
basis

my fear is that if we don't do this we will have n different events
for 'disk bad' for n different hba drivers (for example)


  --cw

^ permalink raw reply	[flat|nested] 41+ messages in thread

* Re: [patch] kernel events layer
  2004-07-24  5:37           ` Robert Love
@ 2004-07-24  6:02             ` Robert Love
  2004-07-24  9:43               ` Wichert Akkerman
  2004-07-24 20:21               ` James Morris
  0 siblings, 2 replies; 41+ messages in thread
From: Robert Love @ 2004-07-24  6:02 UTC (permalink / raw)
  To: Andrew Morton; +Cc: kaos, da-x, linux-kernel

On Sat, 2004-07-24 at 01:37 -0400, Robert Love wrote:

> Updated patch attached - also converts the 'type' value to an enum,
> which is safer and something a debugger in user-space can resolve.

Small fix with that.

This is the latest patch.

	Robert Love


Kernel to user-space communication layer using netlink
Signed-off-by: Robert Love <rml@ximian.com>

 arch/i386/kernel/cpu/mcheck/p4.c |    9 ++
 include/linux/kevent.h           |   39 ++++++++++
 include/linux/netlink.h          |    1 
 init/Kconfig                     |   14 +++
 kernel/Makefile                  |    1 
 kernel/kevent.c                  |  141 +++++++++++++++++++++++++++++++++++++++
 6 files changed, 204 insertions(+), 1 deletion(-)

diff -urN linux-2.6.8-rc2/arch/i386/kernel/cpu/mcheck/p4.c linux/arch/i386/kernel/cpu/mcheck/p4.c
--- linux-2.6.8-rc2/arch/i386/kernel/cpu/mcheck/p4.c	2004-06-16 01:19:37.000000000 -0400
+++ linux/arch/i386/kernel/cpu/mcheck/p4.c	2004-07-24 01:25:07.301870200 -0400
@@ -9,6 +9,7 @@
 #include <linux/irq.h>
 #include <linux/interrupt.h>
 #include <linux/smp.h>
+#include <linux/kevent.h>
 
 #include <asm/processor.h> 
 #include <asm/system.h>
@@ -59,9 +60,15 @@
 	if (l & 0x1) {
 		printk(KERN_EMERG "CPU%d: Temperature above threshold\n", cpu);
 		printk(KERN_EMERG "CPU%d: Running in modulated clock mode\n",
-				cpu);
+			cpu);
+		send_kevent(KEVENT_GENERAL,
+			"/org/kernel/arch/kernel/cpu/temperature", "high",
+			"Cpu: %d\n", cpu);
 	} else {
 		printk(KERN_INFO "CPU%d: Temperature/speed normal\n", cpu);
+		send_kevent(KEVENT_GENERAL,
+			"/org/kernel/arch/kernel/cpu/temperature", "normal",
+			"Cpu: %d\n", cpu);
 	}
 }
 
diff -urN linux-2.6.8-rc2/include/linux/kevent.h linux/include/linux/kevent.h
--- linux-2.6.8-rc2/include/linux/kevent.h	1969-12-31 19:00:00.000000000 -0500
+++ linux/include/linux/kevent.h	2004-07-24 01:31:33.077223432 -0400
@@ -0,0 +1,39 @@
+#ifndef _LINUX_KEVENT_H
+#define _LINUX_KEVENT_H
+
+#include <linux/config.h>
+
+/* kevent types - these are used as the multicast group */
+enum kevent {
+	KEVENT_GENERAL	=	0,
+	KEVENT_STORAGE	=	1,
+	KEVENT_POWER	=	2,
+	KEVENT_FS	= 	3,
+	KEVENT_HOTPLUG	=	4,
+};
+
+#ifdef CONFIG_KERNEL_EVENTS
+
+int send_kevent(enum kevent type, const char *object,
+		const char *signal, const char *fmt, ...);
+
+int send_kevent_atomic(enum kevent type, const char *object,
+		const char *signal, const char *fmt, ...);
+
+#else
+
+static inline int send_kevent(enum kevent type,  const char *object,
+		const char *signal, const char *fmt, ...)
+{
+	return 0;
+}
+
+static inline int send_kevent_atomic(enum kevent type, const char *object,
+		const char *signal, const char *fmt, ...)
+{
+	return 0;
+}
+
+#endif /* ! CONFIG_KERNEL_EVENTS */
+
+#endif	/* _LINUX_KEVENT_H */
diff -urN linux-2.6.8-rc2/include/linux/netlink.h linux/include/linux/netlink.h
--- linux-2.6.8-rc2/include/linux/netlink.h	2004-07-23 22:18:04.000000000 -0400
+++ linux/include/linux/netlink.h	2004-07-24 01:25:07.302870048 -0400
@@ -17,6 +17,7 @@
 #define NETLINK_ROUTE6		11	/* af_inet6 route comm channel */
 #define NETLINK_IP6_FW		13
 #define NETLINK_DNRTMSG		14	/* DECnet routing messages */
+#define NETLINK_KEVENT		15	/* Kernel messages to userspace */
 #define NETLINK_TAPBASE		16	/* 16 to 31 are ethertap */
 
 #define MAX_LINKS 32		
diff -urN linux-2.6.8-rc2/init/Kconfig linux/init/Kconfig
--- linux-2.6.8-rc2/init/Kconfig	2004-07-23 22:18:04.000000000 -0400
+++ linux/init/Kconfig	2004-07-24 01:25:07.302870048 -0400
@@ -160,6 +160,20 @@
 	  logging of avc messages output).  Does not do system-call
 	  auditing without CONFIG_AUDITSYSCALL.
 
+config KERNEL_EVENTS
+	bool "Kernel Events Layer"
+	depends on NET
+	default y
+	help
+	  This option enables the kernel events layer, which is a simple
+	  mechanism for kernel-to-user communication over a netlink socket.
+	  The goal of the kernel events layer is to provide a simple and
+	  efficient logging, error, and events system.  Specifically, code
+	  is available to link the events into D-BUS.  Say Y, unless you
+	  are building a system requiring minimal memory consumption.
+
+	  D-BUS is available at http://dbus.freedesktop.org/
+
 config AUDITSYSCALL
 	bool "Enable system-call auditing support"
 	depends on AUDIT && (X86 || PPC64 || ARCH_S390 || IA64)
diff -urN linux-2.6.8-rc2/kernel/kevent.c linux/kernel/kevent.c
--- linux-2.6.8-rc2/kernel/kevent.c	1969-12-31 19:00:00.000000000 -0500
+++ linux/kernel/kevent.c	2004-07-24 01:31:38.561389712 -0400
@@ -0,0 +1,141 @@
+/*
+ * kernel/kevent.c - kernel event delivery over a netlink socket
+ * 
+ * Copyright (C) 2004 Red Hat, Inc.  All rights reserved.
+ *
+ * Licensed under the GNU GPL v2.
+ *
+ * Authors:
+ *	Arjan van de Ven	<arjanv@redhat.com>
+ *	Kay Sievers		<kay.sievers@vrfy.org>
+ *	Robert Love		<rml@novell.com>
+ */
+
+#include <linux/module.h>
+#include <linux/spinlock.h>
+#include <linux/socket.h>
+#include <linux/skbuff.h>
+#include <linux/netlink.h>
+#include <linux/string.h>
+#include <linux/kevent.h>
+#include <net/sock.h>
+
+/* There is one global netlink socket */
+static struct sock *kevent_sock = NULL;
+
+static void netlink_receive(struct sock *sk, int len)
+{
+	struct sk_buff *skb;
+
+	 /* just drop them all */
+	while ((skb = skb_dequeue(&sk->sk_receive_queue)) != NULL)
+		kfree_skb(skb);
+}
+
+static int netlink_send(__u32 groups, int gfp_mask, const char *buffer, int len)
+{
+	struct sk_buff *skb;
+	char *data_start;
+
+	if (!kevent_sock)
+		return -EIO;
+
+	if (!buffer)
+		return -EINVAL;
+
+	if (len > PAGE_SIZE)
+		return -EINVAL;
+
+	skb = alloc_skb(len, gfp_mask);
+	if (!skb)
+		return -ENOMEM;
+	data_start = skb_put(skb, len);
+	memcpy(data_start, buffer, len);
+
+	return netlink_broadcast(kevent_sock, skb, 0, groups, gfp_mask);
+}
+
+static int do_send_kevent(enum kevent type, int gfp_mask, const char *object,
+			  const char *signal, const char *fmt, va_list args)
+{
+	char *buffer;
+	int len;
+	int ret;
+
+	if (!object)
+		return -EINVAL;
+
+	if (!signal)
+		return -EINVAL;
+
+	if (strlen(object) > PAGE_SIZE)
+		return -EINVAL;
+
+	buffer = (char *) alloc_page(gfp_mask);
+	if (!buffer)
+		return -ENOMEM;
+
+	snprintf(buffer, PAGE_SIZE, "From: %s\nSignal: %s\n", object, signal);
+	len = strlen(buffer);
+
+	/* possible auxiliary data */
+	if (fmt)
+		len += vscnprintf(buffer+len, PAGE_SIZE-len-1, fmt, args);
+	buffer[len++] = '\0';
+
+	ret = netlink_send((1 << type), gfp_mask, buffer, len);
+	free_page((unsigned long) buffer);
+
+	return ret;
+}
+
+/**
+ * send_kevent - send a message to user-space via the kernel events layer
+ */
+int send_kevent(enum kevent type, const char *object,
+		const char *signal, const char *fmt, ...)
+{
+	va_list args;
+	int ret;
+
+	va_start(args, fmt);
+	ret = do_send_kevent(type, GFP_KERNEL, object, signal, fmt, args);
+	va_end(args);
+
+	return ret;
+}
+
+EXPORT_SYMBOL_GPL(send_kevent);
+
+/**
+ * send_kevent_atomic - send a message to user-space via the kernel events layer
+ */
+int send_kevent_atomic(enum kevent type, const char *object,
+		const char *signal, const char *fmt, ...)
+{
+	va_list args;
+	int ret;
+
+	va_start(args, fmt);
+	ret = do_send_kevent(type, GFP_ATOMIC, object, signal, fmt, args);
+	va_end(args);
+
+	return ret;
+}
+
+EXPORT_SYMBOL_GPL(send_kevent_atomic);
+
+static int kevent_init(void)
+{
+	kevent_sock = netlink_kernel_create(NETLINK_KEVENT, netlink_receive);
+
+	if (!kevent_sock) {
+		printk(KERN_ERR "kevent: "
+		       "unable to create netlink socket; aborting\n");
+		return -ENODEV;
+	}
+
+	return 0;
+}
+
+module_init(kevent_init);
diff -urN linux-2.6.8-rc2/kernel/Makefile linux/kernel/Makefile
--- linux-2.6.8-rc2/kernel/Makefile	2004-07-23 22:18:04.000000000 -0400
+++ linux/kernel/Makefile	2004-07-24 01:25:07.303869896 -0400
@@ -23,6 +23,7 @@
 obj-$(CONFIG_STOP_MACHINE) += stop_machine.o
 obj-$(CONFIG_AUDIT) += audit.o
 obj-$(CONFIG_AUDITSYSCALL) += auditsc.o
+obj-$(CONFIG_KERNEL_EVENTS) += kevent.o
 
 ifneq ($(CONFIG_IA64),y)
 # According to Alan Modra <alan@linuxcare.com.au>, the -fno-omit-frame-pointer is



^ permalink raw reply	[flat|nested] 41+ messages in thread

* Re: [patch] kernel events layer
  2004-07-24  4:42     ` Keith Owens
  2004-07-24  5:00       ` Robert Love
@ 2004-07-24  6:53       ` Paul Jackson
  2004-07-24 11:37       ` Bernd Petrovitsch
  2 siblings, 0 replies; 41+ messages in thread
From: Paul Jackson @ 2004-07-24  6:53 UTC (permalink / raw)
  To: Keith Owens; +Cc: rml, da-x, akpm, linux-kernel

Keith wrote:
> Never use the return value from snprintf to work out the next buffer
> position, it is not reliable when the data is truncated.

That's why Juergen Quade added scnprintf and vscnprintf to lib/vsprintf.c:

 * If you want to have the exact
 * number of characters written into @buf as return value
 * (not including the trailing '\0'), use vscnprintf.

Andrew wrote:
> A single snprintf here would suit.

As Robert said ... Doh!

-- 
                          I won't rest till it's the best ...
                          Programmer, Linux Scalability
                          Paul Jackson <pj@sgi.com> 1.650.933.1373

^ permalink raw reply	[flat|nested] 41+ messages in thread

* Re: [patch] kernel events layer, updated
  2004-07-24  3:11   ` [patch] kernel events layer, updated Robert Love
@ 2004-07-24  7:58     ` Deepak Saxena
  2004-07-24  8:23       ` Deepak Saxena
  0 siblings, 1 reply; 41+ messages in thread
From: Deepak Saxena @ 2004-07-24  7:58 UTC (permalink / raw)
  To: Robert Love; +Cc: Andrew Morton, linux-kernel, zaitcev

On Jul 23 2004, at 23:11, Robert Love was caught saying:
> @@ -59,9 +60,15 @@
>  	if (l & 0x1) {
>  		printk(KERN_EMERG "CPU%d: Temperature above threshold\n", cpu);
>  		printk(KERN_EMERG "CPU%d: Running in modulated clock mode\n",
> -				cpu);
> +			cpu);
> +		send_kevent(KMSG_POWER,
> +			"/org/kernel/devices/system/cpu/temperature", "high",
> +			"Cpu: %d\n", cpu);
>  	} else {
>  		printk(KERN_INFO "CPU%d: Temperature/speed normal\n", cpu);
> +		send_kevent(KMSG_POWER,
> +			"/org/kernel/devices/system/cpu/temperature", "normal",
> +			"Cpu: %d\n", cpu);

Robert,

What is the specified naming scheme for objects and in the case of 
devices, why not use the sysfs path as part of the object path? For example:

	"/org/kernel/system/cpu/cpu0"

Since we have unique paths for devices in sysfs, this would remove the need 
of having the auxiliary "CPU: %d" as it is embedded in the object name. 

Also, why the "/org/kernel"? Since all messages from the kernel
can only come from the kernel, why do we need this as part of the object
name?  Looking at the D-BUS spec, it looks like the "org.foo" is part of the 
D-BUS/HAL/freedesktop naming scheme, but this should not be pushed into
the kernel IMHO. As you yourself mentioned in your talk today, D-BUS
is just one daemon that could use the kevents interface, so I don't
think we want to push its naming scheme into the kernel messages.
The kernel should use an object name that is unique in the context 
of the kernel (hence my suggestion to use sysfs path, but perhaps there
is something else?) and D-BUS should generate the appropriate object 
name that it expects.  The kernel is never going to send messages for 
objects in org.freedesktop or anything !org.kernel, so we are just 
stuffing extra bytes in the message that are very specific to a given 
userland implementation.

You also mentioned some interesting usage examples of HAL/D-BUS/kevents 
in your talk. Any possibility of getting a patch with the kernel specific
changes?  I ask b/c I would like to see how you imagine this being used in 
the context of things like device add/remove and other things device driver 
writers would be dealing with it.

Tnx,
~Deepak

-- 
Deepak Saxena - dsaxena at plexity dot net - http://www.plexity.net/

"Unlike me, many of you have accepted the situation of your imprisonment and
 will die here like rotten cabbages." - Number 6

^ permalink raw reply	[flat|nested] 41+ messages in thread

* Re: [patch] kernel events layer
  2004-07-24  5:00       ` Robert Love
@ 2004-07-24  8:11         ` Andrew Morton
  2004-07-24  5:37           ` Robert Love
  0 siblings, 1 reply; 41+ messages in thread
From: Andrew Morton @ 2004-07-24  8:11 UTC (permalink / raw)
  To: Robert Love; +Cc: kaos, da-x, linux-kernel

Robert Love <rml@ximian.com> wrote:
>
> +	buffer = (char *) get_zeroed_page(gfp_mask);

Why zeroed?

> +	if (!buffer)
> +		return -ENOMEM;
> +
> +	snprintf(buffer, PAGE_SIZE, "From: %s\n", object);
> +	len = strlen(buffer);
> +	snprintf(buffer + len, PAGE_SIZE - len, "Signal: %s\n", signal);
> +	len = strlen(buffer);

A single snprintf here would suit.

^ permalink raw reply	[flat|nested] 41+ messages in thread

* Re: [patch] kernel events layer, updated
  2004-07-24  7:58     ` Deepak Saxena
@ 2004-07-24  8:23       ` Deepak Saxena
  0 siblings, 0 replies; 41+ messages in thread
From: Deepak Saxena @ 2004-07-24  8:23 UTC (permalink / raw)
  To: Robert Love; +Cc: Andrew Morton, linux-kernel, zaitcev

On Jul 24 2004, at 00:58, Deepak Saxena was caught saying:
> The kernel should use an object name that is unique in the context 
> of the kernel (hence my suggestion to use sysfs path, but perhaps there
> is something else?) and D-BUS should generate the appropriate object 
> name that it expects.  The kernel is never going to send messages for 

Ermm..need sleep. What I meant is that D-BUS (kdbusd really) should
take care of generating the object name expected by D-BUS clients
from the kernel object name.  Looking at the kdbusd source, it expects
the kernel to provide a D-BUS object name it can stuff directly into
the dbus_message_new_signal() call.  This means forcing a specific 
kevent-handling mechanism's implementation on the kernel.  I don't think 
that's what we want to do as the sending of events and how those events 
happen to be parsed and handled in userspace should be kept separate. 

~Deepak

-- 
Deepak Saxena - dsaxena at plexity dot net - http://www.plexity.net/

"Unlike me, many of you have accepted the situation of your imprisonment and
 will die here like rotten cabbages." - Number 6

^ permalink raw reply	[flat|nested] 41+ messages in thread

* Re: [patch] kernel events layer
  2004-07-24  3:14   ` Robert Love
@ 2004-07-24  9:15     ` Michael Clark
  2004-07-24 15:08     ` Deepak Saxena
  2004-08-09 13:29     ` Pavel Machek
  2 siblings, 0 replies; 41+ messages in thread
From: Michael Clark @ 2004-07-24  9:15 UTC (permalink / raw)
  To: Robert Love; +Cc: akpm, linux-kernel

On 07/24/04 11:14, Robert Love wrote:
> On Sat, 2004-07-24 at 11:02 +0800, Michael Clark wrote:
> 
> 
>>Should there be some sharing with the device naming of sysfs or are
>>will we introduce a new one? ie sysfs uses:
>>
>>devices/system/cpu/cpu0/<blah>
>>
>>Would it be a better way to have a version that takes struct kobject
>>to enforce consistency in the device naming scheme. This also means
>>userspace would automatically know where to look in /sys if futher
>>info was needed.
> 
> 
> No, we want to give an interface that matches the sort of provider URI
> used by object systems such as CORBA, D-BUS, and DCOP.  We also do _not_
> want to put policy in the kernel.
> 
> The easiest way to avoid that is simply to use a name similar to the
> path name.

So if it is only similar - then a mapping would be needed to maintain
a link with the new kernel message naming model if userspace wants
to relate the message back to the kernel device it was related to.

> Passing the sysfs name would probably be a good potential argument to
> the signal, though.  The temperature signal in the patch is just an
> example.
> 
> 
>>Question is does it make sense to use this infrastructure without sysfs
>>as hald, etc require it. ie depends CONFIG_SYSFS
> 
> 
> That sounds like policy to me.

Perhaps it is, although consistent naming of system objects and messages
related to them as a policy sounds like a good one.

I see the argument of not making this depend on sysfs although it could
perhaps be argued both ways. Certainly using the sysfs kobjects gives
you the consistent naming for free. You are making a similar policy
assumption by using netlink, that all users of event logging will
want networking compiled in. Just as fs's depend on block devices,
so can system object related messages depend on *the* system object naming
and topology framework.

> Especially if drivers start using this for error logging, there are no
> ties to sysfs.  Configuration dependencies tend to be hard build-time
> deps anyhow.

Perhaps these driver errors are most likely in the context of a system object
that would be registered with sysfs (drivers, device instances, buses, etc).
Which in your example is one such case.

I'm not saying it should be either way just that there are logical
benefits to having this dependency. If not, it is a choice of duplication
of effort to name and/or maintain these separate but "similar" to sysfs names.

The question you have to ask is - how many systems that use this will not
have sysfs compiled in? (ie. all 2.6 based desktop systems or any 2.6 embedded
device using hotplug and udev will have sysfs).

As I said it can be argued both ways and forgoing the dependency on sysfs
may have other negative maintenance aspects of yet another naming scheme.
ie. does LANANA need to set up a registry for these?

just my 2c.

~mc

^ permalink raw reply	[flat|nested] 41+ messages in thread

* Re: [patch] kernel events layer
  2004-07-24  6:02             ` Robert Love
@ 2004-07-24  9:43               ` Wichert Akkerman
  2004-07-24 20:21               ` James Morris
  1 sibling, 0 replies; 41+ messages in thread
From: Wichert Akkerman @ 2004-07-24  9:43 UTC (permalink / raw)
  To: linux-kernel

All the current netlink messages are structs with basic data with a
variable list of attributes. This is easy to parse and all current
netlink tools support uniformly. Is it intentional that you are now
switching to text data that needs to be parsed? Your patch seems
to have RFC822-style headers that would work perfectly as standard
netlink attributes to a message.

Wichert.

-- 
Wichert Akkerman <wichert@wiggy.net>    It is simple to make things.
http://www.wiggy.net/                   It is hard to make things simple.

^ permalink raw reply	[flat|nested] 41+ messages in thread

* Re: [patch] kernel events layer
  2004-07-24  4:42     ` Keith Owens
  2004-07-24  5:00       ` Robert Love
  2004-07-24  6:53       ` Paul Jackson
@ 2004-07-24 11:37       ` Bernd Petrovitsch
  2 siblings, 0 replies; 41+ messages in thread
From: Bernd Petrovitsch @ 2004-07-24 11:37 UTC (permalink / raw)
  To: Keith Owens; +Cc: Robert Love, Dan Aloni, akpm, linux-kernel

On Sat, 2004-07-24 at 06:42, Keith Owens wrote:
> On Fri, 23 Jul 2004 22:47:06 -0400, 
> Robert Love <rml@ximian.com> wrote:
> >On Sat, 2004-07-24 at 00:32 +0300, Dan Aloni wrote:
> >
> >> IMHO you either should not assume anything about the length of the object 
> >> string, _or_ do the complete safe string assembly e.g:
> >> 
> >>         len += snprintf(buffer, PAGE_SIZE, "From: %s\nSignal: %s\n", 
> >>                         object, signal);
> >> 
> >
> >Fair enough.  I guess what we want, exactly, is:
> >
> > len = snprintf(buffer, PAGE_SIZE, "From: %s\n", object);
> > len += snprintf(&buffer[len], PAGE_SIZE - len "Signal: %s\n", signal);
> >
> >I will add that to the next revision.
> 
> man snprintf
> 
>   "If the output was truncated due to this limit then the return value
>   is the number of characters (not including the trailing '\0') which
>   would have been written to the final string if enough space had been
>   available. Thus, a return value of size or more means that the output
>   was truncated".
> 
> Never use the return value from snprintf to work out the next buffer
> position, it is not reliable when the data is truncated.  The example
> above uses a second call to snprintf which will generate a warning for
> truncated data and fail safe, but not all code is that trustworthy.  I

The kernel snprintf() is (and the shown warning is actually debatable.
Actually snprintf()s interface is not that optimal for easy handling,
since the size parameter is unsigned and not signed. But it is specified
in SUSv3 that way.).

> always use strlen to get the real buffer length.
> 
>   snprintf(buffer, PAGE_SIZE, "From: %s\n", object);
>   len = strlen(buffer);
>   snprintf(buffer+len, PAGE_SIZE - len, "Signal: %s\n", signal);

Since "overflows" only occur if the destination buffer is full, one
could avoid the following snprintf()s completely. Let alone the call of
strlen()
----  snip  ----
   len = snprintf(buffer, PAGE_SIZE, "From: %s\n", object);
   if (len < PAGE_SIZE) {
      len += snprintf(buffer+len, PAGE_SIZE - len,
                      "Signal: %s\n", signal);
   }
----  snip  ----

	Bernd
-- 
Firmix Software GmbH                   http://www.firmix.at/
mobil: +43 664 4416156                 fax: +43 1 7890849-55
          Embedded Linux Development and Services



^ permalink raw reply	[flat|nested] 41+ messages in thread

* Re: [patch] kernel events layer
  2004-07-24  3:14   ` Robert Love
  2004-07-24  9:15     ` Michael Clark
@ 2004-07-24 15:08     ` Deepak Saxena
  2004-07-24 15:45       ` Robert Love
  2004-08-09 13:29     ` Pavel Machek
  2 siblings, 1 reply; 41+ messages in thread
From: Deepak Saxena @ 2004-07-24 15:08 UTC (permalink / raw)
  To: Robert Love; +Cc: Michael Clark, akpm, linux-kernel

On Jul 23 2004, at 23:14, Robert Love was caught saying:
> On Sat, 2004-07-24 at 11:02 +0800, Michael Clark wrote:
> 
> > Should there be some sharing with the device naming of sysfs or are
> > will we introduce a new one? ie sysfs uses:
> >
> > devices/system/cpu/cpu0/<blah>
> >
> > Would it be a better way to have a version that takes struct kobject
> > to enforce consistency in the device naming scheme. This also means
> > userspace would automatically know where to look in /sys if futher
> > info was needed.
> 
> No, we want to give an interface that matches the sort of provider URI
> used by object systems such as CORBA, D-BUS, and DCOP.  We also do _not_
> want to put policy in the kernel.

What if I don't want something as heavyweight as D-BUS to handle this
for me and just want a simple parser that can tell me "this device
has x event". The kernel simply is telling user space that there
is an event and should not know/care how and by what it is handled.
Saying CORBA, D-BUS, DCOP use this naming scheme so we should too
seems like policy to me. If I am a driver writer and I want to just 
send some state change notification, I do not want to care about such 
things. I want to be able to use a name that makes sense in the
context of the kernel and using a kobject sounds good b/c then I really
don't have to care about naming and will be less prone to errors
from typos and such. Remember, the user space daemons are not the ones 
that will actually call these functions. It is kernel code and the API
needs to be easily usable by kernel programmers.

> The easiest way to avoid that is simply to use a name similar to the
> path name.

What is the path name of a device from the kernels point of view?
Since device naming in /dev is left up to userland now, it has to
be something else that the kernel is aware of.

> Passing the sysfs name would probably be a good potential argument to
> the signal, though.  The temperature signal in the patch is just an
> example.

That sounds good, but what about a radically different approach?

What we are fundamentally trying to do is notify user space that a 
specific attribute of a specific object has had a state change. In your
example, the object is the cpu, and the attribute is "temperature". Instead 
of telling the user space daemon that the temperature is "high" (which is 
an incredibly arbitrary string), we pass the object name and attribute name 
to user space.  User space can then go read the appropriate sysfs file or take 
whatever other action is required to determine what the state change actually 
is.  In the case of a file close, the object name is the file path and the 
attribute could be the ctime, but it needs more thinking.  

> > Question is does it make sense to use this infrastructure without sysfs
> > as hald, etc require it. ie depends CONFIG_SYSFS
> 
> That sounds like policy to me.

How is this policy? We are simply saying this subsystem in the kernel
depends on having this other subsystem in the kernel. JFFS2 requires
MTD to be configured since it is layered atop that subsystem.  I think
this would be no different. 

~Deepak

-- 
Deepak Saxena - dsaxena at plexity dot net - http://www.plexity.net/

"Unlike me, many of you have accepted the situation of your imprisonment and
 will die here like rotten cabbages." - Number 6

^ permalink raw reply	[flat|nested] 41+ messages in thread

* Re: [patch] kernel events layer
  2004-07-24 15:08     ` Deepak Saxena
@ 2004-07-24 15:45       ` Robert Love
  2004-07-24 17:33         ` Ryan Anderson
                           ` (2 more replies)
  0 siblings, 3 replies; 41+ messages in thread
From: Robert Love @ 2004-07-24 15:45 UTC (permalink / raw)
  To: dsaxena; +Cc: Michael Clark, akpm, linux-kernel

On Sat, 2004-07-24 at 08:08 -0700, Deepak Saxena wrote:

> > No, we want to give an interface that matches the sort of provider URI
> > used by object systems such as CORBA, D-BUS, and DCOP.  We also do _not_
> > want to put policy in the kernel.
> 
> What if I don't want something as heavyweight as D-BUS to handle this
> for me and just want a simple parser that can tell me "this device
> has x event". The kernel simply is telling user space that there
> is an event and should not know/care how and by what it is handled.
> Saying CORBA, D-BUS, DCOP use this naming scheme so we whould too
> seems like policy to me. If I am a driver writer and I want to just 
> send some state change notification, I do not want to care about such 
> things. I want to be able to use a name that makes sense in the
> context of the kernel and using a kobject sounds good b/c then I really
> don't have to care about naming and will be less prone to errors
> from typos and such. Remember, the user space deamons are not the ones 
> that will actually call these functions. It is kernel code and the API
> needs to be easilly useable by kernel programers.

Not everything has a corresponding sysfs name, which really makes the
whole notion moot.

> > The easiest way to avoid that is simply to use a name similar to the
> > path name.
> 
> What is the path name of a device from the kernels point of view?
> Since device naming in /dev is left up to userland now, it has to
> be something else that the kernel is aware of.

I might not have been clear - path name of the file in the kernel source
tree.  So if you add an event to fs/open.c the path is
"/org/kernel/fs/open".  This is a pretty generic naming scheme that
ensures names will be unique within the kernel and will not conflict
with names outside the kernel (e.g. the global URI space of whatever is
used in user-space).

> > Passing the sysfs name would probably be a good potential argument to
> > the signal, though.  The temperature signal in the patch is just an
> > example.
> 
> That sounds good, but what about a radically different approach?
> 
> What we are fundamentally trying to do is notify user space that a 
> specific attribute of a specific object has had a state change. In your
> example, the object is the cpu, and the attribute is "temperature". Instead 
> of telling the user space daemon that the temperature is "high" (which is 
> an incredibly arbitrary string), we pass the object name and attribute name 
> to user space.  User space can then go read the appropriate sysfs file or take 
> whatever other action is required to determine what the state change actually 
> is.

Agreed.  Not passing the data and just passing a "change occurred" flag
is a good idea in many cases.  For example, for "new filesystem mounted"
I think it makes most sense to just send out a "new filesystem mounted"
signal and not include the data.  Let user-space rescan /proc/mtab in
response.

But we cannot do that for everything.

"high" is only an arbitrary string if it is not standardized.  If the
temperature event is defined to come from such and such an interface,
with such and such values, it is all very easy to use.  I mean, this is
how object systems work today.

> In the case of a file close, the object name is the file path and the 
> attribute could be the ctime, but it needs more thinking.  

This is where the sysfs naming scheme breaks down.  You now have two
different namespaces - kobjects and files on a filesystem.  We really
need something a lot simpler.  Let user-space map stuff around if we
want that.

> > > Question is does it make sense to use this infrastructure without sysfs
> > > as hald, etc require it. ie depends CONFIG_SYSFS
> > 
> > That sounds like policy to me.
> 
> How is this policy? We are simply saying this subsystem in the kernel
> depends on having this other subystem in the kernel. JFFS2 requires
> MTD to be configured since it is layered atop that subsystem.  I think
> this would be no different. 

It is policy because the question was saying "since A is usually used
with B in the context of foo, make A require B."  But
CONFIG_KERNEL_EVENTS does not require sysfs in any way.  Why force their
use together?

Best,

	Robert Love

^ permalink raw reply	[flat|nested] 41+ messages in thread

* Re: [patch] kernel events layer
  2004-07-24 15:45       ` Robert Love
@ 2004-07-24 17:33         ` Ryan Anderson
  2004-07-24 17:46         ` Tim Hockin
  2004-07-24 17:54         ` Deepak Saxena
  2 siblings, 0 replies; 41+ messages in thread
From: Ryan Anderson @ 2004-07-24 17:33 UTC (permalink / raw)
  To: linux-kernel

On Sat, Jul 24, 2004 at 11:45:53AM -0400, Robert Love wrote:
> On Sat, 2004-07-24 at 08:08 -0700, Deepak Saxena wrote:
> > > The easiest way to avoid that is simply to use a name similar to the
> > > path name.
> > 
> > What is the path name of a device from the kernels point of view?
> > Since device naming in /dev is left up to userland now, it has to
> > be something else that the kernel is aware of.
> 
> I might not of been clear - path name of the file in the kernel source
> tree.  So if you add an event to fs/open.c the path is
> "/org/kernel/fs/open".  This is a pretty generic naming scheme that
> ensures names will be unique within the kernel and will not conflict
> with names outside the kernel (e.g. the global URI space of whatever is
> used in user-space).

So, when I do something like
	mv kernel/fs/x.c kernel/fs/y.c

I also have to do:
	sed -i -e s/kernel\/fs\/x.c/kernel\/fs\/y.c/g kernel/fs/y.c

Won't that, in effect, be breaking a defacto userspace API by changing
message paths, even if the semantic meaning, cause and possible
resolutions are all unchanged?

-- 

Ryan Anderson

^ permalink raw reply	[flat|nested] 41+ messages in thread

* Re: [patch] kernel events layer
  2004-07-24 15:45       ` Robert Love
  2004-07-24 17:33         ` Ryan Anderson
@ 2004-07-24 17:46         ` Tim Hockin
  2004-07-24 18:19           ` Robert Love
  2004-07-24 17:54         ` Deepak Saxena
  2 siblings, 1 reply; 41+ messages in thread
From: Tim Hockin @ 2004-07-24 17:46 UTC (permalink / raw)
  To: Robert Love; +Cc: dsaxena, Michael Clark, akpm, linux-kernel

On Sat, Jul 24, 2004 at 11:45:53AM -0400, Robert Love wrote:
> 
> Not everything has a corresponding sysfs name, which really makes the
> whole notion moot.

The things that do can use it, though.  Here's a place where inconsistency
(if present) is pointless.

> I might not of been clear - path name of the file in the kernel source
> tree.  So if you add an event to fs/open.c the path is
> "/org/kernel/fs/open".  This is a pretty generic naming scheme that
> ensures names will be unique within the kernel and will not conflict
> with names outside the kernel (e.g. the global URI space of whatever is
> used in user-space).

This immediately strikes me as a really bad idea.  Stuff moves between
files.  Two files might really want to signal an event from the same
source.  

> "high" is only an arbitrary string if it is not standardized.  If the
> temperature event is defined to come from such and such an interface,
> with such and such values, it is all very easy to use.  I mean, this is
> how object systems work today.

As long as we're religious about making every subsystem standardize these
names, it should be ok.  Another reason to macro-ize.  There are way too
many people touching too much code that might take advantage of a generic
kernel->user event to rely on soft rules.

> > In the case of a file close, the object name is the file path and the 
> > attribute could be the ctime, but it needs more thinking.  
> 
> This is where the sysfs naming scheme breaks down.  You now have two
> different namespaces - kobjects and files on a filesystem.  We really
> need something a lot simpler.  Let user-space map stuff around if we
> want that.

Assuming that we're going to be doing file notifications via this,
wouldn't something like "/fs/file close /path/to/foo" be right(er)?

Cheers
Tim

^ permalink raw reply	[flat|nested] 41+ messages in thread

* Re: [patch] kernel events layer
  2004-07-24 15:45       ` Robert Love
  2004-07-24 17:33         ` Ryan Anderson
  2004-07-24 17:46         ` Tim Hockin
@ 2004-07-24 17:54         ` Deepak Saxena
  2004-07-24 18:13           ` Robert Love
  2 siblings, 1 reply; 41+ messages in thread
From: Deepak Saxena @ 2004-07-24 17:54 UTC (permalink / raw)
  To: Robert Love; +Cc: Michael Clark, akpm, linux-kernel

On Jul 24 2004, at 11:45, Robert Love was caught saying:
> 
> > > The easiest way to avoid that is simply to use a name similar to the
> > > path name.
> > 
> > What is the path name of a device from the kernels point of view?
> > Since device naming in /dev is left up to userland now, it has to
> > be something else that the kernel is aware of.
> 
> I might not of been clear - path name of the file in the kernel source
> tree.  So if you add an event to fs/open.c the path is
> "/org/kernel/fs/open".  This is a pretty generic naming scheme that
> ensures names will be unique within the kernel and will not conflict
> with names outside the kernel (e.g. the global URI space of whatever is
> used in user-space).

Oh ok, that makes much more sense now. "arch/kerne/cpu" is the
name of the file, from which that message came.

> > an incredibly arbitrary string), we pass the object name and attribute name 
> > to user space.  User space can then go read the appropriate sysfs file or take 
> > whatever other action is required to determine what the state change actually 
> > is.
> 
> Agreed.  Not passing the data and just passing a "change occurred" flag
> is a good idea in many cases.  For example, for "new filesystem mounted"
> I think it makes most sense to just send out a "new filesystem mounted"
> signal and not include the data.  Let user-space rescan /proc/mtab in
> response.
> 
> But we cannot do that for everything.
> 
> "high" is only an arbitrary string if it is not standardized.  If the
> temperature event is defined to come from such and such an interface,
> with such and such values, it is all very easy to use.  I mean, this is
> how object systems work today.

I think we agree.  So are there some existing docs that you/Ximian has 
on recommended usage and object naming? I didn't see anything on 
freedesktop.org.  That's where a lot of my questions are coming from. We 
have this really simple events system, but how do we expect it to be used
in the kernel.

~Deepak


~Deepak

-- 
Deepak Saxena - dsaxena at plexity dot net - http://www.plexity.net/

"Unlike me, many of you have accepted the situation of your imprisonment and
 will die here like rotten cabbages." - Number 6

^ permalink raw reply	[flat|nested] 41+ messages in thread

* Re: [patch] kernel events layer
  2004-07-24 17:54         ` Deepak Saxena
@ 2004-07-24 18:13           ` Robert Love
  2004-07-26 20:08             ` Rutger Nijlunsing
  0 siblings, 1 reply; 41+ messages in thread
From: Robert Love @ 2004-07-24 18:13 UTC (permalink / raw)
  To: dsaxena; +Cc: Michael Clark, akpm, linux-kernel

On Sat, 2004-07-24 at 10:54 -0700, Deepak Saxena wrote:

> Oh ok, that makes much more sense now. "arch/kerne/cpu" is the
> name of the file, from which that message came.

Yah.  It is pretty simple and it gives us a unique name without imposing
any naming policy.

I have, however, been thinking about using kobject paths. ;-)  I
actually like the idea now, but I do not think we can get a kobject for
most of the stuff we need, unfortunately.  Also, we need the kobject
name to be unique.  This is an interesting concept to keep in mind,
though.

> I think we agree.  So are there some existing docs that you/Ximian has 
> on reccomended usage and object naming? I didn't see anything on 
> freedesktop.org.  That's where a lot of my questions are coming from. We 
> have this really simple events system, but how do we expect it to be used
> in the kernel.

No, we don't have any usage recommendations.  Going forward, this is
something we all need to work on and agree with.

All I want is a way to get events to user-space asynchronously without
any hacks.  This fits the bill nicely.  ;-)

That said, I do have some basic ideas about usage.  I see two main uses,
asynchronous events (such as, filesystem mounted) and the more specific
case of errors (such as device failure).

Criteria for adding the event would be that user-space needs to know
about it, and would normally have to poll to get the information.  If
the event is so non-important that right now no one even knows about it
or cares about it, it may not be worth adding.

But let's look at filesystem mounted, since many user-space applications
are interested in this.  Right now, they poll /proc/mtab every few
seconds, parse it, and look for changes.  Gross, right?

So we can create an event in fs/mount.c, say "/org/kernel/fs/mount" with
the signal "change".  The payload could be a simple "mounted" and
"unmounted" or the exact details on what was mounted or unmounted or
even nothing.  I'd prefer to give no information and just have the event
cause a re-read of /proc/mtab.  But whatever.

So we wind up with, in do_mount,

	send_kevent (KEVENT_FS, "/org/kernel/fs/mount",
		     "change", "mounted");

Or similar.

	Robert Love

^ permalink raw reply	[flat|nested] 41+ messages in thread

* Re: [patch] kernel events layer
  2004-07-24 17:46         ` Tim Hockin
@ 2004-07-24 18:19           ` Robert Love
  2004-07-25 18:11             ` Tim Hockin
  0 siblings, 1 reply; 41+ messages in thread
From: Robert Love @ 2004-07-24 18:19 UTC (permalink / raw)
  To: Tim Hockin; +Cc: dsaxena, Michael Clark, akpm, linux-kernel

On Sat, 2004-07-24 at 10:46 -0700, Tim Hockin wrote:

Hey, Tim.

> The things that do can use it, though.  Here's a place where inconsistency
> (if present) is pointless.1

If some things can use the kobject path, we can use it in the argument
field.  I am cool with that - that is exactly what I want, in fact.  But
what we use as the naming convention needs to be something we can use
uniformly.  Unfortunately not everything has a kobject backing it, and
we cannot change that.

> This immediately strikes me as a really bad idea.  Stuff moves between
> files.  Two files might really want to signal an event from the same
> source.  

The signal name would be different.

> As long as we're religious about making every subsystem standardize these
> names, it should be ok.  Another reason to macro-ize.  There are way too
> many people touching too much code that might take advantage of a generic
> kernel->user event to rely on soft rules.

I like your macro-izing idea and the notion of standardizing.  Someone
else brought up a good example: we want _all_ disk drivers to emit the
exact same signal for e.g. "disk full" so user-space can react to it.
It needs to be consistent.  At least for driver error logging, we
definitely want standards and macro-izing.  The translation point is
another good reason for it.

	Robert Love

^ permalink raw reply	[flat|nested] 41+ messages in thread

* Re: [patch] kernel events layer
  2004-07-24  6:02             ` Robert Love
  2004-07-24  9:43               ` Wichert Akkerman
@ 2004-07-24 20:21               ` James Morris
  2004-07-25  2:12                 ` Robert Love
  1 sibling, 1 reply; 41+ messages in thread
From: James Morris @ 2004-07-24 20:21 UTC (permalink / raw)
  To: Robert Love; +Cc: Andrew Morton, kaos, da-x, linux-kernel

On Sat, 24 Jul 2004, Robert Love wrote:

> +static int kevent_init(void)
> +{
> +	kevent_sock = netlink_kernel_create(NETLINK_KEVENT, netlink_receive);

Consider a NULL netlink_receive function, as you're dropping any received
messages.  This will provide better interface semantics e.g.  connection
refused message on message transmission to kernel.


- James
-- 
James Morris
<jmorris@redhat.com>


^ permalink raw reply	[flat|nested] 41+ messages in thread

* Re: [patch] kernel events layer
  2004-07-24 20:21               ` James Morris
@ 2004-07-25  2:12                 ` Robert Love
  0 siblings, 0 replies; 41+ messages in thread
From: Robert Love @ 2004-07-25  2:12 UTC (permalink / raw)
  To: James Morris; +Cc: Andrew Morton, kaos, da-x, linux-kernel

On Sat, 2004-07-24 at 16:21 -0400, James Morris wrote:

> Consider a NULL netlink_receive function, as you're dropping any received
> messages.  This will provide better interface semantics e.g.  connection
> refused message on message transmission to kernel.

Nice feature.   Thanks.

Updated patch follows.

	Robert Love


Kernel to user-space communication layer using netlink
Signed-off-by: Robert Love <rml@ximian.com>

 arch/i386/kernel/cpu/mcheck/p4.c |    9 ++
 include/linux/kevent.h           |   39 +++++++++++
 include/linux/netlink.h          |    1 
 init/Kconfig                     |   14 ++++
 kernel/Makefile                  |    1 
 kernel/kevent.c                  |  132 +++++++++++++++++++++++++++++++++++++++
 6 files changed, 195 insertions(+), 1 deletion(-)

diff -urN linux-2.6.8-rc2/arch/i386/kernel/cpu/mcheck/p4.c linux/arch/i386/kernel/cpu/mcheck/p4.c
--- linux-2.6.8-rc2/arch/i386/kernel/cpu/mcheck/p4.c	2004-06-16 01:19:37.000000000 -0400
+++ linux/arch/i386/kernel/cpu/mcheck/p4.c	2004-07-24 20:41:56.000000000 -0400
@@ -9,6 +9,7 @@
 #include <linux/irq.h>
 #include <linux/interrupt.h>
 #include <linux/smp.h>
+#include <linux/kevent.h>
 
 #include <asm/processor.h> 
 #include <asm/system.h>
@@ -59,9 +60,15 @@
 	if (l & 0x1) {
 		printk(KERN_EMERG "CPU%d: Temperature above threshold\n", cpu);
 		printk(KERN_EMERG "CPU%d: Running in modulated clock mode\n",
-				cpu);
+			cpu);
+		send_kevent(KEVENT_GENERAL,
+			"/org/kernel/arch/kernel/cpu/temperature", "high",
+			"Cpu: %d\n", cpu);
 	} else {
 		printk(KERN_INFO "CPU%d: Temperature/speed normal\n", cpu);
+		send_kevent(KEVENT_GENERAL,
+			"/org/kernel/arch/kernel/cpu/temperature", "normal",
+			"Cpu: %d\n", cpu);
 	}
 }
 
diff -urN linux-2.6.8-rc2/include/linux/kevent.h linux/include/linux/kevent.h
--- linux-2.6.8-rc2/include/linux/kevent.h	1969-12-31 19:00:00.000000000 -0500
+++ linux/include/linux/kevent.h	2004-07-24 20:41:56.000000000 -0400
@@ -0,0 +1,39 @@
+#ifndef _LINUX_KEVENT_H
+#define _LINUX_KEVENT_H
+
+#include <linux/config.h>
+
+/* kevent types - these are used as the multicast group */
+enum kevent {
+	KEVENT_GENERAL	=	0,
+	KEVENT_STORAGE	=	1,
+	KEVENT_POWER	=	2,
+	KEVENT_FS	= 	3,
+	KEVENT_HOTPLUG	=	4,
+};
+
+#ifdef CONFIG_KERNEL_EVENTS
+
+int send_kevent(enum kevent type, const char *object,
+		const char *signal, const char *fmt, ...);
+
+int send_kevent_atomic(enum kevent type, const char *object,
+		const char *signal, const char *fmt, ...);
+
+#else
+
+static inline int send_kevent(enum kevent type,  const char *object,
+		const char *signal, const char *fmt, ...)
+{
+	return 0;
+}
+
+static inline int send_kevent_atomic(enum kevent type, const char *object,
+		const char *signal, const char *fmt, ...)
+{
+	return 0;
+}
+
+#endif /* ! CONFIG_KERNEL_EVENTS */
+
+#endif	/* _LINUX_KEVENT_H */
diff -urN linux-2.6.8-rc2/include/linux/netlink.h linux/include/linux/netlink.h
--- linux-2.6.8-rc2/include/linux/netlink.h	2004-07-23 22:18:04.000000000 -0400
+++ linux/include/linux/netlink.h	2004-07-24 20:41:56.000000000 -0400
@@ -17,6 +17,7 @@
 #define NETLINK_ROUTE6		11	/* af_inet6 route comm channel */
 #define NETLINK_IP6_FW		13
 #define NETLINK_DNRTMSG		14	/* DECnet routing messages */
+#define NETLINK_KEVENT		15	/* Kernel messages to userspace */
 #define NETLINK_TAPBASE		16	/* 16 to 31 are ethertap */
 
 #define MAX_LINKS 32		
diff -urN linux-2.6.8-rc2/init/Kconfig linux/init/Kconfig
--- linux-2.6.8-rc2/init/Kconfig	2004-07-23 22:18:04.000000000 -0400
+++ linux/init/Kconfig	2004-07-24 20:41:56.000000000 -0400
@@ -160,6 +160,20 @@
 	  logging of avc messages output).  Does not do system-call
 	  auditing without CONFIG_AUDITSYSCALL.
 
+config KERNEL_EVENTS
+	bool "Kernel Events Layer"
+	depends on NET
+	default y
+	help
+	  This option enables the kernel events layer, which is a simple
+	  mechanism for kernel-to-user communication over a netlink socket.
+	  The goal of the kernel events layer is to provide a simple and
+	  efficient logging, error, and events system.  Specifically, code
+	  is available to link the events into D-BUS.  Say Y, unless you
+	  are building a system requiring minimal memory consumption.
+
+	  D-BUS is available at http://dbus.freedesktop.org/
+
 config AUDITSYSCALL
 	bool "Enable system-call auditing support"
 	depends on AUDIT && (X86 || PPC64 || ARCH_S390 || IA64)
diff -urN linux-2.6.8-rc2/kernel/kevent.c linux/kernel/kevent.c
--- linux-2.6.8-rc2/kernel/kevent.c	1969-12-31 19:00:00.000000000 -0500
+++ linux/kernel/kevent.c	2004-07-24 20:42:19.000000000 -0400
@@ -0,0 +1,132 @@
+/*
+ * kernel/kevent.c - kernel event delivery over a netlink socket
+ * 
+ * Copyright (C) 2004 Red Hat, Inc.  All rights reserved.
+ *
+ * Licensed under the GNU GPL v2.
+ *
+ * Authors:
+ *	Arjan van de Ven	<arjanv@redhat.com>
+ *	Kay Sievers		<kay.sievers@vrfy.org>
+ *	Robert Love		<rml@novell.com>
+ */
+
+#include <linux/module.h>
+#include <linux/spinlock.h>
+#include <linux/socket.h>
+#include <linux/skbuff.h>
+#include <linux/netlink.h>
+#include <linux/string.h>
+#include <linux/kevent.h>
+#include <net/sock.h>
+
+/* There is one global netlink socket */
+static struct sock *kevent_sock = NULL;
+
+static int netlink_send(__u32 groups, int gfp_mask, const char *buffer, int len)
+{
+	struct sk_buff *skb;
+	char *data_start;
+
+	if (!kevent_sock)
+		return -EIO;
+
+	if (!buffer)
+		return -EINVAL;
+
+	if (len > PAGE_SIZE)
+		return -EINVAL;
+
+	skb = alloc_skb(len, gfp_mask);
+	if (!skb)
+		return -ENOMEM;
+	data_start = skb_put(skb, len);
+	memcpy(data_start, buffer, len);
+
+	return netlink_broadcast(kevent_sock, skb, 0, groups, gfp_mask);
+}
+
+static int do_send_kevent(enum kevent type, int gfp_mask, const char *object,
+			  const char *signal, const char *fmt, va_list args)
+{
+	char *buffer;
+	int len;
+	int ret;
+
+	if (!object)
+		return -EINVAL;
+
+	if (!signal)
+		return -EINVAL;
+
+	if (strlen(object) > PAGE_SIZE)
+		return -EINVAL;
+
+	buffer = (char *) alloc_page(gfp_mask);
+	if (!buffer)
+		return -ENOMEM;
+
+	snprintf(buffer, PAGE_SIZE, "From: %s\nSignal: %s\n", object, signal);
+	len = strlen(buffer);
+
+	/* possible auxiliary data */
+	if (fmt)
+		len += vscnprintf(buffer+len, PAGE_SIZE-len-1, fmt, args);
+	buffer[len++] = '\0';
+
+	ret = netlink_send((1 << type), gfp_mask, buffer, len);
+	free_page((unsigned long) buffer);
+
+	return ret;
+}
+
+/**
+ * send_kevent - send a message to user-space via the kernel events layer
+ */
+int send_kevent(enum kevent type, const char *object,
+		const char *signal, const char *fmt, ...)
+{
+	va_list args;
+	int ret;
+
+	va_start(args, fmt);
+	ret = do_send_kevent(type, GFP_KERNEL, object, signal, fmt, args);
+	va_end(args);
+
+	return ret;
+}
+
+EXPORT_SYMBOL_GPL(send_kevent);
+
+/**
+ * send_kevent_atomic - send a message to user-space via the kernel events layer
+ */
+int send_kevent_atomic(enum kevent type, const char *object,
+		const char *signal, const char *fmt, ...)
+{
+	va_list args;
+	int ret;
+
+	va_start(args, fmt);
+	ret = do_send_kevent(type, GFP_ATOMIC, object, signal, fmt, args);
+	va_end(args);
+
+	return ret;
+}
+
+EXPORT_SYMBOL_GPL(send_kevent_atomic);
+
+static int kevent_init(void)
+{
+	kevent_sock = netlink_kernel_create(NETLINK_KEVENT, NULL);
+
+	if (!kevent_sock) {
+		printk(KERN_ERR "kevent: "
+		       "unable to create netlink socket; aborting\n");
+		return -ENODEV;
+	}
+
+	return 0;
+}
+
+module_init(kevent_init);
diff -urN linux-2.6.8-rc2/kernel/Makefile linux/kernel/Makefile
--- linux-2.6.8-rc2/kernel/Makefile	2004-07-23 22:18:04.000000000 -0400
+++ linux/kernel/Makefile	2004-07-24 20:41:56.000000000 -0400
@@ -23,6 +23,7 @@
 obj-$(CONFIG_STOP_MACHINE) += stop_machine.o
 obj-$(CONFIG_AUDIT) += audit.o
 obj-$(CONFIG_AUDITSYSCALL) += auditsc.o
+obj-$(CONFIG_KERNEL_EVENTS) += kevent.o
 
 ifneq ($(CONFIG_IA64),y)
 # According to Alan Modra <alan@linuxcare.com.au>, the -fno-omit-frame-pointer is



^ permalink raw reply	[flat|nested] 41+ messages in thread

* Re: [patch] kernel events layer
  2004-07-24 18:19           ` Robert Love
@ 2004-07-25 18:11             ` Tim Hockin
  2004-07-25 19:08               ` Robert Love
  0 siblings, 1 reply; 41+ messages in thread
From: Tim Hockin @ 2004-07-25 18:11 UTC (permalink / raw)
  To: Robert Love; +Cc: dsaxena, Michael Clark, akpm, linux-kernel

On Sat, Jul 24, 2004 at 02:19:43PM -0400, Robert Love wrote:
> If some things can use the kobject path, we can use it in the argument
> field.  I am cool with that - that is exactly what I want, in fact.  But
> what we use as the naming convention needs to be something we can use
> uniformly.  Unfortunately not everything has a kobject backing it, and
> we cannot change that.

yeah.  I suppose that's true.  What's the meaning of the 'source' object,
generically?

> > This immediately strikes me as a really bad idea.  Stuff moves between
> > files.  Two files might really want to signal an event from the same
> > source.  
> 
> The signal name would be different.

I'm not sure why, yet, it just seems like a bad idea.

> I like your macro-izing idea and the notion of standardizing.  Someone
> else brought up a good example: we want _all_ disk drivers to emit the
> exact same signal for e.g. "disk full" so user-space can react to it.
> It needs to be consistent.  At least for driver error logging, we
> definitely want standards and macro-izing.  The translation point is
> another good reason for it.

So, when I was at Sun (no, I'm not at Sun anymore) there was lots of talk
of driver hardening and fault management.  At the time, the team in
question looked at the various event systems that currently exist in the
kernel or in some patches.  This list might be incomplete, but it's off
the top of my head.

- Netlink
- ACPI (/proc/acpi/event)
- hotplug
- IPMI (not merged maybe?)
- relayfs (not merged)
- evlog (last I saw, this was in big flux)

Now you're proposing netlink as the kevent subsystem.

Wouldn't it be nice if everything could converge?  Ok maybe not
EVERYTHING, but some of these?

These are the things I can see kevents being used for:

- Stateless messages which only matter if someone is listening.  Examples
  of this are "media changed" and stuff like that.

- Fault and error that matter no matter what, and can not afford to be
  dropped.  Examples are things like ECC errors, significant
  driver/subsystem errors, etc.

- System state messages, which really do want someone to be listening, but
  are otherwise discoverable.  Examples of this are "disk full" and
  similar.

So can kevents be used for all of these?  The fact that netlink does not
buffer events if there are no listeners (not saying it should..) makes it
unreliable for fault events.  Can these all be converged?

Sorry for rambling - kernel events has been on my mind for some time.

^ permalink raw reply	[flat|nested] 41+ messages in thread

* Re: [patch] kernel events layer
  2004-07-25 18:11             ` Tim Hockin
@ 2004-07-25 19:08               ` Robert Love
  2004-07-27  5:09                 ` Daniel Stekloff
  0 siblings, 1 reply; 41+ messages in thread
From: Robert Love @ 2004-07-25 19:08 UTC (permalink / raw)
  To: Tim Hockin; +Cc: dsaxena, Michael Clark, akpm, linux-kernel

On Sun, 2004-07-25 at 11:11 -0700, Tim Hockin wrote:

> yeah.  I suppose that's true.  What's the meaning of the 'source' object,
> generically?

The source of the signal.  The object, in fact - very similar to kobject
in concept, but we cannot use that since one does not exist for all uses
(plus, Greg tells me it would be expensive to constantly build the
kobject path for each event).

The only hard requirement is that it is unique, like any URI.  But it
would be nice if it was self-descriptive and double nice if it looked
like URI's used by other object systems.  The "drivers/char/cdrom" style
fits that bill.

> So, when I was at Sun (no, I'm not at Sun anymore) there was lots of talk
> of driver hardening and fault management.  At the time, the team in
> question looked at the various event systems that currently exist in the
> kernel or in some patches.  This list might be incomplete, but it's off
> the top of my head.
> 
> - Netlink
> - ACPI (/proc/acpi/event)
> - hotplug
> - IPMI (not merged maybe?)
> - relayfs (not merged)
> - evlog (last I saw, this was in big flux)
> 
> Now you're proposing netlink as the kevent subsystem.
> 
> Wouldn't it be nice if everything could converge?  Ok maybe not
> EVERYTHING, but some of these?

Yup.

So the last two are not being merged.  It seems unlikely that they will
be, but if they were, we could use them as the backbone of the event
system.  The medium is not really what interests me (or you, either - I
think we both are interested in the results, right?).  Technically
speaking, though, netlink does seem the best choice.  I look at kevent
as serving the same purpose as these last two.

I don't know much about IPMI, but I thought it was a hardware spec.  I'm
not sure it counts here.  If it communicates with user-space, it would
be nice if it used netlink or /proc or even kevent and not its own
thing.

So that leaves ACPI and hotplug.  ACPI absolutely should switch to using
kevent.  It is the perfect user, right?  Send the ACPI events out via
kevent.  You wouldn't even need acpid - just have policy agents
listening on D-BUS or directly on the netlink socket or whatever.

As for hotplug, I'll leave that one to Greg.  He has some thoughts here.

> These are the things I can see kevents being used for:
> 
> - Stateless messages which only matter if someone is listening.  Examples
>   of this are "media changed" and stuff like that.
> 
> - Fault and error that matter no matter what, and can not afford to be
>   dropped.  Examples are things like ECC errors, significant
>   driver/subsystem errors, etc.
> 
> - System state messages, which really do want someone to be listening, but
>   are otherwise discoverable.  Examples of this are "disk full" and
>   similar.
> 
> So can kevents be used for all of these?  The fact that netlink does not
> buffer events if there are no listeners (not saying it should..) makes it
> unreliable for fault events.  Can these all be converged?

I think kevents can (and should) be used for all of these.  But the lack
of buffering is something we need to look into.

> Sorry for rambling - kernel events has been on my mind for some time.

Mine too.  All good input, thanks.

	Robert Love

^ permalink raw reply	[flat|nested] 41+ messages in thread

* Re: [patch] kernel events layer
  2004-07-24 18:13           ` Robert Love
@ 2004-07-26 20:08             ` Rutger Nijlunsing
  2004-07-26 20:10               ` Robert Love
  0 siblings, 1 reply; 41+ messages in thread
From: Rutger Nijlunsing @ 2004-07-26 20:08 UTC (permalink / raw)
  To: Robert Love; +Cc: dsaxena, Michael Clark, akpm, linux-kernel

[snip]
> 
> Criteria for adding the event would be that user-space needs to know
> about it, and would normally have to poll to get the information.  If
> the event is so non-important that right now no one even knows about it
> or cares about it, it may not be worth adding.

So the events are some kind of structured printk()s, right? So why not
printk() as a side-effect of sending an event. Then we could change
relevant printk()s (but not the debug ones for example) and thereby
remove the existing printk() and (re)structure them in the process.

And if this (together with a file-changed notifier) could help me stop
polling 28 files once a second for events (/var/log recursively,
/proc/modules, /proc/mounts, /proc/net/arp and 'netstat -ltup' output)
I would be really happy...

-- 
Rutger Nijlunsing ---------------------------- rutger ed tux tmfweb nl
never attribute to a conspiracy which can be explained by incompetence
----------------------------------------------------------------------

^ permalink raw reply	[flat|nested] 41+ messages in thread

* Re: [patch] kernel events layer
  2004-07-26 20:08             ` Rutger Nijlunsing
@ 2004-07-26 20:10               ` Robert Love
  0 siblings, 0 replies; 41+ messages in thread
From: Robert Love @ 2004-07-26 20:10 UTC (permalink / raw)
  To: linux-kernel; +Cc: dsaxena, Michael Clark, akpm, linux-kernel

On Mon, 2004-07-26 at 22:08 +0200, Rutger Nijlunsing wrote:

> So the events are some kind of structured printk()s, right? So why not
> printk() as a side-effect of sending an event. Then we could change
> relevant printk()s (but not the debug ones for example) and thereby
> remove the existing printk() and (re)structure them in the process.

I am not so sure I like this.  I want less printk's, not more.

Maybe we can add a send_event_and_printk() if the demand is high.  That
is fine, but I do not want the default events to cause printks.  Printks
are usually human readable sentences, change often, terribly unstable,
etc.  The events should be more basic and stable.


> And if this (together with a file-changed notifier) could help me stop
> polling 28 files once a second for events (/var/log recursively,
> /proc/modules, /proc/mounts, /proc/net/arp and 'netstat -ltup' output)
> I would be really happy...

That is the idea ;-)

	Robert Love



^ permalink raw reply	[flat|nested] 41+ messages in thread

* Re: [patch] kernel events layer
  2004-07-25 19:08               ` Robert Love
@ 2004-07-27  5:09                 ` Daniel Stekloff
  0 siblings, 0 replies; 41+ messages in thread
From: Daniel Stekloff @ 2004-07-27  5:09 UTC (permalink / raw)
  To: Robert Love; +Cc: Tim Hockin, dsaxena, Michael Clark, akpm, linux-kernel

On Sun, 2004-07-25 at 12:08, Robert Love wrote:
> On Sun, 2004-07-25 at 11:11 -0700, Tim Hockin wrote:
> > So, when I was at Sun (no, I'm not at Sun anymore) there was lots of talk
> > of driver hardening and fault management.  At the time, the team in
> > question looked at the various event systems that currently exist in the
> > kernel or in some patches.  This list might be incomplete, but it's off
> > the top of my head.
> > 
> > - Netlink
> > - ACPI (/proc/acpi/event)
> > - hotplug
> > - IPMI (not merged maybe?)
> > - relayfs (not merged)
> > - evlog (last I saw, this was in big flux)
> > 
> > Now you're proposing netlink as the kevent subsystem.
> > 
> > Wouldn't it be nice if everything could converge?  Ok maybe not
> > EVERYTHING, but some of these?
> 
> Yup.
> 
> So the last two are not being merged.  It seems unlikely that they will
> be, but if they were, we could use them as the backbone of the event
> system.  The medium is not really what interests me (or you, either - I
> think we both are interested in the results, right?).  Technically
> speaking, though, netlink does seem the best choice.  I look at kevent
> as serving the same purpose as these last two.
> 
> I don't know much about IPMI, but I thought it was a hardware spec.  I'm
> not sure it counts here.  If it communicates with user-space, it would
> be nice if it used netlink or /proc or even kevent and not its own
> thing.
> 
> So that leaves ACPI and hotplug.  ACPI absolutely should switch to using
> kevent.  It is the perfect user, right?  Send the ACPI events out via
> kevent.  You wouldn't even need acpid - just have policy agents
> listening on D-BUS or directly on the netlink socket or whatever.
> 
> As for hotplug, I'll leave that one to Greg.  He has some thoughts here.
> 
> > These are the things I can see kevents being used for:
> > 
> > - Stateless messages which only matter if someone is listening.  Examples
> >   of this are "media changed" and stuff like that.
> > 
> > - Fault and error that matter no matter what, and can not afford to be
> >   dropped.  Examples are things like ECC errors, significant
> >   driver/subsystem errors, etc.
> > 
> > - System state messages, which really do want someone to be listening, but
> >   are otherwise discoverable.  Examples of this are "disk full" and
> >   similar.
> > 
> > So can kevents be used for all of these?  The fact that netlink does not
> > buffer events if there are no listeners (not saying it should..) makes it
> > unreliable for fault events.  Can these all be converged?
> 
> I think kevents can (and should) be used for all of these.  But the lack
> of buffering is something we need to look into.


There are other issues that we may need to look into as well, like:

1) How do we handle events that occur in a hardware interrupt context?
One thing that has been suggested in the past was to delay broadcast and
schedule a tasklet to do the broadcast. Or, can we make the netlink
broadcast interrupt safe?

2) What about the recent thread on firmware errors for ppc64 systems?
They have errors at early boot and those error messages may be in binary
format? Should we buffer messages until netlink becomes available and
then send the messages and remove the buffer? As for the binary format,
should we change kevents so format and args are kept separate to User
Space? This would have other benefits as well.

Just some thoughts....

Thanks,

Dan


^ permalink raw reply	[flat|nested] 41+ messages in thread

* Re: [patch] kernel events layer
  2004-07-24  3:14   ` Robert Love
  2004-07-24  9:15     ` Michael Clark
  2004-07-24 15:08     ` Deepak Saxena
@ 2004-08-09 13:29     ` Pavel Machek
  2004-08-09 19:47       ` Robert Love
  2 siblings, 1 reply; 41+ messages in thread
From: Pavel Machek @ 2004-08-09 13:29 UTC (permalink / raw)
  To: Robert Love; +Cc: Michael Clark, akpm, linux-kernel

Hi!

> > Should there be some sharing with the device naming of sysfs or
> > will we introduce a new one? ie sysfs uses:
> >
> > devices/system/cpu/cpu0/<blah>
> >
> > Would it be a better way to have a version that takes struct kobject
> > to enforce consistency in the device naming scheme. This also means
> > userspace would automatically know where to look in /sys if further
> > info was needed.
> 
> No, we want to give an interface that matches the sort of provider URI
> used by object systems such as CORBA, D-BUS, and DCOP.  We also do _not_
> want to put policy in the kernel.

Funny... you want to create new namespace and argue "no policy".

If you want to translate it to something URI-like, you should
do that in userspace. 

-- 
64 bytes from 195.113.31.123: icmp_seq=28 ttl=51 time=448769.1 ms         


^ permalink raw reply	[flat|nested] 41+ messages in thread

* Re: [patch] kernel events layer
  2004-08-09 13:29     ` Pavel Machek
@ 2004-08-09 19:47       ` Robert Love
  0 siblings, 0 replies; 41+ messages in thread
From: Robert Love @ 2004-08-09 19:47 UTC (permalink / raw)
  To: Pavel Machek; +Cc: Michael Clark, akpm, linux-kernel

On Mon, 2004-08-09 at 15:29 +0200, Pavel Machek wrote:

> Funny... you want to create new namespace and argue "no policy".

The idea was to create the simplest namespace possible (simply using the
source path names) since we need _something_ to send to user-space.

However ...

> If you want to translate it to something URI-like, you should
> do that in userspace. 

It is no longer an issue, because the current plan is to tie the events
to kobjects.

	Robert Love



^ permalink raw reply	[flat|nested] 41+ messages in thread

end of thread, other threads:[~2004-08-09 19:49 UTC | newest]

Thread overview: 41+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2004-07-23 17:41 [patch] kernel events layer Robert Love
2004-07-23 18:25 ` Tim Hockin
2004-07-23 18:31 ` Muli Ben-Yehuda
2004-07-23 18:35   ` Robert Love
2004-07-23 21:32 ` Dan Aloni
2004-07-24  2:47   ` Robert Love
2004-07-24  4:42     ` Keith Owens
2004-07-24  5:00       ` Robert Love
2004-07-24  8:11         ` Andrew Morton
2004-07-24  5:37           ` Robert Love
2004-07-24  6:02             ` Robert Love
2004-07-24  9:43               ` Wichert Akkerman
2004-07-24 20:21               ` James Morris
2004-07-25  2:12                 ` Robert Love
2004-07-24  6:53       ` Paul Jackson
2004-07-24 11:37       ` Bernd Petrovitsch
2004-07-24  3:02 ` Michael Clark
2004-07-24  3:14   ` Robert Love
2004-07-24  9:15     ` Michael Clark
2004-07-24 15:08     ` Deepak Saxena
2004-07-24 15:45       ` Robert Love
2004-07-24 17:33         ` Ryan Anderson
2004-07-24 17:46         ` Tim Hockin
2004-07-24 18:19           ` Robert Love
2004-07-25 18:11             ` Tim Hockin
2004-07-25 19:08               ` Robert Love
2004-07-27  5:09                 ` Daniel Stekloff
2004-07-24 17:54         ` Deepak Saxena
2004-07-24 18:13           ` Robert Love
2004-07-26 20:08             ` Rutger Nijlunsing
2004-07-26 20:10               ` Robert Love
2004-08-09 13:29     ` Pavel Machek
2004-08-09 19:47       ` Robert Love
2004-07-24  3:03 ` Andrew Morton
2004-07-24  2:14   ` Robert Love
2004-07-24  5:15     ` Chris Wedgwood
2004-07-24  5:41       ` Robert Love
2004-07-24  5:45         ` Chris Wedgwood
2004-07-24  3:11   ` [patch] kernel events layer, updated Robert Love
2004-07-24  7:58     ` Deepak Saxena
2004-07-24  8:23       ` Deepak Saxena

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox