PowerOP 1/3: PowerOP core

public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed

* PowerOP 1/3: PowerOP core
@ 2005-08-09  2:51 Todd Poynor
  2005-08-09  7:32 ` Greg KH
  2005-08-09 16:07 ` [linux-pm] " Geoff Levand
  0 siblings, 2 replies; 8+ messages in thread
From: Todd Poynor @ 2005-08-09  2:51 UTC (permalink / raw)
  To: linux-kernel, linux-pm, cpufreq

The PowerOP core provides the struct powerop_point that defines an
operating point, and trivial routines for calling the platform-specific
backend to read and set operating points and for registering a single
machine-dependent backend.

Operating points are an array of 32-bit integer-valued power parameters,
almost entirely interpreted by the platform-specific backend.
Higher-layer software shares information on the power parameters made
available by the backend (that is, the interpretation of the integers in
the array) via header files.  The value -1 is commonly used to denote an
unspecified value, but in situations where all ones is a valid power
parameter value some extra smarts may be needed.

It optionally adds sysfs attributes for reading and writing individual
power parameter values, mainly for diagnostic purposes.  For example,
using the Intel Centrino patch that follows, parameters for frequency
and core voltage for CPU #0 appear:

   # ls /sys/powerop/param/
   cpu0 v0

Index: linux-2.6.12/drivers/Makefile
===================================================================
--- linux-2.6.12.orig/drivers/Makefile	2005-06-30 01:32:17.000000000 +0000
+++ linux-2.6.12/drivers/Makefile	2005-07-29 00:39:44.000000000 +0000
@@ -58,6 +58,7 @@
 obj-$(CONFIG_ISDN)		+= isdn/
 obj-$(CONFIG_MCA)		+= mca/
 obj-$(CONFIG_EISA)		+= eisa/
+obj-$(CONFIG_POWEROP)		+= powerop/
 obj-$(CONFIG_CPU_FREQ)		+= cpufreq/
 obj-$(CONFIG_MMC)		+= mmc/
 obj-$(CONFIG_INFINIBAND)	+= infiniband/
Index: linux-2.6.12/drivers/powerop/Kconfig
===================================================================
--- linux-2.6.12.orig/drivers/powerop/Kconfig	1970-01-01 00:00:00.000000000 +0000
+++ linux-2.6.12/drivers/powerop/Kconfig	2005-07-28 07:27:26.000000000 +0000
@@ -0,0 +1,17 @@
+#
+# powerop
+#
+
+menu "Platform Core Power Management"
+
+config POWEROP
+	bool "PowerOP Platform Core Power Management"
+	help
+
+config POWEROP_SYSFS
+	bool "  Enable PowerOP sysfs interface"
+	depends on POWEROP && SYSFS
+	help
+
+endmenu
+
Index: linux-2.6.12/drivers/powerop/Makefile
===================================================================
--- linux-2.6.12.orig/drivers/powerop/Makefile	1970-01-01 00:00:00.000000000 +0000
+++ linux-2.6.12/drivers/powerop/Makefile	2005-07-28 07:29:04.000000000 +0000
@@ -0,0 +1,2 @@
+obj-$(CONFIG_POWEROP)		+= powerop.o
+
Index: linux-2.6.12/drivers/powerop/powerop.c
===================================================================
--- linux-2.6.12.orig/drivers/powerop/powerop.c	1970-01-01 00:00:00.000000000 +0000
+++ linux-2.6.12/drivers/powerop/powerop.c	2005-08-04 19:50:38.000000000 +0000
@@ -0,0 +1,253 @@
+/*
+ * PowerOP generic core functions
+ *
+ * Author: Todd Poynor <tpoynor@mvista.com>
+ *
+ * 2005 (c) MontaVista Software, Inc. This file is licensed under
+ * the terms of the GNU General Public License version 2. This program
+ * is licensed "as is" without any warranty of any kind, whether express
+ * or implied.
+ */
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/powerop.h>
+#include <linux/init.h>
+#include <linux/kobject.h>
+#include <linux/slab.h>
+#include <linux/string.h>
+#include <linux/errno.h>
+
+static struct powerop_driver *powerop_driver;
+static int powerop_subsys_init;
+
+int
+powerop_set_point(struct powerop_point *point)
+{
+	return powerop_driver && powerop_driver->set_point ? 
+		powerop_driver->set_point(point) : -EINVAL;
+}
+
+
+int
+powerop_get_point(struct powerop_point *point)
+{
+	return powerop_driver && powerop_driver->get_point ? 
+		powerop_driver->get_point(point) : -EINVAL;
+}
+
+
+EXPORT_SYMBOL_GPL(powerop_set_point);
+EXPORT_SYMBOL_GPL(powerop_get_point);
+
+#ifdef CONFIG_POWEROP_SYSFS
+decl_subsys(powerop, NULL, NULL);
+
+static void powerop_kobj_release(struct kobject *kobj)
+{
+	return;
+}
+
+struct powerop_param_attribute {
+	int index;
+        struct attribute        attr;
+};
+
+#define to_param_attr(_attr) container_of(_attr,\
+	struct powerop_param_attribute,attr)
+
+static ssize_t
+powerop_param_attr_show(struct kobject * kobj, struct attribute * attr,
+			char * buf)
+{
+	struct powerop_param_attribute * param_attr = to_param_attr(attr);
+	struct powerop_point point;
+	ssize_t ret = 0;
+
+	if ((ret = powerop_get_point(&point)) == 0)
+		ret = sprintf(buf, "%d\n", point.param[param_attr->index]);
+	return ret;
+}
+
+static ssize_t
+powerop_param_attr_store(struct kobject * kobj, struct attribute * attr, 
+			 const char * buf, size_t count)
+{
+	struct powerop_param_attribute * param_attr = to_param_attr(attr);
+	struct powerop_point point;
+	int i;
+	ssize_t ret = 0;
+
+	for (i = 0; i < powerop_driver->nr_params; i++) 
+		if (i == param_attr->index)
+			point.param[i] = simple_strtol(buf,NULL,0);
+		else
+			point.param[i] = -1;
+
+	if ((ret = powerop_set_point(&point)) == 0)
+		ret = count;
+
+	return ret;
+}
+
+static struct sysfs_ops powerop_param_attr_sysfs_ops = {
+	.show	= powerop_param_attr_show,
+	.store	= powerop_param_attr_store,
+};
+
+static struct kobj_type ktype_powerop_driver = {
+	.release        = powerop_kobj_release,
+	.sysfs_ops	= &powerop_param_attr_sysfs_ops,
+};
+
+static struct powerop_param_attribute *param_attr[POWEROP_DRIVER_MAX_PARAMS];
+
+static int powerop_driver_sysfs_register(struct powerop_driver *powerop_driver)
+{
+        int error, i;
+
+	if (! powerop_subsys_init)
+		return 0;
+
+	kobject_set_name(&powerop_driver->kobj, "param");
+	powerop_driver->kobj.ktype = &ktype_powerop_driver;
+	powerop_driver->kobj.kset = &powerop_subsys.kset;
+
+	if ((error = kobject_register(&powerop_driver->kobj))) {
+		printk(KERN_ERR "kobject_register for PowerOP driver failed.\n");
+		return error;
+	}
+
+	for (i = 0; i < powerop_driver->nr_params; i++) {
+		if (! (param_attr[i] = 
+		       kmalloc(sizeof(struct powerop_param_attribute),
+			       GFP_KERNEL))) {
+			printk(KERN_ERR "PowerOP: kmalloc failed.\n");
+			return -ENOMEM;
+		}
+
+		memset(param_attr[i], 0, 
+		       sizeof(struct powerop_param_attribute));
+		param_attr[i]->index = i;
+		param_attr[i]->attr.name = 
+			powerop_driver->param_names[i];
+		param_attr[i]->attr.mode = 0644;
+		if ((error = sysfs_create_file(&powerop_driver->kobj,
+					       &param_attr[i]->attr))) {
+			printk(KERN_ERR "sysfs_create_file for PowerOP param failed.\n");
+			return error;
+		}
+	}
+
+        return error;
+}
+
+static int __init powerop_sysfs_init(void)
+{
+        int error;
+
+	if ((error = subsystem_register(&powerop_subsys)))
+		printk(KERN_ERR "PowerOP subsystem register failed.\n");
+	else
+		powerop_subsys_init = 1;
+	
+
+	if (! error && powerop_driver)
+		error = powerop_driver_sysfs_register(powerop_driver);
+
+
+        return error;
+}
+
+static void powerop_driver_sysfs_unregister(struct powerop_driver *powerop_driver)
+{
+	int i;
+
+	if (! powerop_subsys_init || ! powerop_driver)
+		return;
+
+	for (i = 0; i < powerop_driver->nr_params; i++) {
+		if (param_attr[i]) {
+			sysfs_remove_file(&powerop_driver->kobj,
+					  &param_attr[i]->attr);
+			kfree(param_attr[i]);
+			param_attr[i] = 0;
+		}
+	}
+
+	kobject_unregister(&powerop_driver->kobj);
+}
+
+static void __exit powerop_sysfs_exit(void)
+{
+	powerop_driver_sysfs_unregister(powerop_driver);
+
+	if (powerop_subsys_init)
+		subsystem_unregister(&powerop_subsys);
+}
+
+#else /* CONFIG_POWEROP_SYSFS */
+static int powerop_driver_sysfs_init(struct powerop_driver *powerop_driver)
+{
+	return 0;
+}
+
+static int __init powerop_sysfs_init(void)
+{
+	return 0;
+}
+
+static void powerop_driver_sysfs_unregister(struct powerop_driver *powerop_driver)
+{
+}
+
+static void __exit powerop_sysfs_exit(void)
+{
+}
+#endif /* CONFIG_POWEROP_SYSFS */
+
+int powerop_driver_register(struct powerop_driver *p)
+{
+	int error;
+
+	if (! powerop_driver) {
+		printk(KERN_INFO "PowerOP registering driver %s.\n", p->name);
+		if ((error = powerop_driver_sysfs_register(p)))
+			powerop_driver_sysfs_unregister(p);
+		else
+			powerop_driver = p;
+
+	} else
+		error = -EBUSY;
+
+	return error;
+}
+
+
+void powerop_driver_unregister(struct powerop_driver *p)
+{
+	if (powerop_driver == p) {
+		printk(KERN_INFO "PowerOP unregistering driver %s.\n", p->name);
+		powerop_driver_sysfs_unregister(powerop_driver);
+		powerop_driver = NULL;
+	}
+}
+
+EXPORT_SYMBOL_GPL(powerop_driver_register);
+EXPORT_SYMBOL_GPL(powerop_driver_unregister);
+
+static int __init powerop_init(void)
+{
+        return powerop_sysfs_init();
+}
+
+static void __exit powerop_exit(void)
+{
+	powerop_sysfs_exit();
+}
+
+module_init(powerop_init);
+module_exit(powerop_exit);
+
+MODULE_DESCRIPTION("PowerOP Power Management");
+MODULE_LICENSE("GPL");
Index: linux-2.6.12/include/linux/powerop.h
===================================================================
--- linux-2.6.12.orig/include/linux/powerop.h	1970-01-01 00:00:00.000000000 +0000
+++ linux-2.6.12/include/linux/powerop.h	2005-08-03 01:10:55.000000000 +0000
@@ -0,0 +1,36 @@
+/*
+ * PowerOP core definitions
+ *
+ * Author: Todd Poynor <tpoynor@mvista.com>
+ *
+ * 2005 (c) MontaVista Software, Inc. This file is licensed under
+ * the terms of the GNU General Public License version 2. This program
+ * is licensed "as is" without any warranty of any kind, whether express
+ * or implied.
+ */
+
+#ifndef __POWEROP_H__
+#define __POWEROP_H__
+
+#include <linux/kobject.h>
+#include <asm/powerop.h>
+
+struct powerop_point {
+	int param[POWEROP_DRIVER_MAX_PARAMS];
+};
+
+struct powerop_driver {
+	char *name;
+	struct kobject kobj;
+	int nr_params;
+	char **param_names;
+	int (*set_point)(struct powerop_point *point);
+	int (*get_point)(struct powerop_point *point);
+};
+
+int powerop_set_point(struct powerop_point *point);
+int powerop_get_point(struct powerop_point *point);
+int powerop_driver_register(struct powerop_driver *p);
+void powerop_driver_unregister(struct powerop_driver *p);
+
+#endif /*__POWEROP_H__*/

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: PowerOP 1/3: PowerOP core
  2005-08-09  2:51 PowerOP 1/3: PowerOP core Todd Poynor
@ 2005-08-09  7:32 ` Greg KH
  2005-08-09 16:07 ` [linux-pm] " Geoff Levand
  1 sibling, 0 replies; 8+ messages in thread
From: Greg KH @ 2005-08-09  7:32 UTC (permalink / raw)
  To: Todd Poynor; +Cc: linux-kernel, linux-pm, cpufreq

On Mon, Aug 08, 2005 at 07:51:57PM -0700, Todd Poynor wrote:
> +static void powerop_kobj_release(struct kobject *kobj)
> +{
> +	return;
> +}

Hint, if your release function is just a noop like this, your code is
wrong.  The kernel requires you to have a release function for a reason.
Please fix it.

> +struct powerop_param_attribute {
> +	int index;
> +        struct attribute        attr;
> +};

space vs. tab issue.

> +static ssize_t
> +powerop_param_attr_show(struct kobject * kobj, struct attribute * attr,
> +			char * buf)
> +{
> +	struct powerop_param_attribute * param_attr = to_param_attr(attr);
> +	struct powerop_point point;
> +	ssize_t ret = 0;
> +
> +	if ((ret = powerop_get_point(&point)) == 0)
> +		ret = sprintf(buf, "%d\n", point.param[param_attr->index]);

Please break this up into 3 lines instead of 2 to make it easier to read
and maintain over time.

You do this in other places too, please fix them too.

thanks,

greg k-h

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [linux-pm] PowerOP 1/3: PowerOP core
  2005-08-09  2:51 PowerOP 1/3: PowerOP core Todd Poynor
  2005-08-09  7:32 ` Greg KH
@ 2005-08-09 16:07 ` Geoff Levand
  2005-08-10  0:33   ` Todd Poynor
  1 sibling, 1 reply; 8+ messages in thread
From: Geoff Levand @ 2005-08-09 16:07 UTC (permalink / raw)
  To: Todd Poynor; +Cc: linux-kernel, linux-pm, cpufreq

Todd Poynor wrote:
...
> ===================================================================
> --- linux-2.6.12.orig/include/linux/powerop.h	1970-01-01
> 00:00:00.000000000 +0000
> +++ linux-2.6.12/include/linux/powerop.h	2005-08-03
> 01:10:55.000000000 +0000
> @@ -0,0 +1,36 @@
> +/*
> + * PowerOP core definitions
> + *
> + * Author: Todd Poynor <tpoynor@mvista.com>
> + *
> + * 2005 (c) MontaVista Software, Inc. This file is licensed under
> + * the terms of the GNU General Public License version 2. This program
> + * is licensed "as is" without any warranty of any kind, whether
> express
> + * or implied.
> + */
> +
> +#ifndef __POWEROP_H__
> +#define __POWEROP_H__
> +
> +#include <linux/kobject.h>
> +#include <asm/powerop.h>
> +
> +struct powerop_point {
> +	int param[POWEROP_DRIVER_MAX_PARAMS];
> +};

I'm wondering if anything could be gained by having the whole 
struct powerop_point defined in asm/powerop.h, and treat it as an 
opaque structure at this level.  That way, things other than just 
ints could be passed between the policy manager and the backend, 
although I guess that breaks the beauty of the simplicity and would 
complicate the sys-fs interface, etc.  I'm interested to hear your 
comments.

Another point is that a policy manager would need to poll the system 
and/or get events and then act.  Your powerop work here only provides 
a (one way) piece of the final action.  Any comments regarding a more 
general interface?

-Geoff

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [linux-pm] PowerOP 1/3: PowerOP core
  2005-08-09 16:07 ` [linux-pm] " Geoff Levand
@ 2005-08-10  0:33   ` Todd Poynor
  2005-08-10 13:58     ` Daniel Petrini
  2005-08-12 16:23     ` david-b
  0 siblings, 2 replies; 8+ messages in thread
From: Todd Poynor @ 2005-08-10  0:33 UTC (permalink / raw)
  To: Geoff Levand; +Cc: linux-kernel, linux-pm, cpufreq

Geoff Levand wrote:

> I'm wondering if anything could be gained by having the whole 
> struct powerop_point defined in asm/powerop.h, and treat it as an 
> opaque structure at this level.  That way, things other than just 
> ints could be passed between the policy manager and the backend, 
> although I guess that breaks the beauty of the simplicity and would 
> complicate the sys-fs interface, etc.  I'm interested to hear your 
> comments.

Making the "operating point" data structure entirely platform-specific 
should be OK.  There's a little value to having generic pieces handle 
some common chores (such as the sysfs interfaces), but even for integers 
decimal vs. hex formatting is nicer depending on the type of value. 
Since most values that have been managed using similar interfaces thus 
far have been flags, register values, voltages, etc. using integers has 
worked well and nicely simplified the platform backend, but if there's a 
need for other data types then should be doable.

> Another point is that a policy manager would need to poll the system 
> and/or get events and then act.  Your powerop work here only provides 
> a (one way) piece of the final action.  Any comments regarding a more 
> general interface?

What's discussed here is probably the bottommost layer of a power 
management software stack: to read and write the platform-specific 
system power parameters, optionally arranged into a mutually-consistent 
set called an "operating point".  Power policy management is a large, 
thorny topic that I wasn't trying to tackle now.

So far as kernel-to-userspace event notification goes (assuming the 
power policy manager is in userspace, which is certainly where I'd 
recommend), ACPI has a procfs-based communication channel but the 
kobject_uevent stuff looks like the way I'd go, and it's somewhere on my 
list to come up with a patch that does that as well.

If these general ideas of arbitrary platform power parameters and 
operating points are deemed worthy of continued consideration, I'll 
propose what I view is the next step: interfaces to create and activate 
operating points from userspace.

At that point it should be possible to write power policy management 
applications for systems that can benefit from this generalized notion 
of operating points: create the operating points that match the system 
usage models (in the case of many embedded systems, the system is some 
mode with different power/performance characteristics such as audio 
playback vs. mobile phone call in progress) and power needs (e.g., low 
battery strength vs. high strength) and activate operating points based 
on events received (new app running, low battery warning, etc.).

Any opinions on all that?  Thanks,

-- 
Todd

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [linux-pm] PowerOP 1/3: PowerOP core
  2005-08-10  0:33   ` Todd Poynor
@ 2005-08-10 13:58     ` Daniel Petrini
  2005-08-11  0:25       ` Todd Poynor
  2005-08-12 16:23     ` david-b
  1 sibling, 1 reply; 8+ messages in thread
From: Daniel Petrini @ 2005-08-10 13:58 UTC (permalink / raw)
  To: Todd Poynor; +Cc: Geoff Levand, cpufreq, linux-pm, linux-kernel

> If these general ideas of arbitrary platform power parameters and
> operating points are deemed worthy of continued consideration, I'll
> propose what I view is the next step: interfaces to create and activate
> operating points from userspace.
> 
> At that point it should be possible to write power policy management
> applications for systems that can benefit from this generalized notion
> of operating points: create the operating points that match the system
> usage models (in the case of many embedded systems, the system is some
> mode with different power/performance characteristics such as audio
> playback vs. mobile phone call in progress) and power needs (e.g., low
> battery strength vs. high strength) and activate operating points based
> on events received (new app running, low battery warning, etc.).
> 
> Any opinions on all that?  Thanks,
> 
> --
> Todd

Hi,

I'd like to have an idea of how the powerop would evolve to address:

a) exporting all operating points to sysfs - that would be useful for
a policy manager in user space, or the user policy will already be
aware of the ops?

b) Constraints: if I would like to change to a op and such a
transition is not allowed in hardware, what part of the software will
test it? The set/get powerop functions, the higher layers or even some
lower layer (don't know if expected) ?

thanks,

Daniel
-- 
10LE - Linux
INdT - Manaus - Brazil

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [linux-pm] PowerOP 1/3: PowerOP core
  2005-08-10 13:58     ` Daniel Petrini
@ 2005-08-11  0:25       ` Todd Poynor
  0 siblings, 0 replies; 8+ messages in thread
From: Todd Poynor @ 2005-08-11  0:25 UTC (permalink / raw)
  To: Daniel Petrini; +Cc: Geoff Levand, cpufreq, linux-pm, linux-kernel

Daniel Petrini wrote:

> I'd like to have an idea of how the powerop would evolve to address:
> 
> a) exporting all operating points to sysfs - that would be useful for
> a policy manager in user space, or the user policy will already be
> aware of the ops?

For different usage models I'd expect to see both situations.  In one 
situation (classic desktop/server), a canned set of operating points may 
bee preconfigured (for example, the cpufreq support for Centrino), and 
automated software or an interactive user knows how to select an 
appropriate point (like the existing cpufreq algorithms such as "choose 
the lowest (powersave) point").  In the other situation (classic 
embedded), a system designer has chosen a number of useful operating 
points and configures software to choose an appropriate point based on 
system state ("the MPEG4 video app is running"), and that software knows 
what points are available and in what situations the points should apply.

> b) Constraints: if I would like to change to a op and such a
> transition is not allowed in hardware, what part of the software will
> test it? The set/get powerop functions, the higher layers or even some
> lower layer (don't know if expected) ?

Any sort of policy on what operating points should be allowed is 
targeted for a higher layer than PowerOP.  As you mention, there are 
situations where device needs constrain the operating points that can be 
set (for example, a PXA27x has modes that disable PLLs and/or run clocks 
at low speeds such that certain devices, if powered on, will wedge). 
Intelligence on what to do about that situation, if anything, can be 
placed in the high-layer power policy management application (this is 
probably the best answer), and/or the mid-layer power management 
framework code can also attempt to enforce these rules.

Stepping up on my soapbox for a moment, it is always recommended to have 
the power policy management application be aware of what the constraints 
are on operating points and set an appropriate power policy based on 
that information.  This may entail sending event messages from the 
drivers to the power policy manager app, to coordinate changes in device 
state with changes in policy.  If the constraints change very rapidly 
then it may make sense to preconfigure the rules on which operating 
point to choose in the kernel to avoid userspace interactions (and this 
is what the DPM device constraints feature is intended to do).  Relying 
on the power management mid-layer to block attempts to set an 
incompatible operating point is not recommended, but can function 
reasonably well if the mid layer is smart enough to know what the next 
best choice is and set that operating point instead.

-- 
Todd

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [linux-pm] PowerOP 1/3: PowerOP core
  2005-08-10  0:33   ` Todd Poynor
  2005-08-10 13:58     ` Daniel Petrini
@ 2005-08-12 16:23     ` david-b
  2005-08-13  1:06       ` Todd Poynor
  1 sibling, 1 reply; 8+ messages in thread
From: david-b @ 2005-08-12 16:23 UTC (permalink / raw)
  To: tpoynor, geoffrey.levand; +Cc: linux-pm, linux-kernel, cpufreq

How well would _this_ notion of an operating point scale up?

I have this feeling that it's maybe better attuned to "scale down"
sorts of problems (maybe cell phones) than to a big NUMA box.  I can
see how a batch scheduled server might want to fire up only enough
components to run the next simulation, but I wonder if maybe systems
dynamically managing lots of resources might not be better off with
some other model ... where userspace makes higher level decisions,
and the kernel is adaptive within a potentially big solution space.
(Likewise, maybe some of the smaller systems would too.)

Also, I suspect everyone would be happier if cpufreq were left
responsible for the CPU-specific parts of an operating point.
That could mean system-specific hooks, e.g. to rule out certain
voltages based on available power or what devices are running.

Unfortunately, examples of non-CPUfreq part of an operating point
may be tricky to come by on desktop systems.

> Date: Tue, 09 Aug 2005 17:33:44 -0700
> From: Todd Poynor <tpoynor@mvista.com>
>
> Geoff Levand wrote:
>
> > I'm wondering if anything could be gained by having the whole 
> > struct powerop_point defined in asm/powerop.h, and treat it as an 
> > opaque structure at this level.  That way, things other than just 
> > ints could be passed between the policy manager and the backend, 
> > although I guess that breaks the beauty of the simplicity and would 
> > complicate the sys-fs interface, etc.  I'm interested to hear your 
> > comments.
>
> Making the "operating point" data structure entirely platform-specific 
> should be OK.  There's a little value to having generic pieces handle 
> some common chores (such as the sysfs interfaces), but even for integers 
> decimal vs. hex formatting is nicer depending on the type of value. 

Taking a more extreme position or two:

  - Why have any parsing at all?  It's opaque data; so insist that
    the kernel just get raw bytes.  That's the traditional solution,
    not having the kernel parse arrays of integers.

  - Why try to standardize a data-based abstraction at all?  Surely
    it'd be easier to use modprobe, and have it register operating
    points that include not just the data, but its interpretation.

  - If those numbers are needed, having single-valued sysfs attributes
    (maybe /sys/power/runstate/policy_name/XXX) would be preferable
    to relying on getting position right within a multivalued input.

What I've had in mind is that "modprobe" would register some code
implementing one or more named runtime policies.  Now one way to use
such code would be to equate "policy" and "operating point"; a static
mapping, as I think I see Todd suggesting, but compiled.  (Or maybe
tuned using individual sysfs attributes.)

But another way would be to have that code pick some operating point
that matches various constraints fed in from userspace ... maybe from
sysfs attribute files managed by that code.  It could use all sorts
of kernel APIs while choosing the point, and would clearly need to
use them in order to actually _set_ some new point.

It's easier for me to see how "echo policy_name > /sys/power/runtime"
would scale up and down (given pluggable policy_name components!)
than "echo 0 35 98 141 66 -3 0x7efc0 > /sys/power/foo" would.

> Since most values that have been managed using similar interfaces thus 
> far have been flags, register values, voltages, etc. using integers has 
> worked well and nicely simplified the platform backend, but if there's a 
> need for other data types then should be doable.

Sysfs already has all that infrastructure, if you adopt the policy
that such system-specific constraints show up in system-specific
attributes somewhere ... matching the system-specific knowledge that's
used to interpret them.  That'd also be less error prone than "whoops,
there wasn't supposed to be a space between 35 and 98" or "darn, I
switched the 141 and 66 around again".

- Dave

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [linux-pm] PowerOP 1/3: PowerOP core
  2005-08-12 16:23     ` david-b
@ 2005-08-13  1:06       ` Todd Poynor
  0 siblings, 0 replies; 8+ messages in thread
From: Todd Poynor @ 2005-08-13  1:06 UTC (permalink / raw)
  To: david-b; +Cc: geoffrey.levand, linux-pm, linux-kernel, cpufreq

david-b@pacbell.net wrote:
> How well would _this_ notion of an operating point scale up?
> 
> I have this feeling that it's maybe better attuned to "scale down"
> sorts of problems (maybe cell phones) than to a big NUMA box.  I can
> see how a batch scheduled server might want to fire up only enough
> components to run the next simulation, but I wonder if maybe systems
> dynamically managing lots of resources might not be better off with
> some other model ... where userspace makes higher level decisions,
> and the kernel is adaptive within a potentially big solution space.
> (Likewise, maybe some of the smaller systems would too.)

If I understand correctly, that does seem to describe how such systems 
are used today: out of a potentially large number of choices (and some 
embedded SOCs do have a good variety of clocking, core voltage, and 
power domain choices), configure only those useful for the problem at 
hand into the kernel and then userspace gives marching orders to the 
kernel to activate whatever's appropriate according to system-specific 
logic in userspace.

>   - Why have any parsing at all?  It's opaque data; so insist that
>     the kernel just get raw bytes.  That's the traditional solution,
>     not having the kernel parse arrays of integers.
> 
>   - Why try to standardize a data-based abstraction at all?  Surely
>     it'd be easier to use modprobe, and have it register operating
>     points that include not just the data, but its interpretation.

Configuring the definitions of desired operating points (and updating 
these on-the-fly) from userspace has its advantages, but inserting into 
the kernel as a module should work fine and avoids some awkward 
userspace interfaces.  In the current code it is intended that both 
in-kernel interfaces (without parsing) and userspace interfaces are 
available, but I think non-diagnostic userspace interfaces could easily 
be dropped.  One of the nice things about having it done from userspace 
is that apps can be in charge of configuring operating points and 
activating those operating points in response to changes in system state 
(with reduced chance of mismatched app + module).  And since it can be 
done in userspace it's nice to simplify the kernel that way, which some 
folks feel strongly about.  But not a big deal to me.

Standard data structures have small benefits for implementing some code 
once instead of per-platform.  Another possibility in addition to the 
configuration or diagnostic interfaces is the concept of "constraints" 
on operating points according to device needs that you and others have 
alluded to: the constraints can be expressed in a form that can be 
checked by platform-independent code ("check the value at this power 
parameter index in the array for this range of integer values").  Also 
not a big deal.

All in all, it sounds like the next version of this should not have a 
userspace configuration interface and should not provide a 
platform-independent structure for the operating points.  The operating 
point get and set functions are in machine-specific code and take an 
operating point argument defined in header files shared between the 
machine-specific code and whatever kernel code is creating and 
activating operating points.  Any diagnostic interfaces need to be 
machine-dependent as well.

Now that the operating points are created without a generic layer or 
structure, if one wants a generic interface to set (activate) one of 
those operating points, probably identified by name, from userspace then 
a little extra is needed.  Can worry about that later.

>   - If those numbers are needed, having single-valued sysfs attributes
>     (maybe /sys/power/runstate/policy_name/XXX) would be preferable
>     to relying on getting position right within a multivalued input.

In case it helps, the code thus far and proposed interfaces use 
single-valued sysfs interfaces.  Since you also mentioned:

> It's easier for me to see how "echo policy_name > /sys/power/runtime"
> would scale up and down (given pluggable policy_name components!)
> than "echo 0 35 98 141 66 -3 0x7efc0 > /sys/power/foo" would.

and:

> That'd also be less error prone than "whoops,
> there wasn't supposed to be a space between 35 and 98" or "darn, I
> switched the 141 and 66 around again".

It may be worth pointing out that these interfaces wouldn't do that (a 
two level hierarchy of operating point name and single power parameter 
attribute would be used, and the ordering into the array is handled by 
the generic PowerOP core, which knows how to associate parameter names 
with array indices).  Older versions of DPM did use interfaces similar 
to what you describe, in case you've got that in mind.  They weren't 
intended to be used interactively.

Thanks,

-- 
Todd

^ permalink raw reply	[flat|nested] 8+ messages in thread

end of thread, other threads:[~2005-08-13  1:06 UTC | newest]

Thread overview: 8+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2005-08-09  2:51 PowerOP 1/3: PowerOP core Todd Poynor
2005-08-09  7:32 ` Greg KH
2005-08-09 16:07 ` [linux-pm] " Geoff Levand
2005-08-10  0:33   ` Todd Poynor
2005-08-10 13:58     ` Daniel Petrini
2005-08-11  0:25       ` Todd Poynor
2005-08-12 16:23     ` david-b
2005-08-13  1:06       ` Todd Poynor

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox