public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
* [Patch 5/6] statistics infrastructure
@ 2006-05-24 12:33 Martin Peschke
  2006-05-24 22:57 ` Andrew Morton
  2006-05-25  8:05 ` Nikita Danilov
  0 siblings, 2 replies; 16+ messages in thread
From: Martin Peschke @ 2006-05-24 12:33 UTC (permalink / raw)
  To: Andrew Morton; +Cc: linux-kernel@vger.kernel.org

This patch adds statistics infrastructure as common code.

Signed-off-by: Martin Peschke <mp3@de.ibm.com>
---

 MAINTAINERS                |    7
 arch/s390/Kconfig          |    6
 arch/s390/oprofile/Kconfig |    5
 include/linux/statistic.h  |  348 ++++++++++
 lib/Kconfig.statistic      |   11
 lib/Makefile               |    2
 lib/statistic.c            | 1459 +++++++++++++++++++++++++++++++++++++++++++++
 7 files changed, 1833 insertions(+), 5 deletions(-)

diff -Nurp a/include/linux/statistic.h b/include/linux/statistic.h
--- a/include/linux/statistic.h	1970-01-01 01:00:00.000000000 +0100
+++ b/include/linux/statistic.h	2006-05-19 16:23:07.000000000 +0200
@@ -0,0 +1,348 @@
+/*
+ * include/linux/statistic.h
+ *
+ * Statistics facility
+ *
+ * (C) Copyright IBM Corp. 2005, 2006
+ *
+ * Author(s): Martin Peschke <mpeschke@de.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#ifndef STATISTIC_H
+#define STATISTIC_H
+
+#include <linux/fs.h>
+#include <linux/types.h>
+#include <linux/percpu.h>
+
+#define STATISTIC_ROOT_DIR	"statistics"
+
+#define STATISTIC_FILENAME_DATA	"data"
+#define STATISTIC_FILENAME_DEF	"definition"
+
+#define STATISTIC_NEED_BARRIER	1
+
+struct statistic;
+
+enum statistic_state {
+	STATISTIC_STATE_INVALID,
+	STATISTIC_STATE_UNCONFIGURED,
+	STATISTIC_STATE_RELEASED,
+	STATISTIC_STATE_OFF,
+	STATISTIC_STATE_ON
+};
+
+enum statistic_type {
+	STATISTIC_TYPE_COUNTER_INC,
+	STATISTIC_TYPE_COUNTER_PROD,
+	STATISTIC_TYPE_UTIL,
+	STATISTIC_TYPE_HISTOGRAM_LIN,
+	STATISTIC_TYPE_HISTOGRAM_LOG2,
+	STATISTIC_TYPE_SPARSE,
+	STATISTIC_TYPE_NONE
+};
+
+#define STATISTIC_FLAGS_NOINCR	0x01
+
+/**
+ * struct statistic_info - description of a class of statistics
+ * @name: pointer to name string
+ * @x_unit: pointer to string describing unit of X of (X, Y) data pair
+ * @y_unit: pointer to string describing unit of Y of (X, Y) data pair
+ * @flags: only flag so far (distinction of incremental and other statistic)
+ * @defaults: pointer to string describing defaults setting for attributes
+ *
+ * Exploiters must setup an array of struct statistic_info for a
+ * corresponding array of struct statistic, which are then pointed to
+ * by struct statistic_interface.
+ *
+ * Struct statistic_info and all members and addressed strings must stay for
+ * the lifetime of corresponding statistics created with statistic_create().
+ *
+ * Except for the name string, all other members may be left blank.
+ * It would be nice of exploiters to fill it out completely, though.
+ */
+struct statistic_info {
+/* public: */
+	char *name;
+	char *x_unit;
+	char *y_unit;
+	int  flags;
+	char *defaults;
+};
+
+/**
+ * struct statistic_interface - collection of statistics for an entity
+ * @stat: a struct statistic array
+ * @info: a struct statistic_info array describing the struct statistic array
+ * @number: number of entries in both arrays
+ * @pull: an optional function called when user reads data from file
+ * @pull_private: optional data pointer passed to pull function
+ *
+ * Exploiters must setup a struct statistic_interface prior to calling
+ * statistic_create().
+ */
+struct statistic_interface {
+/* private: */
+	struct list_head	 list;
+	struct dentry		*debugfs_dir;
+	struct dentry		*data_file;
+	struct dentry		*def_file;
+/* public: */
+	struct statistic	*stat;
+	struct statistic_info	*info;
+	int			 number;
+	int			(*pull)(void*);
+	void			*pull_private;
+};
+
+struct sgrb_seg {
+	struct list_head list;
+	char *address;
+	int offset;
+	int size;
+};
+
+struct statistic_file_private {
+	struct list_head read_seg_lh;
+	struct list_head write_seg_lh;
+	size_t write_seg_total_size;
+};
+
+struct statistic_merge_private {
+	struct statistic *stat;
+	spinlock_t lock;
+	void *dst;
+};
+
+/**
+ * struct statistic_discipline - description of a data processing mode
+ * @parse: parses additional attributes specific to this mode (if any)
+ * @alloc: allocates a data area (mandatory, default routine available)
+ * @free: frees a data area (optional, kfree() is used otherwise)
+ * @reset: discards content of a data area (mandatory)
+ * @merge: merges content of a data area into another data area (mandatory)
+ * @fdata: prints content of a data area into buffer (mandatory)
+ * @fdef: prints additional attributes specific to this mode (if any)
+ * @add: updates a data area for a statistic fed incremental data (mandatory)
+ * @set: updates a data area for a statistic fed total numbers (mandatory)
+ * @name: pointer to name string (mandatory)
+ * @size: base size for a data area (passed to alloc function)
+ *
+ * Struct statistic_discipline describes a statistic infrastructure internal
+ * programming interface. Another data processing mode can be added by
+ * implementing these routines and appending an entry to the
+ * statistic_discs array.
+ *
+ * "Data area" in the above description usually means a chunk of memory,
+ * may it be allocated for data gathering per CPU, or be shared by all
+ * CPUs, or used for other purposes, like merging per-CPU data when
+ * users read data from files. Implementers of data processing modes
+ * don't need to worry about the designation of a particular chunk of memory.
+ * A data area of a data processing mode always has to look the same.
+ */
+struct statistic_discipline {
+	int (*parse)(struct statistic *, struct statistic_info *, int, char *);
+	void* (*alloc)(struct statistic *, size_t, gfp_t, int);
+	void (*free)(struct statistic *, void *);
+	void (*reset)(struct statistic *, void *);
+	void (*merge)(struct statistic *, void *, void*);
+	int (*fdata)(struct statistic *, const char *,
+		     struct statistic_file_private *, void *);
+	int (*fdef)(struct statistic *, char *);
+	void (*add)(struct statistic *, int, s64, u64);
+	void (*set)(struct statistic *, s64, u64);
+	char *name;
+	size_t size;
+};
+
+struct statistic_entry_util {
+	u32 res;
+	u32 num;	/* FIXME: better 64 bit; do_div can't deal with it */
+	s64 acc;
+	s64 min;
+	s64 max;
+};
+
+struct statistic_entry_sparse {
+	struct list_head list;
+	s64 value;
+	u64 hits;
+};
+
+struct statistic_sparse_list {
+	struct list_head entry_lh;
+	u32 entries;
+	u32 entries_max;
+	u64 hits_missed;
+};
+
+/**
+ * struct statistic - any data required for gathering data for a statistic
+ */
+struct statistic {
+/* private: */
+	enum statistic_state	 state;
+	enum statistic_type	 type;
+	struct percpu_data	*pdata;
+	void			(*add)(struct statistic *, int, s64, u64);
+	u64			 started;
+	u64			 stopped;
+	u64			 age;
+	union {
+		struct {
+			s64 range_min;
+			u32 last_index;
+			u32 base_interval;
+		} histogram;
+		struct {
+			u32 entries_max;
+		} sparse;
+	} u;
+};
+
+#ifdef CONFIG_STATISTICS
+
+extern int statistic_create(struct statistic_interface *, const char *);
+extern int statistic_remove(struct statistic_interface *);
+
+/**
+ * statistic_add - update statistic with incremental data in (X, Y) pair
+ * @stat: struct statistic array
+ * @i: index of statistic to be updated
+ * @value: X
+ * @incr: Y
+ *
+ * The actual processing of the (X, Y) data pair is determined by the current
+ * definition applied to the statistic. See Documentation/statistics.txt.
+ *
+ * This variant takes care of protecting per-cpu data. It is preferred whenever
+ * exploiters don't update several statistics of the same entity in one go.
+ */
+static inline void statistic_add(struct statistic *stat, int i,
+				 s64 value, u64 incr)
+{
+	unsigned long flags;
+	local_irq_save(flags);
+	if (stat[i].state == STATISTIC_STATE_ON)
+		stat[i].add(&stat[i], smp_processor_id(), value, incr);
+	local_irq_restore(flags);
+}
+
+/**
+ * statistic_add_nolock - update statistic with incremental data in (X, Y) pair
+ * @stat: struct statistic array
+ * @i: index of statistic to be updated
+ * @value: X
+ * @incr: Y
+ *
+ * The actual processing of the (X, Y) data pair is determined by the current
+ * definition applied to the statistic. See Documentation/statistics.txt.
+ *
+ * This variant leaves protecting per-cpu data to exploiters. It is preferred
+ * whenever exploiters update several statistics of the same entity in one go.
+ */
+static inline void statistic_add_nolock(struct statistic *stat, int i,
+					s64 value, u64 incr)
+{
+	if (stat[i].state == STATISTIC_STATE_ON)
+		stat[i].add(&stat[i], smp_processor_id(), value, incr);
+}
+
+/**
+ * statistic_inc - update statistic with incremental data in (X, 1) pair
+ * @stat: struct statistic array
+ * @i: index of statistic to be updated
+ * @value: X
+ *
+ * The actual processing of the (X, Y) data pair is determined by the current
+ * definition applied to the statistic. See Documentation/statistics.txt.
+ *
+ * This variant takes care of protecting per-cpu data. It is preferred whenever
+ * exploiters don't update several statistics of the same entity in one go.
+ */
+static inline void statistic_inc(struct statistic *stat, int i, s64 value)
+{
+	unsigned long flags;
+	local_irq_save(flags);
+	if (stat[i].state == STATISTIC_STATE_ON)
+		stat[i].add(&stat[i], smp_processor_id(), value, 1);
+	local_irq_restore(flags);
+}
+
+/**
+ * statistic_inc_nolock - update statistic with incremental data in (X, 1) pair
+ * @stat: struct statistic array
+ * @i: index of statistic to be updated
+ * @value: X
+ *
+ * The actual processing of the (X, Y) data pair is determined by the current
+ * definition applied to the statistic. See Documentation/statistics.txt.
+ *
+ * This variant leaves protecting per-cpu data to exploiters. It is preferred
+ * whenever exploiters update several statistics of the same entity in one go.
+ */
+static inline void statistic_inc_nolock(struct statistic *stat, int i,
+					s64 value)
+{
+	if (stat[i].state == STATISTIC_STATE_ON)
+		stat[i].add(&stat[i], smp_processor_id(), value, 1);
+}
+
+extern void statistic_set(struct statistic *, int, s64, u64);
+
+#else /* CONFIG_STATISTICS */
+
+static inline int statistic_create(struct statistic_interface *interface,
+				   const char *name)
+{
+	return 0;
+}
+
+static inline int statistic_remove(
+				struct statistic_interface *interface_ptr)
+{
+	return 0;
+}
+
+static inline void statistic_add(struct statistic *stat, int i,
+				 s64 value, u64 incr)
+{
+}
+
+static inline void statistic_add_nolock(struct statistic *stat, int i,
+					s64 value, u64 incr)
+{
+}
+
+static inline void statistic_inc(struct statistic *stat, int i, s64 value)
+{
+}
+
+static inline void statistic_inc_nolock(struct statistic *stat, int i,
+					s64 value)
+{
+}
+
+static inline void statistic_set(struct statistic *stat, int i,
+				 s64 value, u64 total)
+{
+}
+
+#endif /* CONFIG_STATISTICS */
+
+#endif /* STATISTIC_H */
diff -Nurp a/lib/statistic.c b/lib/statistic.c
--- a/lib/statistic.c	1970-01-01 01:00:00.000000000 +0100
+++ b/lib/statistic.c	2006-05-19 16:22:55.000000000 +0200
@@ -0,0 +1,1459 @@
+/*
+ *  lib/statistic.c
+ *    statistics facility
+ *
+ *    Copyright (C) 2005, 2006
+ *		IBM Deutschland Entwicklung GmbH,
+ *		IBM Corporation
+ *
+ *    Author(s): Martin Peschke (mpeschke@de.ibm.com),
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ *    another bunch of ideas being pondered:
+ *	- define a set of agreed names or a naming scheme for
+ *	  consistency and comparability across exploiters;
+ *	  this entails an agreement about granularities
+ *	  as well (e.g. separate statistic for read/write/no-data commands);
+ *	  a common set of unit strings would be nice then, too, of course
+ *	  (e.g. "seconds", "milliseconds", "microseconds", ...)
+ *	- perf. opt. of array: table lookup of values, binary search for values
+ *	- another statistic discipline based on some sort of tree, but
+ *	  similar in semantics to list discipline (for high-perf. histograms of
+ *	  discrete values)
+ *	- allow for more than a single "view" on data at the same time by
+ *	  providing the capability to attach several (a list of) "definitions"
+ *	  to a struct statistic
+ *	  (e.g. show histogram of requests sizes and history of megabytes/sec.
+ *	  at the same time)
+ *	- multi-dimensional statistic (combination of two or more
+ *	  characteristics/discriminators); worth the effort??
+ *	  (e.g. a matrix of occurrences for latencies of requests of
+ *	  particular sizes)
+ *
+ *	FIXME:
+ *	- statistics file access when statistics are being removed
+ */
+
+#include <linux/fs.h>
+#include <linux/debugfs.h>
+#include <linux/module.h>
+#include <linux/list.h>
+#include <linux/parser.h>
+#include <linux/time.h>
+#include <linux/sched.h>
+#include <linux/cpu.h>
+#include <linux/percpu.h>
+#include <linux/mutex.h>
+#include <linux/statistic.h>
+
+#include <asm/bug.h>
+#include <asm/uaccess.h>
+
+static struct statistic_discipline statistic_discs[];
+
+static inline int statistic_initialise(struct statistic *stat)
+{
+	stat->type = STATISTIC_TYPE_NONE;
+	stat->state = STATISTIC_STATE_UNCONFIGURED;
+	return 0;
+}
+
+static inline int statistic_uninitialise(struct statistic *stat)
+{
+	stat->state = STATISTIC_STATE_INVALID;
+	return 0;
+}
+
+static inline int statistic_define(struct statistic *stat)
+{
+	if (stat->type == STATISTIC_TYPE_NONE)
+		return -EINVAL;
+	stat->state = STATISTIC_STATE_RELEASED;
+	return 0;
+}
+
+static inline void statistic_reset_ptr(struct statistic *stat, void *ptr)
+{
+	struct statistic_discipline *disc = &statistic_discs[stat->type];
+	if (ptr)
+		disc->reset(stat, ptr);
+}
+
+static inline void statistic_move_ptr(struct statistic *stat, void *src)
+{
+	struct statistic_discipline *disc = &statistic_discs[stat->type];
+	unsigned long flags;
+	local_irq_save(flags);
+	disc->merge(stat, stat->pdata->ptrs[smp_processor_id()], src);
+	local_irq_restore(flags);
+}
+
+static inline void statistic_free_ptr(struct statistic *stat, void *ptr)
+{
+	struct statistic_discipline *disc = &statistic_discs[stat->type];
+	if (ptr) {
+		if (unlikely(disc->free))
+			disc->free(stat, ptr);
+		kfree(ptr);
+	}
+}
+
+static int statistic_free(struct statistic *stat, struct statistic_info *info)
+{
+	int cpu;
+	stat->state = STATISTIC_STATE_RELEASED;
+	if (unlikely(info->flags & STATISTIC_FLAGS_NOINCR)) {
+		statistic_free_ptr(stat, stat->pdata);
+		stat->pdata = NULL;
+		return 0;
+	}
+	for_each_cpu(cpu) {
+		statistic_free_ptr(stat, stat->pdata->ptrs[cpu]);
+		stat->pdata->ptrs[cpu] = NULL;
+	}
+	kfree(stat->pdata);
+	stat->pdata = NULL;
+	return 0;
+}
+
+static void * statistic_alloc_generic(struct statistic *stat, size_t size,
+				      gfp_t flags, int node)
+{
+	return kmalloc_node(size, flags, node);
+}
+
+static void * statistic_alloc_ptr(struct statistic *stat, gfp_t flags, int node)
+{
+	struct statistic_discipline *disc = &statistic_discs[stat->type];
+	void *buf = disc->alloc(stat, disc->size, flags, node);
+	if (likely(buf))
+		statistic_reset_ptr(stat, buf);
+	return buf;
+}
+
+static int statistic_alloc(struct statistic *stat,
+			   struct statistic_info *info)
+{
+	int cpu;
+	stat->age = sched_clock();
+	if (unlikely(info->flags & STATISTIC_FLAGS_NOINCR)) {
+		stat->pdata = statistic_alloc_ptr(stat, GFP_KERNEL, -1);
+		if (unlikely(!stat->pdata))
+			return -ENOMEM;
+		stat->state = STATISTIC_STATE_OFF;
+		return 0;
+	}
+	stat->pdata = kzalloc(sizeof(struct percpu_data), GFP_KERNEL);
+	if (unlikely(!stat->pdata))
+		return -ENOMEM;
+	for_each_online_cpu(cpu) {
+		stat->pdata->ptrs[cpu] = statistic_alloc_ptr(stat, GFP_KERNEL,
+							     cpu_to_node(cpu));
+		if (unlikely(!stat->pdata->ptrs[cpu])) {
+			statistic_free(stat, info);
+			return -ENOMEM;
+		}
+	}
+	stat->state = STATISTIC_STATE_OFF;
+	return 0;
+}
+
+static inline int statistic_start(struct statistic *stat)
+{
+	stat->started = sched_clock();
+	stat->state = STATISTIC_STATE_ON;
+	return 0;
+}
+
+static void _statistic_barrier(void *unused)
+{
+}
+
+static inline int statistic_stop(struct statistic *stat)
+{
+	stat->stopped = sched_clock();
+	stat->state = STATISTIC_STATE_OFF;
+	/* ensures that all CPUs have ceased updating statistics */
+	smp_mb();
+	on_each_cpu(_statistic_barrier, NULL, 0, 1);
+	return 0;
+}
+
+static int statistic_transition(struct statistic *stat,
+				struct statistic_info *info,
+				enum statistic_state requested_state)
+{
+	int z = (requested_state < stat->state ? 1 : 0);
+	int retval = -EINVAL;
+
+	while (stat->state != requested_state) {
+		switch (stat->state) {
+		case STATISTIC_STATE_INVALID:
+			retval = ( z ? -EINVAL : statistic_initialise(stat) );
+			break;
+		case STATISTIC_STATE_UNCONFIGURED:
+			retval = ( z ? statistic_uninitialise(stat)
+				     : statistic_define(stat) );
+			break;
+		case STATISTIC_STATE_RELEASED:
+			retval = ( z ? statistic_initialise(stat)
+				     : statistic_alloc(stat, info) );
+			break;
+		case STATISTIC_STATE_OFF:
+			retval = ( z ? statistic_free(stat, info)
+				     : statistic_start(stat) );
+			break;
+		case STATISTIC_STATE_ON:
+			retval = ( z ? statistic_stop(stat) : -EINVAL );
+			break;
+		}
+		if (unlikely(retval))
+			return retval;
+	}
+	return 0;
+}
+
+static int statistic_reset(struct statistic *stat, struct statistic_info *info)
+{
+	enum statistic_state prev_state = stat->state;
+	int cpu;
+
+	if (unlikely(stat->state < STATISTIC_STATE_OFF))
+		return 0;
+	statistic_transition(stat, info, STATISTIC_STATE_OFF);
+	if (unlikely(info->flags & STATISTIC_FLAGS_NOINCR))
+		statistic_reset_ptr(stat, stat->pdata);
+	else
+		for_each_cpu(cpu)
+			statistic_reset_ptr(stat, stat->pdata->ptrs[cpu]);
+	stat->age = sched_clock();
+	statistic_transition(stat, info, prev_state);
+	return 0;
+}
+
+static void statistic_merge(void *__mpriv)
+{
+	struct statistic_merge_private *mpriv = __mpriv;
+	struct statistic *stat = mpriv->stat;
+	struct statistic_discipline *disc = &statistic_discs[stat->type];
+	spin_lock(&mpriv->lock);
+	disc->merge(stat, mpriv->dst, stat->pdata->ptrs[smp_processor_id()]);
+	spin_unlock(&mpriv->lock);
+}
+
+/**
+ * statistic_set - set statistic using total numbers in (X, Y) data pair
+ * @stat: struct statistic array
+ * @i: index of statistic to be updated
+ * @value: X
+ * @total: Y
+ *
+ * The actual processing of the (X, Y) data pair is determined by the current
+ * definition applied to the statistic. See Documentation/statistics.txt.
+ *
+ * There is no distinction between a concurrency protected and unprotected
+ * statistic_set() flavour needed. statistic_set() may only
+ * be called when we pull statistic updates from exploiters. The statistics
+ * infrastructure guarantees serialisation for that. Exploiters must not
+ * intermix statistic_set() and statistic_add/inc() anyway. That is why,
+ * concurrent updates won't happen and there is no additional protection
+ * required for statistics fed through statistic_set().
+ */
+void statistic_set(struct statistic *stat, int i, s64 value, u64 total)
+{
+	struct statistic_discipline *disc = &statistic_discs[stat[i].type];
+	if (stat[i].state == STATISTIC_STATE_ON)
+		disc->set(&stat[i], value, total);
+}
+
+static struct sgrb_seg * sgrb_seg_find(struct list_head *lh, int size)
+{
+	struct sgrb_seg *seg;
+
+	/* only the last buffer, if any, may have spare bytes */
+	list_for_each_entry_reverse(seg, lh, list) {
+		if (likely((PAGE_SIZE - seg->offset) >= size))
+			return seg;
+		break;
+	}
+	seg = kzalloc(sizeof(struct sgrb_seg), GFP_KERNEL);
+	if (unlikely(!seg))
+		return NULL;
+	seg->size = PAGE_SIZE;
+	seg->address = (void*)__get_free_page(GFP_KERNEL);
+	if (unlikely(!seg->address)) {
+		kfree(seg);
+		return NULL;
+	}
+	list_add_tail(&seg->list, lh);
+	return seg;
+}
+
+static void sgrb_seg_release_all(struct list_head *lh)
+{
+	struct sgrb_seg *seg, *tmp;
+
+	list_for_each_entry_safe(seg, tmp, lh, list) {
+		list_del(&seg->list);
+		free_page((unsigned long)seg->address);
+		kfree(seg);
+	}
+}
+
+static char * statistic_state_strings[] = {
+	"undefined(BUG)",
+	"unconfigured",
+	"released",
+	"off",
+	"on",
+};
+
+static int statistic_fdef(struct statistic_interface *interface, int i,
+			  struct statistic_file_private *private)
+{
+	struct statistic *stat = &interface->stat[i];
+	struct statistic_info *info = &interface->info[i];
+	struct statistic_discipline *disc = &statistic_discs[stat->type];
+	struct sgrb_seg *seg;
+	char t0[TIMESTAMP_SIZE], t1[TIMESTAMP_SIZE], t2[TIMESTAMP_SIZE];
+
+	seg = sgrb_seg_find(&private->read_seg_lh, 512);
+	if (unlikely(!seg))
+		return -ENOMEM;
+
+	seg->offset += sprintf(seg->address + seg->offset,
+			       "name=%s state=%s units=%s/%s",
+			       info->name, statistic_state_strings[stat->state],
+			       info->x_unit, info->y_unit);
+	if (stat->state == STATISTIC_STATE_UNCONFIGURED) {
+		seg->offset += sprintf(seg->address + seg->offset, "\n");
+		return 0;
+	}
+
+	seg->offset += sprintf(seg->address + seg->offset, " type=%s",
+			       disc->name);
+	if (disc->fdef)
+		seg->offset += disc->fdef(stat, seg->address + seg->offset);
+	if (stat->state == STATISTIC_STATE_RELEASED) {
+		seg->offset += sprintf(seg->address + seg->offset, "\n");
+		return 0;
+	}
+
+	nsec_to_timestamp(t0, stat->age);
+	nsec_to_timestamp(t1, stat->started);
+	nsec_to_timestamp(t2, stat->stopped);
+	seg->offset += sprintf(seg->address + seg->offset,
+			       " data=%s started=%s stopped=%s\n", t0, t1, t2);
+	return 0;
+}
+
+static inline int statistic_fdata(struct statistic_interface *interface, int i,
+				  struct statistic_file_private *fpriv)
+{
+	struct statistic *stat = &interface->stat[i];
+	struct statistic_info *info = &interface->info[i];
+	struct statistic_discipline *disc = &statistic_discs[stat->type];
+	struct statistic_merge_private mpriv;
+	int retval;
+
+	if (unlikely(stat->state < STATISTIC_STATE_OFF))
+		return 0;
+	if (unlikely(info->flags & STATISTIC_FLAGS_NOINCR))
+		return disc->fdata(stat, info->name, fpriv, stat->pdata);
+	mpriv.dst = statistic_alloc_ptr(stat, GFP_KERNEL, -1);
+	if (unlikely(!mpriv.dst))
+		return -ENOMEM;
+	spin_lock_init(&mpriv.lock);
+	mpriv.stat = stat;
+	on_each_cpu(statistic_merge, &mpriv, 0, 1);
+	retval = disc->fdata(stat, info->name, fpriv, mpriv.dst);
+	statistic_free_ptr(stat, mpriv.dst);
+	return retval;
+}
+
+/* cpu hotplug handling for per-cpu data */
+
+static inline int _statistic_hotcpu(struct statistic_interface *interface,
+				    int i, unsigned long action, int cpu)
+{
+	struct statistic *stat = &interface->stat[i];
+	struct statistic_info *info = &interface->info[i];
+
+	if (unlikely(info->flags & STATISTIC_FLAGS_NOINCR))
+		return 0;
+	if (stat->state < STATISTIC_STATE_OFF)
+		return 0;
+	switch (action) {
+	case CPU_UP_PREPARE:
+		stat->pdata->ptrs[cpu] = statistic_alloc_ptr(stat, GFP_ATOMIC,
+							     cpu_to_node(cpu));
+		break;
+	case CPU_UP_CANCELED:
+	case CPU_DEAD:
+		statistic_move_ptr(stat, stat->pdata->ptrs[cpu]);
+		statistic_free_ptr(stat, stat->pdata->ptrs[cpu]);
+		stat->pdata->ptrs[cpu] = NULL;
+		break;
+	}
+	return 0;
+}
+
+static struct list_head statistic_list;
+static struct mutex statistic_list_mutex;
+
+static int __cpuinit statistic_hotcpu(struct notifier_block *notifier,
+				      unsigned long action, void *__cpu)
+{
+	int cpu = (unsigned long)__cpu, i;
+	struct statistic_interface *interface;
+
+	mutex_lock(&statistic_list_mutex);
+	list_for_each_entry(interface, &statistic_list, list)
+		for (i = 0; i < interface->number; i++)
+			_statistic_hotcpu(interface, i, action, cpu);
+	mutex_unlock(&statistic_list_mutex);
+	return NOTIFY_OK;
+}
+
+static struct notifier_block statistic_hotcpu_notifier =
+{
+	.notifier_call = statistic_hotcpu,
+};
+
+/* module startup / removal */
+
+static struct dentry *statistic_root_dir;
+
+int __init statistic_init(void)
+{
+	statistic_root_dir = debugfs_create_dir(STATISTIC_ROOT_DIR, NULL);
+	if (unlikely(!statistic_root_dir))
+		return -ENOMEM;
+	INIT_LIST_HEAD(&statistic_list);
+	mutex_init(&statistic_list_mutex);
+	register_cpu_notifier(&statistic_hotcpu_notifier);
+	return 0;
+}
+
+void __exit statistic_exit(void)
+{
+	unregister_cpu_notifier(&statistic_hotcpu_notifier);
+	debugfs_remove(statistic_root_dir);
+}
+
+/* parser used for configuring statistics */
+
+static int statistic_parse_single(struct statistic *stat,
+				  struct statistic_info *info,
+				  char *def, int type)
+{
+	struct statistic_discipline *disc = &statistic_discs[type];
+	int prev_state = stat->state, retval = 0;
+	char *copy;
+
+	if (disc->parse) {
+		copy = kstrdup(def, GFP_KERNEL);
+		if (unlikely(!copy))
+			return -ENOMEM;
+		retval = disc->parse(stat, info, type, copy);
+		kfree(copy);
+	} else if (type != stat->type)
+		statistic_transition(stat, info, STATISTIC_STATE_UNCONFIGURED);
+	if (!retval) {
+		stat->type = type;
+		stat->add = disc->add;
+	}
+	statistic_transition(stat, info,
+			     max(prev_state, STATISTIC_STATE_RELEASED));
+	return retval;
+}
+
+static match_table_t statistic_match_type = {
+	{1, "type=%s"},
+	{9, NULL}
+};
+
+static int statistic_parse_match(struct statistic *stat,
+				 struct statistic_info *info, char *def)
+{
+	int type, len;
+	char *p, *copy, *twisted;
+	substring_t args[MAX_OPT_ARGS];
+	struct statistic_discipline *disc;
+
+	if (!def)
+		def = info->defaults;
+	twisted = copy = kstrdup(def, GFP_KERNEL);
+	if (unlikely(!copy))
+		return -ENOMEM;
+	while ((p = strsep(&twisted, " ")) != NULL) {
+		if (!*p)
+			continue;
+		if (match_token(p, statistic_match_type, args) != 1)
+			continue;
+		len = (args[0].to - args[0].from) + 1;
+		for (type = 0; type < STATISTIC_TYPE_NONE; type++) {
+			disc = &statistic_discs[type];
+			if (unlikely(strncmp(disc->name, args[0].from, len)))
+				continue;
+			kfree(copy);
+			return statistic_parse_single(stat, info, def, type);
+		}
+	}
+	kfree(copy);
+	if (unlikely(stat->type == STATISTIC_TYPE_NONE))
+		return -EINVAL;
+	return statistic_parse_single(stat, info, def, stat->type);
+}
+
+static match_table_t statistic_match_common = {
+	{STATISTIC_STATE_UNCONFIGURED, "state=unconfigured"},
+	{STATISTIC_STATE_RELEASED, "state=released"},
+	{STATISTIC_STATE_OFF, "state=off"},
+	{STATISTIC_STATE_ON, "state=on"},
+	{1001, "name=%s"},
+	{1002, "data=reset"},
+	{1003, "defaults"},
+	{9999, NULL}
+};
+
+static void statistic_parse_line(struct statistic_interface *interface,
+				 char *def)
+{
+	char *p, *copy, *twisted, *name = NULL;
+	substring_t args[MAX_OPT_ARGS];
+	int token, reset = 0, defaults = 0, i;
+	int state = STATISTIC_STATE_INVALID;
+	struct statistic *stat = interface->stat;
+	struct statistic_info *info = interface->info;
+
+	if (unlikely(!def))
+		return;
+	twisted = copy = kstrdup(def, GFP_KERNEL);
+	if (unlikely(!copy))
+		return;
+
+	while ((p = strsep(&twisted, " ")) != NULL) {
+		if (!*p)
+			continue;
+		token = match_token(p, statistic_match_common, args);
+		switch (token) {
+		case STATISTIC_STATE_UNCONFIGURED:
+		case STATISTIC_STATE_RELEASED:
+		case STATISTIC_STATE_OFF:
+		case STATISTIC_STATE_ON:
+			state = token;
+			break;
+		case 1001:
+			if (likely(!name))
+				name = match_strdup(&args[0]);
+			break;
+		case 1002:
+			reset = 1;
+			break;
+		case 1003:
+			defaults = 1;
+			break;
+		}
+	}
+	for (i = 0; i < interface->number; i++, stat++, info++) {
+		if (!name || (name && !strcmp(name, info->name))) {
+			if (defaults)
+				statistic_parse_match(stat, info, NULL);
+			if (name)
+				statistic_parse_match(stat, info, def);
+			if (state != STATISTIC_STATE_INVALID)
+				statistic_transition(stat, info, state);
+			if (reset)
+				statistic_reset(stat, info);
+		}
+	}
+	kfree(copy);
+	kfree(name);
+}
+
+static void statistic_parse(struct statistic_interface *interface,
+			    struct list_head *line_lh, size_t line_size)
+{
+	struct sgrb_seg *seg, *tmp;
+	char *buf;
+	int offset = 0;
+
+	if (unlikely(!line_size))
+		return;
+	buf = kmalloc(line_size + 2, GFP_KERNEL);
+	if (unlikely(!buf))
+		return;
+	buf[line_size] = ' ';
+	buf[line_size + 1] = '\0';
+	list_for_each_entry_safe(seg, tmp, line_lh, list) {
+		memcpy(buf + offset, seg->address, seg->size);
+		offset += seg->size;
+		list_del(&seg->list);
+		kfree(seg);
+	}
+	statistic_parse_line(interface, buf);
+	kfree(buf);
+}
+
+/* sequential files comprising user interface */
+
+static int statistic_generic_open(struct inode *inode,
+		struct file *file, struct statistic_interface **interface,
+		struct statistic_file_private **private)
+{
+	*interface = inode->u.generic_ip;
+	BUG_ON(!interface);
+	*private = kzalloc(sizeof(struct statistic_file_private), GFP_KERNEL);
+	if (unlikely(!*private))
+		return -ENOMEM;
+	INIT_LIST_HEAD(&(*private)->read_seg_lh);
+	INIT_LIST_HEAD(&(*private)->write_seg_lh);
+	file->private_data = *private;
+	return 0;
+}
+
+static int statistic_generic_close(struct inode *inode, struct file *file)
+{
+	struct statistic_file_private *private = file->private_data;
+	BUG_ON(!private);
+	sgrb_seg_release_all(&private->read_seg_lh);
+	sgrb_seg_release_all(&private->write_seg_lh);
+	kfree(private);
+	return 0;
+}
+
+static ssize_t statistic_generic_read(struct file *file,
+				char __user *buf, size_t len, loff_t *offset)
+{
+	struct statistic_file_private *private = file->private_data;
+	struct sgrb_seg *seg;
+	size_t seg_offset, seg_residual, seg_transfer;
+	size_t transfered = 0;
+	loff_t pos = 0;
+
+	BUG_ON(!private);
+	list_for_each_entry(seg, &private->read_seg_lh, list) {
+		if (unlikely(!len))
+			break;
+		if (*offset >= pos && *offset <= (pos + seg->offset)) {
+			seg_offset = *offset - pos;
+			seg_residual = seg->offset - seg_offset;
+			seg_transfer = min(len, seg_residual);
+			if (unlikely(copy_to_user(buf + transfered,
+						  seg->address + seg_offset,
+						  seg_transfer)))
+				return -EFAULT;
+			transfered += seg_transfer;
+			*offset += seg_transfer;
+			pos += seg_transfer + seg_offset;
+			len -= seg_transfer;
+		} else
+			pos += seg->offset;
+	}
+	return transfered;
+}
+
+/*
+ * statistic_generic_write - buffer user-supplied definition bytes
+ *
+ * Appends the user data to the write segment list; parsing happens
+ * only when the file is closed. Only sequential writes are accepted
+ * (*offset must match the number of bytes buffered so far).
+ *
+ * Returns the number of bytes accepted, or -EPIPE for non-sequential
+ * writes. -ENOMEM/-EFAULT are returned only when nothing has been
+ * transferred yet; otherwise the short count is returned, per the
+ * write(2) contract.
+ */
+static ssize_t statistic_generic_write(struct file *file,
+			const char __user *buf, size_t len, loff_t *offset)
+{
+	struct statistic_file_private *private = file->private_data;
+	struct sgrb_seg *seg;
+	size_t seg_residual, seg_transfer;
+	size_t transfered = 0;
+
+	BUG_ON(!private);
+	if (unlikely(*offset != private->write_seg_total_size))
+		return -EPIPE;
+	while (len) {
+		seg = sgrb_seg_find(&private->write_seg_lh, 1);
+		if (unlikely(!seg))
+			return transfered ? transfered : -ENOMEM;
+		seg_residual = seg->size - seg->offset;
+		seg_transfer = min(len, seg_residual);
+		if (unlikely(copy_from_user(seg->address + seg->offset,
+					    buf + transfered, seg_transfer)))
+			return transfered ? transfered : -EFAULT;
+		private->write_seg_total_size += seg_transfer;
+		seg->offset += seg_transfer;
+		transfered += seg_transfer;
+		*offset += seg_transfer;
+		len -= seg_transfer;
+	}
+	return transfered;
+}
+
+/*
+ * statistic_def_close - parse user-written definition lines on close
+ *
+ * Splits the bytes accumulated in the write segment list into
+ * newline-terminated lines (each line described by a temporary list
+ * of sgrb_seg chunks) and feeds every line to statistic_parse().
+ * Finally releases the per-open state via statistic_generic_close().
+ */
+static int statistic_def_close(struct inode *inode, struct file *file)
+{
+	struct statistic_interface *interface = inode->u.generic_ip;
+	struct statistic_file_private *private = file->private_data;
+	struct sgrb_seg *seg, *seg_nl;
+	int offset;
+	struct list_head line_lh;
+	char *nl;
+	size_t line_size = 0;
+
+	INIT_LIST_HEAD(&line_lh);
+	list_for_each_entry(seg, &private->write_seg_lh, list) {
+		for (offset = 0; offset < seg->offset; offset += seg_nl->size) {
+			seg_nl = kmalloc(sizeof(struct sgrb_seg), GFP_KERNEL);
+			if (unlikely(!seg_nl))
+				/*
+				 * Out of memory - stop processing rather
+				 * than apply a truncated set of settings.
+				 * (Continuing here used to dereference the
+				 * NULL seg_nl in the loop increment.)
+				 */
+				goto out;
+			seg_nl->address = seg->address + offset;
+			nl = strnchr(seg_nl->address,
+				     seg->offset - offset, '\n');
+			if (nl) {
+				seg_nl->offset = nl - seg_nl->address;
+				if (seg_nl->offset)
+					seg_nl->offset--;
+			} else
+				seg_nl->offset = seg->offset - offset;
+			seg_nl->size = seg_nl->offset + 1;
+			line_size += seg_nl->size;
+			list_add_tail(&seg_nl->list, &line_lh);
+			if (nl) {
+				statistic_parse(interface, &line_lh, line_size);
+				line_size = 0;
+			}
+		}
+	}
+out:
+	/* a trailing line without newline is parsed as well */
+	if (!list_empty(&line_lh))
+		statistic_parse(interface, &line_lh, line_size);
+	return statistic_generic_close(inode, file);
+}
+
+/*
+ * statistic_def_open - open handler for the "definition" file
+ *
+ * Renders the current definition of every statistic of this interface
+ * into the read segment list so that read() can return it.
+ */
+static int statistic_def_open(struct inode *inode, struct file *file)
+{
+	struct statistic_interface *interface;
+	struct statistic_file_private *private;
+	int retval = 0;
+	int i;
+
+	retval = statistic_generic_open(inode, file, &interface, &private);
+	if (unlikely(retval))
+		return retval;
+	for (i = 0; i < interface->number; i++) {
+		retval = statistic_fdef(interface, i, private);
+		if (unlikely(retval)) {
+			/* release() is not called when open() fails -
+			 * tear down the per-open state ourselves */
+			statistic_def_close(inode, file);
+			break;
+		}
+	}
+	return retval;
+}
+
+/*
+ * statistic_data_open - open handler for the "data" file
+ *
+ * Gives the exploiter a chance to report current totals through its
+ * optional pull() callback, then renders a snapshot of all statistics
+ * into the read segment list for subsequent read() calls.
+ */
+static int statistic_data_open(struct inode *inode, struct file *file)
+{
+	struct statistic_interface *interface;
+	struct statistic_file_private *private;
+	int retval = 0;
+	int i;
+
+	retval = statistic_generic_open(inode, file, &interface, &private);
+	if (unlikely(retval))
+		return retval;
+	if (interface->pull)
+		interface->pull(interface->pull_private);
+	for (i = 0; i < interface->number; i++) {
+		retval = statistic_fdata(interface, i, private);
+		if (unlikely(retval)) {
+			/* release() is not called when open() fails -
+			 * tear down the per-open state ourselves */
+			statistic_generic_close(inode, file);
+			break;
+		}
+	}
+	return retval;
+}
+
+/* "definition": read current settings; written settings are parsed on close */
+static struct file_operations statistic_def_fops = {
+	.owner		= THIS_MODULE,
+	.read		= statistic_generic_read,
+	.write		= statistic_generic_write,
+	.open		= statistic_def_open,
+	.release	= statistic_def_close,
+};
+
+/* "data": read-only snapshot rendered at open time */
+static struct file_operations statistic_data_fops = {
+	.owner		= THIS_MODULE,
+	.read		= statistic_generic_read,
+	.open		= statistic_data_open,
+	.release	= statistic_generic_close,
+};
+
+/**
+ * statistic_create - setup statistics and create debugfs files
+ * @interface: struct statistic_interface provided by exploiter
+ * @name: name of debugfs directory to be created
+ *
+ * Creates a debugfs directory in "statistics" as well as the "data" and
+ * "definition" files. Then we attach setup statistics according to the
+ * definition provided by exploiter through struct statistic_interface.
+ *
+ * struct statistic_interface must have been set up prior to calling this.
+ *
+ * On success, 0 is returned.
+ *
+ * If some required memory could not be allocated, or the creation
+ * of debugfs entries failed, this routine fails, and -ENOMEM is returned.
+ */
+int statistic_create(struct statistic_interface *interface, const char *name)
+{
+	struct statistic *stat = interface->stat;
+	struct statistic_info *info = interface->info;
+	int i;
+
+	BUG_ON(!stat || !info || !interface->number);
+
+	/* all files live in <debugfs>/statistics/<name>/ */
+	interface->debugfs_dir =
+		debugfs_create_dir(name, statistic_root_dir);
+	if (unlikely(!interface->debugfs_dir))
+		return -ENOMEM;
+
+	interface->data_file = debugfs_create_file(
+		STATISTIC_FILENAME_DATA, S_IFREG | S_IRUSR,
+		interface->debugfs_dir, (void*)interface, &statistic_data_fops);
+	if (unlikely(!interface->data_file)) {
+		debugfs_remove(interface->debugfs_dir);
+		return -ENOMEM;
+	}
+
+	interface->def_file = debugfs_create_file(
+		STATISTIC_FILENAME_DEF, S_IFREG | S_IRUSR | S_IWUSR,
+		interface->debugfs_dir, (void*)interface, &statistic_def_fops);
+	if (unlikely(!interface->def_file)) {
+		debugfs_remove(interface->data_file);
+		debugfs_remove(interface->debugfs_dir);
+		return -ENOMEM;
+	}
+
+	/* initialise each statistic and apply its definition
+	 * (NOTE(review): presumably a NULL line makes statistic_parse_match()
+	 * fall back to info->defaults - confirm, that routine is not part
+	 * of this hunk) */
+	for (i = 0; i < interface->number; i++, stat++, info++) {
+		statistic_transition(stat, info, STATISTIC_STATE_UNCONFIGURED);
+		statistic_parse_match(stat, info, NULL);
+	}
+
+	/* publish on the global list of interfaces */
+	mutex_lock(&statistic_list_mutex);
+	list_add(&interface->list, &statistic_list);
+	mutex_unlock(&statistic_list_mutex);
+	return 0;
+}
+EXPORT_SYMBOL_GPL(statistic_create);
+
+/**
+ * statistic_remove - remove unused statistics
+ * @interface: struct statistic_interface to clean up
+ *
+ * Remove a debugfs directory in "statistics" along with its "data" and
+ * "definition" files. Removing this user interface also causes the removal
+ * of all statistics attached to the interface.
+ *
+ * The exploiter must have ceased reporting statistic data.
+ *
+ * Returns -EINVAL for attempted double removal, 0 otherwise.
+ */
+int statistic_remove(struct statistic_interface *interface)
+{
+	struct statistic *stat = interface->stat;
+	struct statistic_info *info = interface->info;
+	int i;
+
+	/* a NULL debugfs_dir marks an interface that was never created
+	 * or has already been removed */
+	if (unlikely(!interface->debugfs_dir))
+		return -EINVAL;
+	/* unpublish first, then invalidate the statistics themselves */
+	mutex_lock(&statistic_list_mutex);
+	list_del(&interface->list);
+	mutex_unlock(&statistic_list_mutex);
+	for (i = 0; i < interface->number; i++, stat++, info++)
+		statistic_transition(stat, info, STATISTIC_STATE_INVALID);
+	debugfs_remove(interface->data_file);
+	debugfs_remove(interface->def_file);
+	debugfs_remove(interface->debugfs_dir);
+	interface->debugfs_dir = NULL;
+	return 0;
+}
+EXPORT_SYMBOL_GPL(statistic_remove);
+EXPORT_SYMBOL_GPL(statistic_remove);
+
+/* code concerned with single value statistics */
+
+/* zero a single 64 bit counter data area */
+static void statistic_reset_counter(struct statistic *stat, void *ptr)
+{
+	*(u64*)ptr = 0;
+}
+
+/* incremental update: accumulate Y (the increment); X is ignored */
+static void statistic_add_counter_inc(struct statistic *stat, int cpu,
+				     s64 value, u64 incr)
+{
+	*(u64*)stat->pdata->ptrs[cpu] += incr;
+}
+
+/* incremental update: accumulate |X| * Y */
+static void statistic_add_counter_prod(struct statistic *stat, int cpu,
+				       s64 value, u64 incr)
+{
+	if (unlikely(value < 0))
+		value = -value;
+	*(u64*)stat->pdata->ptrs[cpu] += value * incr;
+}
+
+/*
+ * total update: store the exploiter-supplied grand total; here pdata
+ * appears to point directly at the data area instead of per-cpu
+ * pointers - NOTE(review): confirm against the allocation path for
+ * STATISTIC_FLAGS_NOINCR statistics, which is not visible in this hunk
+ */
+static void statistic_set_counter_inc(struct statistic *stat,
+				      s64 value, u64 total)
+{
+	*(u64*)stat->pdata = total;
+}
+
+/* total update variant scaling the total by |X|, cf. add_counter_prod */
+static void statistic_set_counter_prod(struct statistic *stat,
+				       s64 value, u64 total)
+{
+	if (unlikely(value < 0))
+		value = -value;
+	*(u64*)stat->pdata = value * total;
+}
+
+/* fold one CPU's counter into the merge destination */
+static void statistic_merge_counter(struct statistic *stat,
+				    void *dst, void *src)
+{
+	*(u64*)dst += *(u64*)src;
+}
+
+/* print "name value" into the read buffer of the "data" file */
+static int statistic_fdata_counter(struct statistic *stat, const char *name,
+				   struct statistic_file_private *fpriv,
+				   void *data)
+{
+	struct sgrb_seg *seg;
+	seg = sgrb_seg_find(&fpriv->read_seg_lh, 128);
+	if (unlikely(!seg))
+		return -ENOMEM;
+	seg->offset += sprintf(seg->address + seg->offset, "%s %Lu\n",
+			       name, *(unsigned long long *)data);
+	return 0;
+}
+
+/* code concerned with utilisation indicator statistic */
+
+/*
+ * (re)initialise a utilisation data area; min/max start out at extreme
+ * sentinel values so that the first sample replaces both
+ */
+static void statistic_reset_util(struct statistic *stat, void *ptr)
+{
+	struct statistic_entry_util *util = ptr;
+	util->num = 0;
+	util->acc = 0;
+	util->min = (~0ULL >> 1) - 1;	/* close to S64_MAX */
+	util->max = -(~0ULL >> 1) + 1;	/* close to S64_MIN */
+}
+
+/* incremental update: X is the sample, Y its weight (number of hits) */
+static void statistic_add_util(struct statistic *stat, int cpu,
+			       s64 value, u64 incr)
+{
+	struct statistic_entry_util *util = stat->pdata->ptrs[cpu];
+	util->num += incr;
+	util->acc += value * incr;
+	if (unlikely(value < util->min))
+		util->min = value;
+	if (unlikely(value > util->max))
+		util->max = value;
+}
+
+/*
+ * total update; pdata is used as the data area itself here -
+ * NOTE(review): see statistic_set_counter_inc
+ */
+static void statistic_set_util(struct statistic *stat, s64 value, u64 total)
+{
+	struct statistic_entry_util *util;
+	util = (struct statistic_entry_util *) stat->pdata;
+	util->num = total;
+	util->acc = value * total;
+	if (unlikely(value < util->min))
+		util->min = value;
+	if (unlikely(value > util->max))
+		util->max = value;
+}
+
+/* fold one CPU's utilisation record into the merge destination */
+static void statistic_merge_util(struct statistic *stat, void *_dst, void *_src)
+{
+	struct statistic_entry_util *total = _dst;
+	struct statistic_entry_util *part = _src;
+
+	total->num += part->num;
+	total->acc += part->acc;
+	if (part->min < total->min)
+		total->min = part->min;
+	if (part->max > total->max)
+		total->max = part->max;
+}
+
+/*
+ * print "name num min mean max"; the mean is rendered with three
+ * decimal places via manual fixed-point arithmetic (no FPU use in
+ * kernel code)
+ *
+ * NOTE(review): do_div() expects an unsigned 64 bit dividend, but
+ * 'decimal' and 'last_digit' are signed and can even be negative
+ * when util->acc is negative - confirm this is correct on all
+ * architectures
+ */
+static int statistic_fdata_util(struct statistic *stat, const char *name,
+				struct statistic_file_private *fpriv,
+				void *data)
+{
+	struct sgrb_seg *seg;
+	struct statistic_entry_util *util = data;
+	unsigned long long whole = 0;
+	signed long long min = 0, max = 0, decimal = 0, last_digit;
+
+	seg = sgrb_seg_find(&fpriv->read_seg_lh, 128);
+	if (unlikely(!seg))
+		return -ENOMEM;
+	if (likely(util->num)) {
+		whole = util->acc;
+		do_div(whole, util->num);
+		decimal = util->acc * 10000;
+		do_div(decimal, util->num);
+		decimal -= whole * 10000;
+		if (decimal < 0)
+			decimal = -decimal;
+		/* round the four-digit fraction to three digits */
+		last_digit = decimal;
+		do_div(last_digit, 10);
+		last_digit = decimal - last_digit * 10;
+		if (last_digit >= 5)
+			decimal += 10;
+		do_div(decimal, 10);
+		min = util->min;
+		max = util->max;
+	}
+	seg->offset += sprintf(seg->address + seg->offset,
+			       "%s %Lu %Ld %Ld.%03lld %Ld\n", name,
+			       (unsigned long long)util->num,
+			       (signed long long)min, whole, decimal,
+			       (signed long long)max);
+	return 0;
+}
+
+/* code concerned with histogram statistics */
+
+/*
+ * allocate a histogram data area: one u64 per bucket, where the extra
+ * bucket at last_index collects out-of-range hits
+ */
+static void * statistic_alloc_histogram(struct statistic *stat, size_t size,
+					gfp_t flags, int node)
+{
+	return kmalloc_node(size * (stat->u.histogram.last_index + 1),
+			    flags, node);
+}
+
+/* upper bound (X value) of bucket i in a linear histogram */
+static inline s64 statistic_histogram_calc_value_lin(struct statistic *stat,
+						     int i)
+{
+	return stat->u.histogram.range_min +
+		stat->u.histogram.base_interval * i;
+}
+
+/*
+ * upper bound of bucket i in a log2 histogram: range_min for bucket 0,
+ * range_min + base_interval * 2^(i-1) for the following buckets
+ */
+static inline s64 statistic_histogram_calc_value_log2(struct statistic *stat,
+						      int i)
+{
+	return stat->u.histogram.range_min +
+		(i ? (stat->u.histogram.base_interval << (i - 1)) : 0);
+}
+
+/* dispatch on the histogram flavour */
+static inline s64 statistic_histogram_calc_value(struct statistic *stat, int i)
+{
+	if (stat->type == STATISTIC_TYPE_HISTOGRAM_LIN)
+		return statistic_histogram_calc_value_lin(stat, i);
+	else
+		return statistic_histogram_calc_value_log2(stat, i);
+}
+
+/*
+ * bucket index for X in a linear histogram
+ *
+ * Values below range_min are accounted in the first bucket; without
+ * that check the unsigned subtraction below would yield a huge index
+ * and the _add/_set callers would corrupt memory. Values beyond the
+ * range likewise saturate at last_index, the out-of-range bucket
+ * (which statistic_fdata_histogram() prints as "> bound").
+ */
+static inline int statistic_histogram_calc_index_lin(struct statistic *stat,
+						 s64 value)
+{
+	unsigned long long i;
+
+	if (unlikely(value <= stat->u.histogram.range_min))
+		return 0;
+	i = value - stat->u.histogram.range_min;
+	do_div(i, stat->u.histogram.base_interval);
+	if (unlikely(i > stat->u.histogram.last_index))
+		i = stat->u.histogram.last_index;
+	return i;
+}
+
+/*
+ * bucket index for X in a log2 histogram: linear scan over the bucket
+ * bounds, saturating at last_index (the out-of-range bucket)
+ */
+static inline int statistic_histogram_calc_index_log2(struct statistic *stat,
+						      s64 value)
+{
+	unsigned long long i;
+	for (i = 0;
+	     i < stat->u.histogram.last_index &&
+	     value > statistic_histogram_calc_value_log2(stat, i);
+	     i++);
+	return i;
+}
+
+/* dispatch on the histogram flavour */
+static inline int statistic_histogram_calc_index(struct statistic *stat,
+						 s64 value)
+{
+	if (stat->type == STATISTIC_TYPE_HISTOGRAM_LIN)
+		return statistic_histogram_calc_index_lin(stat, value);
+	else
+		return statistic_histogram_calc_index_log2(stat, value);
+}
+
+/* zero all buckets, including the out-of-range bucket at last_index */
+static void statistic_reset_histogram(struct statistic *stat, void *ptr)
+{
+	memset(ptr, 0, (stat->u.histogram.last_index + 1) * sizeof(u64));
+}
+
+/* incremental update: account Y hits in the bucket X falls into */
+static void statistic_add_histogram_lin(struct statistic *stat, int cpu,
+					s64 value, u64 incr)
+{
+	int i = statistic_histogram_calc_index_lin(stat, value);
+	((u64*)stat->pdata->ptrs[cpu])[i] += incr;
+}
+
+/* as above, for the log2 bucket layout */
+static void statistic_add_histogram_log2(struct statistic *stat, int cpu,
+					 s64 value, u64 incr)
+{
+	int i = statistic_histogram_calc_index_log2(stat, value);
+	((u64*)stat->pdata->ptrs[cpu])[i] += incr;
+}
+
+/*
+ * total update: store the grand total for bucket X; pdata is the data
+ * area itself here - NOTE(review): see statistic_set_counter_inc
+ */
+static void statistic_set_histogram_lin(struct statistic *stat,
+					s64 value, u64 total)
+{
+	int i = statistic_histogram_calc_index_lin(stat, value);
+	((u64*)stat->pdata)[i] = total;
+}
+
+/* as above, for the log2 bucket layout */
+static void statistic_set_histogram_log2(struct statistic *stat,
+					 s64 value, u64 total)
+{
+	int i = statistic_histogram_calc_index_log2(stat, value);
+	((u64*)stat->pdata)[i] = total;
+}
+
+/* bucket-wise accumulation, including the out-of-range bucket */
+static void statistic_merge_histogram(struct statistic *stat,
+				      void *_dst, void *_src)
+{
+	u64 *sum = _dst;
+	const u64 *part = _src;
+	int bucket;
+
+	for (bucket = stat->u.histogram.last_index; bucket >= 0; bucket--)
+		sum[bucket] += part[bucket];
+}
+
+/* print one "name <prefix><bound> hits" line into the read buffer */
+static inline int statistic_fdata_histogram_line(const char *name,
+					struct statistic_file_private *private,
+					const char *prefix, s64 bound, u64 hits)
+{
+	struct sgrb_seg *seg;
+	seg = sgrb_seg_find(&private->read_seg_lh, 256);
+	if (unlikely(!seg))
+		return -ENOMEM;
+	seg->offset += sprintf(seg->address + seg->offset, "%s %s%Ld %Lu\n",
+			       name, prefix, (signed long long)bound,
+			       (unsigned long long)hits);
+	return 0;
+}
+
+/*
+ * print all buckets as "<= bound" lines, and the final bucket - which
+ * gathers everything beyond the last bound - as a "> bound" line
+ */
+static int statistic_fdata_histogram(struct statistic *stat, const char *name,
+				     struct statistic_file_private *fpriv,
+				     void *data)
+{
+	int i, retval;
+	s64 bound = 0;
+	for (i = 0; i < (stat->u.histogram.last_index); i++) {
+		bound = statistic_histogram_calc_value(stat, i);
+		retval = statistic_fdata_histogram_line(name, fpriv, "<=",
+							bound, ((u64*)data)[i]);
+		if (unlikely(retval))
+			return retval;
+	}
+	return statistic_fdata_histogram_line(name, fpriv, ">",
+					      bound, ((u64*)data)[i]);
+}
+
+/* append the histogram-specific attributes to a "definition" line */
+static int statistic_fdef_histogram(struct statistic *stat, char *line)
+{
+	return sprintf(line, " range_min=%Li entries=%Li base_interval=%Lu",
+		       (signed long long)stat->u.histogram.range_min,
+		       (unsigned long long)(stat->u.histogram.last_index + 1),
+		       (unsigned long long)stat->u.histogram.base_interval);
+}
+
+/* attribute tokens understood by statistic_parse_histogram() */
+static match_table_t statistic_match_histogram = {
+	{1, "entries=%u"},
+	{2, "base_interval=%s"},
+	{3, "range_min=%s"},
+	{9, NULL}
+};
+
+/*
+ * statistic_parse_histogram - parse histogram attributes from a definition
+ *
+ * Recognized attributes: entries=<n>, base_interval=<n>, range_min=<n>.
+ * Switching a statistic over to a histogram type requires all three;
+ * a redefinition of an existing histogram may supply any subset.
+ *
+ * Returns 0 on success, -EINVAL if required attributes are missing.
+ */
+static int statistic_parse_histogram(struct statistic *stat,
+				     struct statistic_info *info,
+				     int type, char *def)
+{
+	char *p;
+	substring_t args[MAX_OPT_ARGS];
+	int token, got_entries = 0, got_interval = 0, got_range = 0;
+	int entries, base_interval;
+	s64 range_min;
+
+	while ((p = strsep(&def, " ")) != NULL) {
+		if (!*p)
+			continue;
+		token = match_token(p, statistic_match_histogram, args);
+		switch (token) {
+		case 1:
+			/* check the conversion; entries=0 would make
+			 * last_index wrap around below */
+			if (!match_int(&args[0], &entries) && entries > 0)
+				got_entries = 1;
+			break;
+		case 2:
+			/* base_interval is used as a divisor - it must
+			 * be a positive number */
+			if (!match_int(&args[0], &base_interval) &&
+			    base_interval > 0)
+				got_interval = 1;
+			break;
+		case 3:
+			if (!match_s64(&args[0], &range_min, 0))
+				got_range = 1;
+			break;
+		}
+	}
+	if (unlikely(type != stat->type &&
+		     !(got_entries && got_interval && got_range)))
+		return -EINVAL;
+	statistic_transition(stat, info, STATISTIC_STATE_UNCONFIGURED);
+	if (got_entries)
+		stat->u.histogram.last_index = entries - 1;
+	if (got_interval)
+		stat->u.histogram.base_interval = base_interval;
+	if (got_range)
+		stat->u.histogram.range_min = range_min;
+	return 0;
+}
+
+/* code concerned with histograms (discrete value) statistics */
+
+/*
+ * statistic_alloc_sparse - allocate and initialise a sparse value list
+ *
+ * Returns NULL on allocation failure (the previous version dereferenced
+ * the unchecked kmalloc_node() result).
+ */
+static void * statistic_alloc_sparse(struct statistic *stat, size_t size,
+				     gfp_t flags, int node)
+{
+	struct statistic_sparse_list *slist = kmalloc_node(size, flags, node);
+
+	if (unlikely(!slist))
+		return NULL;
+	INIT_LIST_HEAD(&slist->entry_lh);
+	slist->entries_max = stat->u.sparse.entries_max;
+	/* start out with clean counters instead of kmalloc garbage */
+	slist->entries = 0;
+	slist->hits_missed = 0;
+	return slist;
+}
+
+/*
+ * statistic_free_sparse - discard all recorded entries
+ *
+ * Also serves as the reset routine ("reset equals free" in
+ * statistic_discs); hence the counters are cleared and the
+ * statistic_sparse_list head itself is deliberately not freed here.
+ */
+static void statistic_free_sparse(struct statistic *stat, void *ptr)
+{
+	struct statistic_entry_sparse *entry, *tmp;
+	struct statistic_sparse_list *slist = ptr;
+	list_for_each_entry_safe(entry, tmp, &slist->entry_lh, list) {
+		list_del(&entry->list);
+		kfree(entry);
+	}
+	slist->hits_missed = 0;
+	slist->entries = 0;
+}
+
+/*
+ * statistic_add_sparse_sort - restore descending order by hit count
+ *
+ * Scans backwards from the just-updated entry for the first entry with
+ * an equal-or-higher hit count and, unless the updated entry already
+ * sits right behind it, moves the entry up to that position.
+ */
+static inline void statistic_add_sparse_sort(struct list_head *head,
+					struct statistic_entry_sparse *entry)
+{
+	struct statistic_entry_sparse *sort =
+		list_prepare_entry(entry, head, list);
+
+	list_for_each_entry_continue_reverse(sort, head, list)
+		if (likely(sort->hits >= entry->hits))
+			break;
+	if (unlikely(sort->list.next != &entry->list &&
+		     (&sort->list == head || sort->hits >= entry->hits)))
+		list_move(&entry->list, &sort->list);
+}
+
+/*
+ * statistic_add_sparse_new - record a so far unseen discrete value
+ *
+ * Fails with -ENOMEM when the configured entry limit has been reached
+ * or when the (atomic - we may run in any context) allocation fails;
+ * the caller then accounts the hits as missed.
+ */
+static inline int statistic_add_sparse_new(struct statistic_sparse_list *slist,
+					   s64 value, u64 incr)
+{
+	struct statistic_entry_sparse *entry;
+
+	if (unlikely(slist->entries == slist->entries_max))
+		return -ENOMEM;
+	entry = kmalloc(sizeof(struct statistic_entry_sparse), GFP_ATOMIC);
+	if (unlikely(!entry))
+		return -ENOMEM;
+	entry->value = value;
+	entry->hits = incr;
+	slist->entries++;
+	list_add_tail(&entry->list, &slist->entry_lh);
+	return 0;
+}
+
+/*
+ * _statistic_add_sparse - account incr hits for a discrete value
+ *
+ * Bumps a matching entry (re-sorting the list by hit count) or creates
+ * a new one; hits that cannot be stored are summed up in hits_missed.
+ */
+static inline void _statistic_add_sparse(struct statistic_sparse_list *slist,
+					 s64 value, u64 incr)
+{
+	struct list_head *head = &slist->entry_lh;
+	struct statistic_entry_sparse *entry;
+
+	list_for_each_entry(entry, head, list) {
+		if (likely(entry->value == value)) {
+			entry->hits += incr;
+			statistic_add_sparse_sort(head, entry);
+			return;
+		}
+	}
+	if (unlikely(statistic_add_sparse_new(slist, value, incr)))
+		slist->hits_missed += incr;
+}
+
+/* incremental update: account Y hits for discrete value X (per cpu) */
+static void statistic_add_sparse(struct statistic *stat, int cpu,
+				 s64 value, u64 incr)
+{
+	struct statistic_sparse_list *slist = stat->pdata->ptrs[cpu];
+	_statistic_add_sparse(slist, value, incr);
+}
+
+/*
+ * total update: overwrite the hit count recorded for X; pdata is the
+ * data area itself here - NOTE(review): see statistic_set_counter_inc
+ */
+static void statistic_set_sparse(struct statistic *stat, s64 value, u64 total)
+{
+	struct statistic_sparse_list *slist = (struct statistic_sparse_list *)
+						stat->pdata;
+	struct list_head *head = &slist->entry_lh;
+	struct statistic_entry_sparse *entry;
+
+	list_for_each_entry(entry, head, list) {
+		if (likely(entry->value == value)) {
+			entry->hits = total;
+			statistic_add_sparse_sort(head, entry);
+			return;
+		}
+	}
+	if (unlikely(statistic_add_sparse_new(slist, value, total)))
+		slist->hits_missed += total;
+}
+
+/*
+ * fold one CPU's sparse list into the merge destination; entries that
+ * exceed the destination's limit end up in its hits_missed count
+ */
+static void statistic_merge_sparse(struct statistic *stat,
+				   void *_dst, void *_src)
+{
+	struct statistic_sparse_list *dst = _dst, *src = _src;
+	struct statistic_entry_sparse *entry;
+	dst->hits_missed += src->hits_missed;
+	list_for_each_entry(entry, &src->entry_lh, list)
+		_statistic_add_sparse(dst, entry->value, entry->hits);
+}
+
+/*
+ * statistic_fdata_sparse - render a sparse statistic for the "data" file
+ *
+ * Prints the number of missed hits first, followed by one
+ * "name value hits" line per recorded entry (values in hex, hit
+ * counts in decimal).
+ */
+static int statistic_fdata_sparse(struct statistic *stat, const char *name,
+				  struct statistic_file_private *fpriv,
+				  void *data)
+{
+	struct sgrb_seg *seg;
+	struct statistic_sparse_list *slist = data;
+	struct statistic_entry_sparse *entry;
+
+	seg = sgrb_seg_find(&fpriv->read_seg_lh, 256);
+	if (unlikely(!seg))
+		return -ENOMEM;
+	/* hits_missed is a plain count - print it in decimal, without
+	 * the bogus "0x" prefix the old format string attached to %Lu */
+	seg->offset += sprintf(seg->address + seg->offset, "%s missed %Lu\n",
+			       name, (unsigned long long)slist->hits_missed);
+	list_for_each_entry(entry, &slist->entry_lh, list) {
+		seg = sgrb_seg_find(&fpriv->read_seg_lh, 256);
+		if (unlikely(!seg))
+			return -ENOMEM;
+		/* %Lx consumes an unsigned argument */
+		seg->offset += sprintf(seg->address + seg->offset,
+				       "%s 0x%Lx %Lu\n", name,
+				       (unsigned long long)entry->value,
+				       (unsigned long long)entry->hits);
+	}
+	return 0;
+}
+
+/* append the sparse-specific attribute to a "definition" line */
+static int statistic_fdef_sparse(struct statistic *stat, char *line)
+{
+	return sprintf(line, " entries=%u", stat->u.sparse.entries_max);
+}
+
+/* attribute token understood by statistic_parse_sparse() */
+static match_table_t statistic_match_sparse = {
+	{1, "entries=%u"},
+	{9, NULL}
+};
+
+/*
+ * statistic_parse_sparse - parse the sparse attributes from a definition
+ *
+ * Recognizes entries=<n>, the maximum number of distinct values to
+ * record. Returns 0 on success, -EINVAL if the attribute is missing
+ * or malformed.
+ */
+static int statistic_parse_sparse(struct statistic *stat,
+				  struct statistic_info *info,
+				  int type, char *def)
+{
+	char *p;
+	substring_t args[MAX_OPT_ARGS];
+	int entries;
+
+	while ((p = strsep(&def, " ")) != NULL) {
+		if (!*p)
+			continue;
+		if (match_token(p, statistic_match_sparse, args) != 1)
+			continue;
+		/* match_int() takes an int * - don't feed it the u32
+		 * member directly; reject garbage and negative counts
+		 * (which would wrap around in entries_max) */
+		if (match_int(&args[0], &entries) || entries < 0)
+			return -EINVAL;
+		statistic_transition(stat, info,
+				     STATISTIC_STATE_UNCONFIGURED);
+		stat->u.sparse.entries_max = entries;
+		return 0;
+	}
+	return -EINVAL;
+}
+
+/* code mostly concerned with managing statistics */
+
+/*
+ * Table of data processing modes, indexed by enum statistic_type.
+ * Designated initializers keep every entry tied to its enum value and
+ * struct statistic_discipline field regardless of declaration order;
+ * unmentioned handlers stay NULL, exactly as before.
+ */
+static struct statistic_discipline statistic_discs[] = {
+	[STATISTIC_TYPE_COUNTER_INC] = {
+		.alloc	= statistic_alloc_generic,
+		.reset	= statistic_reset_counter,
+		.merge	= statistic_merge_counter,
+		.fdata	= statistic_fdata_counter,
+		.add	= statistic_add_counter_inc,
+		.set	= statistic_set_counter_inc,
+		.name	= "counter_inc",
+		.size	= sizeof(u64),
+	},
+	[STATISTIC_TYPE_COUNTER_PROD] = {
+		.alloc	= statistic_alloc_generic,
+		.reset	= statistic_reset_counter,
+		.merge	= statistic_merge_counter,
+		.fdata	= statistic_fdata_counter,
+		.add	= statistic_add_counter_prod,
+		.set	= statistic_set_counter_prod,
+		.name	= "counter_prod",
+		.size	= sizeof(u64),
+	},
+	[STATISTIC_TYPE_UTIL] = {
+		.alloc	= statistic_alloc_generic,
+		.reset	= statistic_reset_util,
+		.merge	= statistic_merge_util,
+		.fdata	= statistic_fdata_util,
+		.add	= statistic_add_util,
+		.set	= statistic_set_util,
+		.name	= "utilisation",
+		.size	= sizeof(struct statistic_entry_util),
+	},
+	[STATISTIC_TYPE_HISTOGRAM_LIN] = {
+		.parse	= statistic_parse_histogram,
+		.alloc	= statistic_alloc_histogram,
+		.reset	= statistic_reset_histogram,
+		.merge	= statistic_merge_histogram,
+		.fdata	= statistic_fdata_histogram,
+		.fdef	= statistic_fdef_histogram,
+		.add	= statistic_add_histogram_lin,
+		.set	= statistic_set_histogram_lin,
+		.name	= "histogram_lin",
+		.size	= sizeof(u64),
+	},
+	[STATISTIC_TYPE_HISTOGRAM_LOG2] = {
+		.parse	= statistic_parse_histogram,
+		.alloc	= statistic_alloc_histogram,
+		.reset	= statistic_reset_histogram,
+		.merge	= statistic_merge_histogram,
+		.fdata	= statistic_fdata_histogram,
+		.fdef	= statistic_fdef_histogram,
+		.add	= statistic_add_histogram_log2,
+		.set	= statistic_set_histogram_log2,
+		.name	= "histogram_log2",
+		.size	= sizeof(u64),
+	},
+	[STATISTIC_TYPE_SPARSE] = {
+		.parse	= statistic_parse_sparse,
+		.alloc	= statistic_alloc_sparse,
+		.free	= statistic_free_sparse,
+		.reset	= statistic_free_sparse,	/* reset equals free */
+		.merge	= statistic_merge_sparse,
+		.fdata	= statistic_fdata_sparse,
+		.fdef	= statistic_fdef_sparse,
+		.add	= statistic_add_sparse,
+		.set	= statistic_set_sparse,
+		.name	= "sparse",
+		.size	= sizeof(struct statistic_sparse_list),
+	},
+	[STATISTIC_TYPE_NONE] = { /* sentinel */ }
+};
+
+postcore_initcall(statistic_init);
+module_exit(statistic_exit);
+
+MODULE_LICENSE("GPL");
diff -Nurp a/lib/Makefile b/lib/Makefile
--- a/lib/Makefile	2006-05-19 15:44:27.000000000 +0200
+++ b/lib/Makefile	2006-05-19 16:02:23.000000000 +0200
@@ -47,6 +47,8 @@ obj-$(CONFIG_TEXTSEARCH_KMP) += ts_kmp.o
 obj-$(CONFIG_TEXTSEARCH_BM) += ts_bm.o
 obj-$(CONFIG_TEXTSEARCH_FSM) += ts_fsm.o
 
+obj-$(CONFIG_STATISTICS) += statistic.o
+
 obj-$(CONFIG_SWIOTLB) += swiotlb.o
 
 hostprogs-y	:= gen_crc32table
diff -Nurp a/arch/s390/Kconfig b/arch/s390/Kconfig
--- a/arch/s390/Kconfig	2006-05-19 15:44:22.000000000 +0200
+++ b/arch/s390/Kconfig	2006-05-19 16:02:23.000000000 +0200
@@ -474,8 +474,14 @@ source "drivers/net/Kconfig"
 
 source "fs/Kconfig"
 
+menu "Instrumentation Support"
+
 source "arch/s390/oprofile/Kconfig"
 
+source "lib/Kconfig.statistic"
+
+endmenu
+
 source "arch/s390/Kconfig.debug"
 
 source "security/Kconfig"
diff -Nurp a/lib/Kconfig.statistic b/lib/Kconfig.statistic
--- a/lib/Kconfig.statistic	1970-01-01 01:00:00.000000000 +0100
+++ b/lib/Kconfig.statistic	2006-05-19 16:02:23.000000000 +0200
@@ -0,0 +1,11 @@
+config STATISTICS
+	bool "Statistics infrastructure"
+	depends on DEBUG_FS
+	help
+	  The statistics infrastructure provides a debug-fs based user interface
+	  for statistics of kernel components, that is, usually device drivers.
+	  Statistics are available for components that have been instrumented to
+	  feed data into the statistics infrastructure.
+	  This feature is useful for performance measurements or performance
+	  debugging.
+	  If in doubt, say "N".
diff -Nurp a/arch/s390/oprofile/Kconfig b/arch/s390/oprofile/Kconfig
--- a/arch/s390/oprofile/Kconfig	2006-03-20 06:53:29.000000000 +0100
+++ b/arch/s390/oprofile/Kconfig	2006-05-19 16:02:23.000000000 +0200
@@ -1,6 +1,3 @@
-
-menu "Profiling support"
-
 config PROFILING
 	bool "Profiling support"
 	help
@@ -18,5 +15,3 @@ config OPROFILE
 
 	  If unsure, say N.
 
-endmenu
-
diff -Nurp a/MAINTAINERS b/MAINTAINERS
--- a/MAINTAINERS	2006-05-19 15:44:32.000000000 +0200
+++ b/MAINTAINERS	2006-05-19 16:02:23.000000000 +0200
@@ -2633,6 +2633,13 @@ STARMODE RADIO IP (STRIP) PROTOCOL DRIVE
 W:	http://mosquitonet.Stanford.EDU/strip.html
 S:	Unsupported ?
 
+STATISTICS INFRASTRUCTURE
+P:	Martin Peschke
+M:	mpeschke@de.ibm.com
+M:	linux390@de.ibm.com
+W:	http://www.ibm.com/developerworks/linux/linux390/
+S:	Supported
+
 STRADIS MPEG-2 DECODER DRIVER
 P:	Nathan Laredo
 M:	laredo@gnu.org



^ permalink raw reply	[flat|nested] 16+ messages in thread
* [Patch 5/6] statistics infrastructure
@ 2006-05-19 16:13 Martin Peschke
  0 siblings, 0 replies; 16+ messages in thread
From: Martin Peschke @ 2006-05-19 16:13 UTC (permalink / raw)
  To: Andrew Morton; +Cc: linux-kernel@vger.kernel.org

This patch adds statistics infrastructure as common code.

Signed-off-by: Martin Peschke <mp3@de.ibm.com>
---

 MAINTAINERS                |    7
 arch/s390/Kconfig          |    6
 arch/s390/oprofile/Kconfig |    5
 include/linux/statistic.h  |  348 ++++++++++
 lib/Kconfig.statistic      |   11
 lib/Makefile               |    2
 lib/statistic.c            | 1459 +++++++++++++++++++++++++++++++++++++++++++++
 7 files changed, 1833 insertions(+), 5 deletions(-)

diff -Nurp a/include/linux/statistic.h b/include/linux/statistic.h
--- a/include/linux/statistic.h	1970-01-01 01:00:00.000000000 +0100
+++ b/include/linux/statistic.h	2006-05-19 16:23:07.000000000 +0200
@@ -0,0 +1,348 @@
+/*
+ * include/linux/statistic.h
+ *
+ * Statistics facility
+ *
+ * (C) Copyright IBM Corp. 2005, 2006
+ *
+ * Author(s): Martin Peschke <mpeschke@de.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#ifndef STATISTIC_H
+#define STATISTIC_H
+
+#include <linux/fs.h>
+#include <linux/types.h>
+#include <linux/percpu.h>
+
+#define STATISTIC_ROOT_DIR	"statistics"
+
+#define STATISTIC_FILENAME_DATA	"data"
+#define STATISTIC_FILENAME_DEF	"definition"
+
+#define STATISTIC_NEED_BARRIER	1
+
+struct statistic;
+
+enum statistic_state {
+	STATISTIC_STATE_INVALID,
+	STATISTIC_STATE_UNCONFIGURED,
+	STATISTIC_STATE_RELEASED,
+	STATISTIC_STATE_OFF,
+	STATISTIC_STATE_ON
+};
+
+enum statistic_type {
+	STATISTIC_TYPE_COUNTER_INC,
+	STATISTIC_TYPE_COUNTER_PROD,
+	STATISTIC_TYPE_UTIL,
+	STATISTIC_TYPE_HISTOGRAM_LIN,
+	STATISTIC_TYPE_HISTOGRAM_LOG2,
+	STATISTIC_TYPE_SPARSE,
+	STATISTIC_TYPE_NONE
+};
+
+#define STATISTIC_FLAGS_NOINCR	0x01
+
+/**
+ * struct statistic_info - description of a class of statistics
+ * @name: pointer to name string
+ * @x_unit: pointer to string describing unit of X of (X, Y) data pair
+ * @y_unit: pointer to string describing unit of Y of (X, Y) data pair
+ * @flags: only flag so far (distinction of incremental and other statistic)
+ * @defaults: pointer to string describing defaults setting for attributes
+ *
+ * Exploiters must setup an array of struct statistic_info for a
+ * corresponding array of struct statistic, which are then pointed to
+ * by struct statistic_interface.
+ *
+ * Struct statistic_info and all members and addressed strings must stay for
+ * the lifetime of corresponding statistics created with statistic_create().
+ *
+ * Except for the name string, all other members may be left blank.
+ * It would be nice of exploiters to fill it out completely, though.
+ */
+struct statistic_info {
+/* public: */
+	char *name;
+	char *x_unit;
+	char *y_unit;
+	int  flags;
+	char *defaults;
+};
+
+/**
+ * struct statistic_interface - collection of statistics for an entity
+ * @stat: a struct statistic array
+ * @info: a struct statistic_info array describing the struct statistic array
+ * @number: number of entries in both arrays
+ * @pull: an optional function called when user reads data from file
+ * @pull_private: optional data pointer passed to pull function
+ *
+ * Exploiters must setup a struct statistic_interface prior to calling
+ * statistic_create().
+ */
+struct statistic_interface {
+/* private: */
+	struct list_head	 list;
+	struct dentry		*debugfs_dir;
+	struct dentry		*data_file;
+	struct dentry		*def_file;
+/* public: */
+	struct statistic	*stat;
+	struct statistic_info	*info;
+	int			 number;
+	int			(*pull)(void*);
+	void			*pull_private;
+};
+
+/* one chunk of a scatter-gather buffer used by the file interface */
+struct sgrb_seg {
+	struct list_head list;
+	char *address;	/* start of the chunk */
+	int offset;	/* bytes used so far */
+	int size;	/* bytes allocated */
+};
+
+/* per-open state of a "data" or "definition" file */
+struct statistic_file_private {
+	struct list_head read_seg_lh;	/* rendered output, consumed by read() */
+	struct list_head write_seg_lh;	/* buffered input, parsed on close() */
+	size_t write_seg_total_size;	/* number of bytes buffered for writing */
+};
+
+/* context for merging per-cpu data areas into one destination area */
+struct statistic_merge_private {
+	struct statistic *stat;
+	spinlock_t lock;	/* serialises merging into dst */
+	void *dst;
+};
+
+/**
+ * struct statistic_discipline - description of a data processing mode
+ * @parse: parses additional attributes specific to this mode (if any)
+ * @alloc: allocates a data area (mandatory, default routine available)
+ * @free: frees a data area (optional, kfree() is used otherwise)
+ * @reset: discards content of a data area (mandatory)
+ * @merge: merges content of a data area into another data area (mandatory)
+ * @fdata: prints content of a data area into buffer (mandatory)
+ * @fdef: prints additional attributes specific to this mode (if any)
+ * @add: updates a data area for a statistic fed incremental data (mandatory)
+ * @set: updates a data area for a statistic fed total numbers (mandatory)
+ * @name: pointer to name string (mandatory)
+ * @size: base size for a data area (passed to alloc function)
+ *
+ * Struct statistic_discipline describes a statistic infrastructure internal
+ * programming interface. Another data processing mode can be added by
+ * implementing these routines and appending an entry to the
+ * statistic_discs array.
+ *
+ * "Data area" in the above description usually means a chunk of memory,
+ * may it be allocated for data gathering per CPU, or be shared by all
+ * CPUs, or used for other purposes, like merging per-CPU data when
+ * users read data from files. Implementers of data processing modes
+ * don't need to worry about the designation of a particular chunk of memory.
+ * A data area of a data processing mode always has to look the same.
+ */
+struct statistic_discipline {
+	int (*parse)(struct statistic *, struct statistic_info *, int, char *);
+	void* (*alloc)(struct statistic *, size_t, gfp_t, int);
+	void (*free)(struct statistic *, void *);
+	void (*reset)(struct statistic *, void *);
+	void (*merge)(struct statistic *, void *, void*);
+	int (*fdata)(struct statistic *, const char *,
+		     struct statistic_file_private *, void *);
+	int (*fdef)(struct statistic *, char *);
+	void (*add)(struct statistic *, int, s64, u64);
+	void (*set)(struct statistic *, s64, u64);
+	char *name;
+	size_t size;
+};
+
+struct statistic_entry_util {
+	u32 res;
+	u32 num;	/* FIXME: better 64 bit; do_div can't deal with it) */
+	s64 acc;
+	s64 min;
+	s64 max;
+};
+
+struct statistic_entry_sparse {
+	struct list_head list;
+	s64 value;
+	u64 hits;
+};
+
+struct statistic_sparse_list {
+	struct list_head entry_lh;
+	u32 entries;
+	u32 entries_max;
+	u64 hits_missed;
+};
+
+/**
+ * struct statistic - any data required for gathering data for a statistic
+ */
+struct statistic {
+/* private: */
+	enum statistic_state	 state;
+	enum statistic_type	 type;	/* index into statistic_discs[] */
+	/* per-CPU data areas; used as a single shared area in NOINCR mode */
+	struct percpu_data	*pdata;
+	/* cached copy of the discipline's add hook (hot path) */
+	void			(*add)(struct statistic *, int, s64, u64);
+	/* sched_clock() stamps of last start, stop and reset/alloc */
+	u64			 started;
+	u64			 stopped;
+	u64			 age;
+	union {
+		struct {
+			s64 range_min;
+			u32 last_index;
+			u32 base_interval;
+		} histogram;
+		struct {
+			u32 entries_max;
+		} sparse;
+	} u;
+};
+
+#ifdef CONFIG_STATISTICS
+
+extern int statistic_create(struct statistic_interface *, const char *);
+extern int statistic_remove(struct statistic_interface *);
+
+/**
+ * statistic_add - update statistic with incremental data in (X, Y) pair
+ * @stat: struct statistic array
+ * @i: index of statistic to be updated
+ * @value: X
+ * @incr: Y
+ *
+ * The actual processing of the (X, Y) data pair is determined by the current
+ * definition applied to the statistic. See Documentation/statistics.txt.
+ *
+ * This variant takes care of protecting per-cpu data. It is preferred whenever
+ * exploiters don't update several statistics of the same entity in one go.
+ */
+static inline void statistic_add(struct statistic *stat, int i,
+				 s64 value, u64 incr)
+{
+	unsigned long flags;
+	/* keep the per-cpu update atomic wrt. interrupt context updates */
+	local_irq_save(flags);
+	if (stat[i].state == STATISTIC_STATE_ON)
+		stat[i].add(&stat[i], smp_processor_id(), value, incr);
+	local_irq_restore(flags);
+}
+
+/**
+ * statistic_add_nolock - update statistic with incremental data in (X, Y) pair
+ * @stat: struct statistic array
+ * @i: index of statistic to be updated
+ * @value: X
+ * @incr: Y
+ *
+ * The actual processing of the (X, Y) data pair is determined by the current
+ * definition applied to the statistic. See Documentation/statistics.txt.
+ *
+ * This variant leaves protecting per-cpu data to exploiters. It is preferred
+ * whenever exploiters update several statistics of the same entity in one go.
+ */
+static inline void statistic_add_nolock(struct statistic *stat, int i,
+					s64 value, u64 incr)
+{
+	/* caller must have disabled interrupts (see statistic_add()) */
+	if (stat[i].state == STATISTIC_STATE_ON)
+		stat[i].add(&stat[i], smp_processor_id(), value, incr);
+}
+
+/**
+ * statistic_inc - update statistic with incremental data in (X, 1) pair
+ * @stat: struct statistic array
+ * @i: index of statistic to be updated
+ * @value: X
+ *
+ * The actual processing of the (X, Y) data pair is determined by the current
+ * definition applied to the statistic. See Documentation/statistics.txt.
+ *
+ * This variant takes care of protecting per-cpu data. It is preferred whenever
+ * exploiters don't update several statistics of the same entity in one go.
+ */
+static inline void statistic_inc(struct statistic *stat, int i, s64 value)
+{
+	unsigned long flags;
+	/* same as statistic_add() with an increment of 1 */
+	local_irq_save(flags);
+	if (stat[i].state == STATISTIC_STATE_ON)
+		stat[i].add(&stat[i], smp_processor_id(), value, 1);
+	local_irq_restore(flags);
+}
+
+/**
+ * statistic_inc_nolock - update statistic with incremental data in (X, 1) pair
+ * @stat: struct statistic array
+ * @i: index of statistic to be updated
+ * @value: X
+ *
+ * The actual processing of the (X, Y) data pair is determined by the current
+ * definition applied to the statistic. See Documentation/statistics.txt.
+ *
+ * This variant leaves protecting per-cpu data to exploiters. It is preferred
+ * whenever exploiters update several statistics of the same entity in one go.
+ */
+static inline void statistic_inc_nolock(struct statistic *stat, int i,
+					s64 value)
+{
+	/* caller must have disabled interrupts (see statistic_add()) */
+	if (stat[i].state == STATISTIC_STATE_ON)
+		stat[i].add(&stat[i], smp_processor_id(), value, 1);
+}
+
+extern void statistic_set(struct statistic *, int, s64, u64);
+
+#else /* CONFIG_STATISTICS */
+
+/* CONFIG_STATISTICS=n: all operations collapse to successful no-ops */
+static inline int statistic_create(struct statistic_interface *interface,
+				   const char *name)
+{
+	return 0;
+}
+
+static inline int statistic_remove(
+				struct statistic_interface *interface_ptr)
+{
+	return 0;
+}
+
+static inline void statistic_add(struct statistic *stat, int i,
+				 s64 value, u64 incr)
+{
+}
+
+static inline void statistic_add_nolock(struct statistic *stat, int i,
+					s64 value, u64 incr)
+{
+}
+
+static inline void statistic_inc(struct statistic *stat, int i, s64 value)
+{
+}
+
+static inline void statistic_inc_nolock(struct statistic *stat, int i,
+					s64 value)
+{
+}
+
+static inline void statistic_set(struct statistic *stat, int i,
+				 s64 value, u64 total)
+{
+}
+
+#endif /* CONFIG_STATISTICS */
+
+#endif /* STATISTIC_H */
diff -Nurp a/lib/statistic.c b/lib/statistic.c
--- a/lib/statistic.c	1970-01-01 01:00:00.000000000 +0100
+++ b/lib/statistic.c	2006-05-19 16:22:55.000000000 +0200
@@ -0,0 +1,1459 @@
+/*
+ *  lib/statistic.c
+ *    statistics facility
+ *
+ *    Copyright (C) 2005, 2006
+ *		IBM Deutschland Entwicklung GmbH,
+ *		IBM Corporation
+ *
+ *    Author(s): Martin Peschke (mpeschke@de.ibm.com),
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ *    another bunch of ideas being pondered:
+ *	- define a set of agreed names or a naming scheme for
+ *	  consistency and comparability across exploiters;
+ *	  this entails an agreement about granularities
+ *	  as well (e.g. separate statistic for read/write/no-data commands);
+ *	  a common set of unit strings would be nice then, too, of course
+ *	  (e.g. "seconds", "milliseconds", "microseconds", ...)
+ *	- perf. opt. of array: table lookup of values, binary search for values
+ *	- another statistic disclipline based on some sort of tree, but
+ *	  similar in semantics to list discipline (for high-perf. histograms of
+ *	  discrete values)
+ *	- allow for more than a single "view" on data at the same time by
+ *	  providing the capability to attach several (a list of) "definitions"
+ *	  to a struct statistic
+ *	  (e.g. show histogram of requests sizes and history of megabytes/sec.
+ *	  at the same time)
+ *	- multi-dimensional statistic (combination of two or more
+ *	  characteristics/discriminators); worth the effort??
+ *	  (e.g. a matrix of occurences for latencies of requests of
+ *	  particular sizes)
+ *
+ *	FIXME:
+ *	- statistics file access when statistics are being removed
+ */
+
+#include <linux/fs.h>
+#include <linux/debugfs.h>
+#include <linux/module.h>
+#include <linux/list.h>
+#include <linux/parser.h>
+#include <linux/time.h>
+#include <linux/sched.h>
+#include <linux/cpu.h>
+#include <linux/percpu.h>
+#include <linux/mutex.h>
+#include <linux/statistic.h>
+
+#include <asm/bug.h>
+#include <asm/uaccess.h>
+
+static struct statistic_discipline statistic_discs[];
+
+/* state transition helpers driven by statistic_transition() */
+
+static inline int statistic_initialise(struct statistic *stat)
+{
+	stat->type = STATISTIC_TYPE_NONE;
+	stat->state = STATISTIC_STATE_UNCONFIGURED;
+	return 0;
+}
+
+static inline int statistic_uninitialise(struct statistic *stat)
+{
+	stat->state = STATISTIC_STATE_INVALID;
+	return 0;
+}
+
+/* leaving UNCONFIGURED requires that a discipline was chosen first */
+static inline int statistic_define(struct statistic *stat)
+{
+	if (stat->type == STATISTIC_TYPE_NONE)
+		return -EINVAL;
+	stat->state = STATISTIC_STATE_RELEASED;
+	return 0;
+}
+
+/* reset one data area via its discipline; tolerates ptr == NULL */
+static inline void statistic_reset_ptr(struct statistic *stat, void *ptr)
+{
+	struct statistic_discipline *disc = &statistic_discs[stat->type];
+	if (ptr)
+		disc->reset(stat, ptr);
+}
+
+/*
+ * Fold @src into the current CPU's data area (used to rescue the data
+ * of a CPU that goes away, see _statistic_hotcpu()).
+ */
+static inline void statistic_move_ptr(struct statistic *stat, void *src)
+{
+	struct statistic_discipline *disc = &statistic_discs[stat->type];
+	unsigned long flags;
+	local_irq_save(flags);
+	disc->merge(stat, stat->pdata->ptrs[smp_processor_id()], src);
+	local_irq_restore(flags);
+}
+
+/* free one data area; a discipline's free hook, if any, runs first */
+static inline void statistic_free_ptr(struct statistic *stat, void *ptr)
+{
+	struct statistic_discipline *disc = &statistic_discs[stat->type];
+	if (ptr) {
+		if (unlikely(disc->free))
+			disc->free(stat, ptr);
+		kfree(ptr);
+	}
+}
+
+/*
+ * Release all data areas and fall back to RELEASED state.
+ * In NOINCR mode pdata itself is the single data area; otherwise
+ * for_each_cpu() walks all possible CPUs, which also covers areas
+ * still held for CPUs that went offline meanwhile (ptrs are NULL
+ * for CPUs that never had one; statistic_free_ptr() tolerates that).
+ */
+static int statistic_free(struct statistic *stat, struct statistic_info *info)
+{
+	int cpu;
+	stat->state = STATISTIC_STATE_RELEASED;
+	if (unlikely(info->flags & STATISTIC_FLAGS_NOINCR)) {
+		statistic_free_ptr(stat, stat->pdata);
+		stat->pdata = NULL;
+		return 0;
+	}
+	for_each_cpu(cpu) {
+		statistic_free_ptr(stat, stat->pdata->ptrs[cpu]);
+		stat->pdata->ptrs[cpu] = NULL;
+	}
+	kfree(stat->pdata);
+	stat->pdata = NULL;
+	return 0;
+}
+
+/* default alloc hook: plain node-local kmalloc of the discipline's size */
+static void * statistic_alloc_generic(struct statistic *stat, size_t size,
+				      gfp_t flags, int node)
+{
+	return kmalloc_node(size, flags, node);
+}
+
+/* allocate one data area via the discipline and hand it back reset */
+static void * statistic_alloc_ptr(struct statistic *stat, gfp_t flags, int node)
+{
+	struct statistic_discipline *disc = &statistic_discs[stat->type];
+	void *buf = disc->alloc(stat, disc->size, flags, node);
+	if (likely(buf))
+		statistic_reset_ptr(stat, buf);
+	return buf;
+}
+
+/*
+ * Allocate the data area(s) and enter OFF state. NOINCR statistics get
+ * one shared area (stored in pdata directly); otherwise one area per
+ * online CPU is allocated node-locally. CPUs coming online later are
+ * handled by the hotplug notifier (statistic_hotcpu()).
+ */
+static int statistic_alloc(struct statistic *stat,
+			   struct statistic_info *info)
+{
+	int cpu;
+	stat->age = sched_clock();
+	if (unlikely(info->flags & STATISTIC_FLAGS_NOINCR)) {
+		stat->pdata = statistic_alloc_ptr(stat, GFP_KERNEL, -1);
+		if (unlikely(!stat->pdata))
+			return -ENOMEM;
+		stat->state = STATISTIC_STATE_OFF;
+		return 0;
+	}
+	stat->pdata = kzalloc(sizeof(struct percpu_data), GFP_KERNEL);
+	if (unlikely(!stat->pdata))
+		return -ENOMEM;
+	for_each_online_cpu(cpu) {
+		/* on failure, statistic_free() cleans up partial allocation */
+		stat->pdata->ptrs[cpu] = statistic_alloc_ptr(stat, GFP_KERNEL,
+							     cpu_to_node(cpu));
+		if (unlikely(!stat->pdata->ptrs[cpu])) {
+			statistic_free(stat, info);
+			return -ENOMEM;
+		}
+	}
+	stat->state = STATISTIC_STATE_OFF;
+	return 0;
+}
+
+static inline int statistic_start(struct statistic *stat)
+{
+	stat->started = sched_clock();
+	stat->state = STATISTIC_STATE_ON;
+	return 0;
+}
+
+/* empty IPI handler; merely forces a rendezvous with all CPUs */
+static void _statistic_barrier(void *unused)
+{
+}
+
+static inline int statistic_stop(struct statistic *stat)
+{
+	stat->stopped = sched_clock();
+	stat->state = STATISTIC_STATE_OFF;
+	/* ensures that all CPUs have ceased updating statistics */
+	smp_mb();
+	on_each_cpu(_statistic_barrier, NULL, 0, 1);
+	return 0;
+}
+
+/*
+ * Walk the state machine one step at a time until @requested_state is
+ * reached. @z selects the direction: non-zero means stepping down
+ * towards INVALID, zero means stepping up towards ON. Illegal requests
+ * (e.g. past the ends of the chain) yield -EINVAL; a failing step
+ * aborts the walk and its error is returned.
+ */
+static int statistic_transition(struct statistic *stat,
+				struct statistic_info *info,
+				enum statistic_state requested_state)
+{
+	int z = (requested_state < stat->state ? 1 : 0);
+	int retval = -EINVAL;
+
+	while (stat->state != requested_state) {
+		switch (stat->state) {
+		case STATISTIC_STATE_INVALID:
+			retval = ( z ? -EINVAL : statistic_initialise(stat) );
+			break;
+		case STATISTIC_STATE_UNCONFIGURED:
+			retval = ( z ? statistic_uninitialise(stat)
+				     : statistic_define(stat) );
+			break;
+		case STATISTIC_STATE_RELEASED:
+			retval = ( z ? statistic_initialise(stat)
+				     : statistic_alloc(stat, info) );
+			break;
+		case STATISTIC_STATE_OFF:
+			retval = ( z ? statistic_free(stat, info)
+				     : statistic_start(stat) );
+			break;
+		case STATISTIC_STATE_ON:
+			retval = ( z ? statistic_stop(stat) : -EINVAL );
+			break;
+		}
+		if (unlikely(retval))
+			return retval;
+	}
+	return 0;
+}
+
+/*
+ * Wipe gathered data. The statistic is briefly forced to OFF so no CPU
+ * updates it while the areas are reset, then the previous state is
+ * restored. Statistics below OFF have no data areas - nothing to do.
+ */
+static int statistic_reset(struct statistic *stat, struct statistic_info *info)
+{
+	enum statistic_state prev_state = stat->state;
+	int cpu;
+
+	if (unlikely(stat->state < STATISTIC_STATE_OFF))
+		return 0;
+	statistic_transition(stat, info, STATISTIC_STATE_OFF);
+	if (unlikely(info->flags & STATISTIC_FLAGS_NOINCR))
+		statistic_reset_ptr(stat, stat->pdata);
+	else
+		for_each_cpu(cpu)
+			statistic_reset_ptr(stat, stat->pdata->ptrs[cpu]);
+	stat->age = sched_clock();
+	statistic_transition(stat, info, prev_state);
+	return 0;
+}
+
+/*
+ * Per-CPU IPI handler (run via on_each_cpu() in statistic_fdata()):
+ * folds this CPU's data area into the shared destination area; the
+ * spinlock serialises the concurrent merges from all CPUs.
+ */
+static void statistic_merge(void *__mpriv)
+{
+	struct statistic_merge_private *mpriv = __mpriv;
+	struct statistic *stat = mpriv->stat;
+	struct statistic_discipline *disc = &statistic_discs[stat->type];
+	spin_lock(&mpriv->lock);
+	disc->merge(stat, mpriv->dst, stat->pdata->ptrs[smp_processor_id()]);
+	spin_unlock(&mpriv->lock);
+}
+
+/**
+ * statistic_set - set statistic using total numbers in (X, Y) data pair
+ * @stat: struct statistic array
+ * @i: index of statistic to be updated
+ * @value: X
+ * @total: Y
+ *
+ * The actual processing of the (X, Y) data pair is determined by the current
+ * definition applied to the statistic. See Documentation/statistics.txt.
+ *
+ * There is no distinction between a concurrency protected and unprotected
+ * statistic_set() flavour needed. statistic_set() may only
+ * be called when we pull statistic updates from exploiters. The statistics
+ * infrastructure guarantees serialisation for that. Exploiters must not
+ * intermix statistic_set() and statistic_add/inc() anyway. That is why,
+ * concurrent updates won't happen and there is no additional protection
+ * required for statistics fed through statistic_set().
+ */
+void statistic_set(struct statistic *stat, int i, s64 value, u64 total)
+{
+	/* see kernel-doc above: serialisation is guaranteed by the caller */
+	struct statistic_discipline *disc = &statistic_discs[stat[i].type];
+	if (stat[i].state == STATISTIC_STATE_ON)
+		disc->set(&stat[i], value, total);
+}
+
+/*
+ * Return a buffer segment with at least @size spare bytes, growing the
+ * list by one fresh page if necessary. Only the last segment on the
+ * list can have spare room, hence the reverse iteration with an
+ * immediate break. Returns NULL on allocation failure.
+ * NOTE(review): assumes @size <= PAGE_SIZE - callers pass <= 512.
+ */
+static struct sgrb_seg * sgrb_seg_find(struct list_head *lh, int size)
+{
+	struct sgrb_seg *seg;
+
+	/* only the last buffer, if any, may have spare bytes */
+	list_for_each_entry_reverse(seg, lh, list) {
+		if (likely((PAGE_SIZE - seg->offset) >= size))
+			return seg;
+		break;
+	}
+	seg = kzalloc(sizeof(struct sgrb_seg), GFP_KERNEL);
+	if (unlikely(!seg))
+		return NULL;
+	seg->size = PAGE_SIZE;
+	seg->address = (void*)__get_free_page(GFP_KERNEL);
+	if (unlikely(!seg->address)) {
+		kfree(seg);
+		return NULL;
+	}
+	list_add_tail(&seg->list, lh);
+	return seg;
+}
+
+/* release all segments of a list along with their backing pages */
+static void sgrb_seg_release_all(struct list_head *lh)
+{
+	struct sgrb_seg *seg, *tmp;
+
+	list_for_each_entry_safe(seg, tmp, lh, list) {
+		list_del(&seg->list);
+		free_page((unsigned long)seg->address);
+		kfree(seg);
+	}
+}
+
+/* indexed by enum statistic_state; INVALID must never be printed */
+static char * statistic_state_strings[] = {
+	"undefined(BUG)",
+	"unconfigured",
+	"released",
+	"off",
+	"on",
+};
+
+/*
+ * Append one statistic's definition line to the "definition" file
+ * buffers. Output grows with the state: name/state/units always,
+ * type and discipline options from UNCONFIGURED upwards, timestamps
+ * only if data areas exist. Returns 0 or -ENOMEM.
+ */
+static int statistic_fdef(struct statistic_interface *interface, int i,
+			  struct statistic_file_private *private)
+{
+	struct statistic *stat = &interface->stat[i];
+	struct statistic_info *info = &interface->info[i];
+	struct statistic_discipline *disc = &statistic_discs[stat->type];
+	struct sgrb_seg *seg;
+	char t0[TIMESTAMP_SIZE], t1[TIMESTAMP_SIZE], t2[TIMESTAMP_SIZE];
+
+	seg = sgrb_seg_find(&private->read_seg_lh, 512);
+	if (unlikely(!seg))
+		return -ENOMEM;
+
+	seg->offset += sprintf(seg->address + seg->offset,
+			       "name=%s state=%s units=%s/%s",
+			       info->name, statistic_state_strings[stat->state],
+			       info->x_unit, info->y_unit);
+	if (stat->state == STATISTIC_STATE_UNCONFIGURED) {
+		seg->offset += sprintf(seg->address + seg->offset, "\n");
+		return 0;
+	}
+
+	seg->offset += sprintf(seg->address + seg->offset, " type=%s",
+			       disc->name);
+	if (disc->fdef)
+		seg->offset += disc->fdef(stat, seg->address + seg->offset);
+	if (stat->state == STATISTIC_STATE_RELEASED) {
+		seg->offset += sprintf(seg->address + seg->offset, "\n");
+		return 0;
+	}
+
+	nsec_to_timestamp(t0, stat->age);
+	nsec_to_timestamp(t1, stat->started);
+	nsec_to_timestamp(t2, stat->stopped);
+	seg->offset += sprintf(seg->address + seg->offset,
+			       " data=%s started=%s stopped=%s\n", t0, t1, t2);
+	return 0;
+}
+
+/*
+ * Append one statistic's data to the "data" file buffers. For NOINCR
+ * statistics the single shared area is formatted directly; otherwise
+ * a scratch area is allocated, all CPUs merge their data into it (via
+ * the statistic_merge() IPI) and the merged result is formatted.
+ */
+static inline int statistic_fdata(struct statistic_interface *interface, int i,
+				  struct statistic_file_private *fpriv)
+{
+	struct statistic *stat = &interface->stat[i];
+	struct statistic_info *info = &interface->info[i];
+	struct statistic_discipline *disc = &statistic_discs[stat->type];
+	struct statistic_merge_private mpriv;
+	int retval;
+
+	if (unlikely(stat->state < STATISTIC_STATE_OFF))
+		return 0;
+	if (unlikely(info->flags & STATISTIC_FLAGS_NOINCR))
+		return disc->fdata(stat, info->name, fpriv, stat->pdata);
+	mpriv.dst = statistic_alloc_ptr(stat, GFP_KERNEL, -1);
+	if (unlikely(!mpriv.dst))
+		return -ENOMEM;
+	spin_lock_init(&mpriv.lock);
+	mpriv.stat = stat;
+	on_each_cpu(statistic_merge, &mpriv, 0, 1);
+	retval = disc->fdata(stat, info->name, fpriv, mpriv.dst);
+	statistic_free_ptr(stat, mpriv.dst);
+	return retval;
+}
+
+/* cpu hotplug handling for per-cpu data */
+
+/*
+ * Adjust one statistic's per-CPU areas on a hotplug event: allocate an
+ * area when a CPU is coming up, and rescue (merge away) plus free the
+ * area when a CPU goes down or its bring-up is cancelled.
+ *
+ * NOTE(review): the CPU_UP_PREPARE allocation result is not checked;
+ * if the GFP_ATOMIC allocation fails, ptrs[cpu] stays NULL and a later
+ * statistic update on that CPU would dereference it - consider
+ * propagating the failure to the notifier. TODO confirm.
+ */
+static inline int _statistic_hotcpu(struct statistic_interface *interface,
+				    int i, unsigned long action, int cpu)
+{
+	struct statistic *stat = &interface->stat[i];
+	struct statistic_info *info = &interface->info[i];
+
+	if (unlikely(info->flags & STATISTIC_FLAGS_NOINCR))
+		return 0;
+	if (stat->state < STATISTIC_STATE_OFF)
+		return 0;
+	switch (action) {
+	case CPU_UP_PREPARE:
+		stat->pdata->ptrs[cpu] = statistic_alloc_ptr(stat, GFP_ATOMIC,
+							     cpu_to_node(cpu));
+		break;
+	case CPU_UP_CANCELED:
+	case CPU_DEAD:
+		statistic_move_ptr(stat, stat->pdata->ptrs[cpu]);
+		statistic_free_ptr(stat, stat->pdata->ptrs[cpu]);
+		stat->pdata->ptrs[cpu] = NULL;
+		break;
+	}
+	return 0;
+}
+
+/* all registered interfaces; guarded by statistic_list_mutex */
+static struct list_head statistic_list;
+static struct mutex statistic_list_mutex;
+
+/* hotplug notifier: apply the event to every registered statistic */
+static int __cpuinit statistic_hotcpu(struct notifier_block *notifier,
+				      unsigned long action, void *__cpu)
+{
+	int cpu = (unsigned long)__cpu, i;
+	struct statistic_interface *interface;
+
+	mutex_lock(&statistic_list_mutex);
+	list_for_each_entry(interface, &statistic_list, list)
+		for (i = 0; i < interface->number; i++)
+			_statistic_hotcpu(interface, i, action, cpu);
+	mutex_unlock(&statistic_list_mutex);
+	return NOTIFY_OK;
+}
+
+static struct notifier_block statistic_hotcpu_notifier =
+{
+	.notifier_call = statistic_hotcpu,
+};
+
+/* module startup / removal */
+
+/* debugfs directory all per-interface directories are created in */
+static struct dentry *statistic_root_dir;
+
+int __init statistic_init(void)
+{
+	statistic_root_dir = debugfs_create_dir(STATISTIC_ROOT_DIR, NULL);
+	if (unlikely(!statistic_root_dir))
+		return -ENOMEM;
+	INIT_LIST_HEAD(&statistic_list);
+	mutex_init(&statistic_list_mutex);
+	register_cpu_notifier(&statistic_hotcpu_notifier);
+	return 0;
+}
+
+void __exit statistic_exit(void)
+{
+	unregister_cpu_notifier(&statistic_hotcpu_notifier);
+	debugfs_remove(statistic_root_dir);
+}
+
+/* parser used for configuring statistics */
+
+/*
+ * Apply one definition string to one statistic for discipline @type.
+ * The discipline's parse hook gets a writable copy of @def (strsep
+ * inside a parser would modify it). Changing the discipline without a
+ * parse hook forces a trip through UNCONFIGURED to drop old settings.
+ * Finally the statistic is restored to at least RELEASED state.
+ */
+static int statistic_parse_single(struct statistic *stat,
+				  struct statistic_info *info,
+				  char *def, int type)
+{
+	struct statistic_discipline *disc = &statistic_discs[type];
+	int prev_state = stat->state, retval = 0;
+	char *copy;
+
+	if (disc->parse) {
+		copy = kstrdup(def, GFP_KERNEL);
+		if (unlikely(!copy))
+			return -ENOMEM;
+		retval = disc->parse(stat, info, type, copy);
+		kfree(copy);
+	} else if (type != stat->type)
+		statistic_transition(stat, info, STATISTIC_STATE_UNCONFIGURED);
+	if (!retval) {
+		stat->type = type;
+		stat->add = disc->add;
+	}
+	statistic_transition(stat, info,
+			     max(prev_state, STATISTIC_STATE_RELEASED));
+	return retval;
+}
+
+/* recognises only the "type=<disc>" token; everything else is skipped */
+static match_table_t statistic_match_type = {
+	{1, "type=%s"},
+	{9, NULL}
+};
+
+/*
+ * Find the "type=" token in @def (or in the statistic's default
+ * definition if @def is NULL), look the named discipline up in
+ * statistic_discs[] and delegate to statistic_parse_single(). If no
+ * type token matches, the statistic's current discipline is re-used;
+ * -EINVAL if there is none yet.
+ */
+static int statistic_parse_match(struct statistic *stat,
+				 struct statistic_info *info, char *def)
+{
+	int type, len;
+	char *p, *copy, *twisted;
+	substring_t args[MAX_OPT_ARGS];
+	struct statistic_discipline *disc;
+
+	if (!def)
+		def = info->defaults;
+	/* strsep consumes the string, so work on a copy */
+	twisted = copy = kstrdup(def, GFP_KERNEL);
+	if (unlikely(!copy))
+		return -ENOMEM;
+	while ((p = strsep(&twisted, " ")) != NULL) {
+		if (!*p)
+			continue;
+		if (match_token(p, statistic_match_type, args) != 1)
+			continue;
+		len = (args[0].to - args[0].from) + 1;
+		for (type = 0; type < STATISTIC_TYPE_NONE; type++) {
+			disc = &statistic_discs[type];
+			if (unlikely(strncmp(disc->name, args[0].from, len)))
+				continue;
+			kfree(copy);
+			return statistic_parse_single(stat, info, def, type);
+		}
+	}
+	kfree(copy);
+	if (unlikely(stat->type == STATISTIC_TYPE_NONE))
+		return -EINVAL;
+	return statistic_parse_single(stat, info, def, stat->type);
+}
+
+/* tokens shared by all disciplines; state tokens map to target states */
+static match_table_t statistic_match_common = {
+	{STATISTIC_STATE_UNCONFIGURED, "state=unconfigured"},
+	{STATISTIC_STATE_RELEASED, "state=released"},
+	{STATISTIC_STATE_OFF, "state=off"},
+	{STATISTIC_STATE_ON, "state=on"},
+	{1001, "name=%s"},
+	{1002, "data=reset"},
+	{1003, "defaults"},
+	{9999, NULL}
+};
+
+/*
+ * Process one line written to a "definition" file: collect the common
+ * tokens (target state, statistic name, reset/defaults requests), then
+ * apply them to every statistic of the interface whose name matches
+ * (or to all statistics if no "name=" token was given).
+ */
+static void statistic_parse_line(struct statistic_interface *interface,
+				 char *def)
+{
+	char *p, *copy, *twisted, *name = NULL;
+	substring_t args[MAX_OPT_ARGS];
+	int token, reset = 0, defaults = 0, i;
+	int state = STATISTIC_STATE_INVALID;
+	struct statistic *stat = interface->stat;
+	struct statistic_info *info = interface->info;
+
+	if (unlikely(!def))
+		return;
+	/* strsep consumes the string, so work on a copy */
+	twisted = copy = kstrdup(def, GFP_KERNEL);
+	if (unlikely(!copy))
+		return;
+
+	while ((p = strsep(&twisted, " ")) != NULL) {
+		if (!*p)
+			continue;
+		token = match_token(p, statistic_match_common, args);
+		switch (token) {
+		case STATISTIC_STATE_UNCONFIGURED:
+		case STATISTIC_STATE_RELEASED:
+		case STATISTIC_STATE_OFF:
+		case STATISTIC_STATE_ON:
+			state = token;
+			break;
+		case 1001:
+			/* only the first "name=" token counts */
+			if (likely(!name))
+				name = match_strdup(&args[0]);
+			break;
+		case 1002:
+			reset = 1;
+			break;
+		case 1003:
+			defaults = 1;
+			break;
+		}
+	}
+	for (i = 0; i < interface->number; i++, stat++, info++) {
+		if (!name || (name && !strcmp(name, info->name))) {
+			if (defaults)
+				statistic_parse_match(stat, info, NULL);
+			if (name)
+				statistic_parse_match(stat, info, def);
+			if (state != STATISTIC_STATE_INVALID)
+				statistic_transition(stat, info, state);
+			if (reset)
+				statistic_reset(stat, info);
+		}
+	}
+	kfree(copy);
+	kfree(name);
+}
+
+/*
+ * Flatten the segment list describing one written line into a single
+ * NUL-terminated buffer (with a trailing blank so the last token is
+ * always space-delimited) and feed it to statistic_parse_line().
+ *
+ * The segment descriptors on @line_lh are consumed in every case -
+ * also when the line buffer cannot be allocated, which previously
+ * leaked them (their payload belongs to the write segments and is
+ * released separately, so only the descriptors are freed here).
+ */
+static void statistic_parse(struct statistic_interface *interface,
+			    struct list_head *line_lh, size_t line_size)
+{
+	struct sgrb_seg *seg, *tmp;
+	char *buf = NULL;
+	int offset = 0;
+
+	if (line_size)
+		buf = kmalloc(line_size + 2, GFP_KERNEL);
+	if (buf) {
+		buf[line_size] = ' ';
+		buf[line_size + 1] = '\0';
+	}
+	list_for_each_entry_safe(seg, tmp, line_lh, list) {
+		if (buf) {
+			memcpy(buf + offset, seg->address, seg->size);
+			offset += seg->size;
+		}
+		list_del(&seg->list);
+		kfree(seg);
+	}
+	if (unlikely(!buf))
+		return;
+	statistic_parse_line(interface, buf);
+	kfree(buf);
+}
+
+/* sequential files comprising user interface */
+
+/*
+ * Common open handler: fetch the interface from the inode and attach a
+ * fresh per-open private structure with empty read/write buffer lists.
+ * Returns 0 or -ENOMEM.
+ */
+static int statistic_generic_open(struct inode *inode,
+		struct file *file, struct statistic_interface **interface,
+		struct statistic_file_private **private)
+{
+	*interface = inode->u.generic_ip;
+	/*
+	 * Check the fetched pointer, not the output argument: the old
+	 * BUG_ON(!interface) tested the address of a local variable of
+	 * the caller, which can never be NULL.
+	 */
+	BUG_ON(!*interface);
+	*private = kzalloc(sizeof(struct statistic_file_private), GFP_KERNEL);
+	if (unlikely(!*private))
+		return -ENOMEM;
+	INIT_LIST_HEAD(&(*private)->read_seg_lh);
+	INIT_LIST_HEAD(&(*private)->write_seg_lh);
+	file->private_data = *private;
+	return 0;
+}
+
+/* common release handler: drop all per-open buffers and the private data */
+static int statistic_generic_close(struct inode *inode, struct file *file)
+{
+	struct statistic_file_private *private = file->private_data;
+	BUG_ON(!private);
+	sgrb_seg_release_all(&private->read_seg_lh);
+	sgrb_seg_release_all(&private->write_seg_lh);
+	kfree(private);
+	return 0;
+}
+
+/*
+ * Copy out the snapshot that was rendered into the read segment list
+ * at open time. @pos tracks how far into the segment chain the file
+ * offset points; segments before the offset are skipped, then data is
+ * copied until @len or the chain is exhausted.
+ */
+static ssize_t statistic_generic_read(struct file *file,
+				char __user *buf, size_t len, loff_t *offset)
+{
+	struct statistic_file_private *private = file->private_data;
+	struct sgrb_seg *seg;
+	size_t seg_offset, seg_residual, seg_transfer;
+	size_t transfered = 0;
+	loff_t pos = 0;
+
+	BUG_ON(!private);
+	list_for_each_entry(seg, &private->read_seg_lh, list) {
+		if (unlikely(!len))
+			break;
+		if (*offset >= pos && *offset <= (pos + seg->offset)) {
+			seg_offset = *offset - pos;
+			seg_residual = seg->offset - seg_offset;
+			seg_transfer = min(len, seg_residual);
+			if (unlikely(copy_to_user(buf + transfered,
+						  seg->address + seg_offset,
+						  seg_transfer)))
+				return -EFAULT;
+			transfered += seg_transfer;
+			*offset += seg_transfer;
+			/* advance pos to the new *offset within this seg */
+			pos += seg_transfer + seg_offset;
+			len -= seg_transfer;
+		} else
+			pos += seg->offset;
+	}
+	return transfered;
+}
+
+/*
+ * Buffer written definition data; it is parsed on release. Only
+ * sequential appends are accepted (-EPIPE otherwise).
+ *
+ * If allocation or copy_from_user() fails mid-stream, report the bytes
+ * already accepted (standard short-write semantics) instead of an
+ * error: *offset and write_seg_total_size were already advanced for
+ * them, so returning -EFAULT/-ENOMEM would make the caller's retry
+ * fail the sequential-append check above.
+ */
+static ssize_t statistic_generic_write(struct file *file,
+			const char __user *buf, size_t len, loff_t *offset)
+{
+	struct statistic_file_private *private = file->private_data;
+	struct sgrb_seg *seg;
+	size_t seg_residual, seg_transfer;
+	size_t transfered = 0;
+
+	BUG_ON(!private);
+	if (unlikely(*offset != private->write_seg_total_size))
+		return -EPIPE;
+	while (len) {
+		seg = sgrb_seg_find(&private->write_seg_lh, 1);
+		if (unlikely(!seg))
+			return transfered ? transfered : -ENOMEM;
+		seg_residual = seg->size - seg->offset;
+		seg_transfer = min(len, seg_residual);
+		if (unlikely(copy_from_user(seg->address + seg->offset,
+					    buf + transfered, seg_transfer)))
+			return transfered ? transfered : -EFAULT;
+		private->write_seg_total_size += seg_transfer;
+		seg->offset += seg_transfer;
+		transfered += seg_transfer;
+		*offset += seg_transfer;
+		len -= seg_transfer;
+	}
+	return transfered;
+}
+
+/*
+ * Release handler of the "definition" file: split everything that was
+ * written into newline-terminated lines and parse each line. The
+ * temporary seg_nl descriptors alias ranges of the write segments
+ * (they own no payload) and are consumed by statistic_parse().
+ *
+ * NOTE(review): the offset-- before computing seg_nl->size appears to
+ * strip the byte preceding '\n' and leaves the '\n' itself to be
+ * re-scanned as a one-byte line on the next iteration - presumably
+ * harmless to the parser, but worth confirming against test input.
+ */
+static int statistic_def_close(struct inode *inode, struct file *file)
+{
+	struct statistic_interface *interface = inode->u.generic_ip;
+	struct statistic_file_private *private = file->private_data;
+	struct sgrb_seg *seg, *seg_nl;
+	int offset;
+	struct list_head line_lh;
+	char *nl;
+	size_t line_size = 0;
+
+	INIT_LIST_HEAD(&line_lh);
+	list_for_each_entry(seg, &private->write_seg_lh, list) {
+		for (offset = 0; offset < seg->offset; offset += seg_nl->size) {
+			seg_nl = kmalloc(sizeof(struct sgrb_seg), GFP_KERNEL);
+			if (unlikely(!seg_nl))
+				/*
+				 * FIXME:
+				 * Should we omit other new settings because we
+				 * could not process this line of definitions?
+				 */
+				continue;
+			seg_nl->address = seg->address + offset;
+			nl = strnchr(seg_nl->address,
+				     seg->offset - offset, '\n');
+			if (nl) {
+				seg_nl->offset = nl - seg_nl->address;
+				if (seg_nl->offset)
+					seg_nl->offset--;
+			} else
+				seg_nl->offset = seg->offset - offset;
+			seg_nl->size = seg_nl->offset + 1;
+			line_size += seg_nl->size;
+			list_add_tail(&seg_nl->list, &line_lh);
+			if (nl) {
+				statistic_parse(interface, &line_lh, line_size);
+				line_size = 0;
+			}
+		}
+	}
+	/* last line may lack a trailing newline */
+	if (!list_empty(&line_lh))
+		statistic_parse(interface, &line_lh, line_size);
+	return statistic_generic_close(inode, file);
+}
+
+/*
+ * Open handler of the "definition" file: render the definition of
+ * every statistic of the interface into the read buffers. On failure
+ * statistic_def_close() is called to release everything (including any
+ * partially written data) before the error is returned.
+ */
+static int statistic_def_open(struct inode *inode, struct file *file)
+{
+	struct statistic_interface *interface;
+	struct statistic_file_private *private;
+	int retval = 0;
+	int i;
+
+	retval = statistic_generic_open(inode, file, &interface, &private);
+	if (unlikely(retval))
+		return retval;
+	for (i = 0; i < interface->number; i++) {
+		retval = statistic_fdef(interface, i, private);
+		if (unlikely(retval)) {
+			statistic_def_close(inode, file);
+			break;
+		}
+	}
+	return retval;
+}
+
+/*
+ * Open handler of the "data" file: let the exploiter push current
+ * numbers first (pull hook), then render a snapshot of every
+ * statistic's data into the read buffers.
+ */
+static int statistic_data_open(struct inode *inode, struct file *file)
+{
+	struct statistic_interface *interface;
+	struct statistic_file_private *private;
+	int retval = 0;
+	int i;
+
+	retval = statistic_generic_open(inode, file, &interface, &private);
+	if (unlikely(retval))
+		return retval;
+	if (interface->pull)
+		interface->pull(interface->pull_private);
+	for (i = 0; i < interface->number; i++) {
+		retval = statistic_fdata(interface, i, private);
+		if (unlikely(retval)) {
+			statistic_generic_close(inode, file);
+			break;
+		}
+	}
+	return retval;
+}
+
+/* "definition" file: readable and writable (writes parsed on release) */
+static struct file_operations statistic_def_fops = {
+	.owner		= THIS_MODULE,
+	.read		= statistic_generic_read,
+	.write		= statistic_generic_write,
+	.open		= statistic_def_open,
+	.release	= statistic_def_close,
+};
+
+/* "data" file: read-only snapshot taken at open time */
+static struct file_operations statistic_data_fops = {
+	.owner		= THIS_MODULE,
+	.read		= statistic_generic_read,
+	.open		= statistic_data_open,
+	.release	= statistic_generic_close,
+};
+
+/**
+ * statistic_create - setup statistics and create debugfs files
+ * @interface: struct statistic_interface provided by exploiter
+ * @name: name of debugfs directory to be created
+ *
+ * Creates a debugfs directory in "statistics" as well as the "data" and
+ * "definition" files. Then we attach setup statistics according to the
+ * definition provided by exploiter through struct statistic_interface.
+ *
+ * struct statistic_interface must have been set up prior to calling this.
+ *
+ * On success, 0 is returned.
+ *
+ * If some required memory could not be allocated, or the creation
+ * of debugfs entries failed, this routine fails, and -ENOMEM is returned.
+ */
+int statistic_create(struct statistic_interface *interface, const char *name)
+{
+	struct statistic *stat = interface->stat;
+	struct statistic_info *info = interface->info;
+	int i;
+
+	BUG_ON(!stat || !info || !interface->number);
+
+	interface->debugfs_dir =
+		debugfs_create_dir(name, statistic_root_dir);
+	if (unlikely(!interface->debugfs_dir))
+		return -ENOMEM;
+
+	interface->data_file = debugfs_create_file(
+		STATISTIC_FILENAME_DATA, S_IFREG | S_IRUSR,
+		interface->debugfs_dir, (void*)interface, &statistic_data_fops);
+	if (unlikely(!interface->data_file)) {
+		debugfs_remove(interface->debugfs_dir);
+		return -ENOMEM;
+	}
+
+	interface->def_file = debugfs_create_file(
+		STATISTIC_FILENAME_DEF, S_IFREG | S_IRUSR | S_IWUSR,
+		interface->debugfs_dir, (void*)interface, &statistic_def_fops);
+	if (unlikely(!interface->def_file)) {
+		debugfs_remove(interface->data_file);
+		debugfs_remove(interface->debugfs_dir);
+		return -ENOMEM;
+	}
+
+	/* bring each statistic up and apply its default definition */
+	for (i = 0; i < interface->number; i++, stat++, info++) {
+		statistic_transition(stat, info, STATISTIC_STATE_UNCONFIGURED);
+		statistic_parse_match(stat, info, NULL);
+	}
+
+	/* make the interface visible to the cpu hotplug notifier */
+	mutex_lock(&statistic_list_mutex);
+	list_add(&interface->list, &statistic_list);
+	mutex_unlock(&statistic_list_mutex);
+	return 0;
+}
+EXPORT_SYMBOL_GPL(statistic_create);
+
+/**
+ * statistic_remove - remove unused statistics
+ * @interface: struct statistic_interface to clean up
+ *
+ * Remove a debugfs directory in "statistics" along with its "data" and
+ * "definition" files. Removing this user interface also causes the removal
+ * of all statistics attached to the interface.
+ *
+ * The exploiter must have ceased reporting statistic data.
+ *
+ * Returns -EINVAL for attempted double removal, 0 otherwise.
+ */
+int statistic_remove(struct statistic_interface *interface)
+{
+	struct statistic *stat = interface->stat;
+	struct statistic_info *info = interface->info;
+	int i;
+
+	/* debugfs_dir is NULLed below; a second call bails out here */
+	if (unlikely(!interface->debugfs_dir))
+		return -EINVAL;
+	mutex_lock(&statistic_list_mutex);
+	list_del(&interface->list);
+	mutex_unlock(&statistic_list_mutex);
+	/* tear every statistic down to INVALID, freeing its data areas */
+	for (i = 0; i < interface->number; i++, stat++, info++)
+		statistic_transition(stat, info, STATISTIC_STATE_INVALID);
+	debugfs_remove(interface->data_file);
+	debugfs_remove(interface->def_file);
+	debugfs_remove(interface->debugfs_dir);
+	interface->debugfs_dir = NULL;
+	return 0;
+}
+EXPORT_SYMBOL_GPL(statistic_remove);
+
+/* code concerned with single value statistics */
+
+/* the counter disciplines keep a single u64 per data area */
+static void statistic_reset_counter(struct statistic *stat, void *ptr)
+{
+	*(u64*)ptr = 0;
+}
+
+/* "increments" flavour: Y (incr) is accumulated, X is ignored */
+static void statistic_add_counter_inc(struct statistic *stat, int cpu,
+				     s64 value, u64 incr)
+{
+	*(u64*)stat->pdata->ptrs[cpu] += incr;
+}
+
+/* "products" flavour: |X| * Y is accumulated
+ * NOTE(review): value = -value overflows for S64_MIN - confirm inputs */
+static void statistic_add_counter_prod(struct statistic *stat, int cpu,
+				       s64 value, u64 incr)
+{
+	if (unlikely(value < 0))
+		value = -value;
+	*(u64*)stat->pdata->ptrs[cpu] += value * incr;
+}
+
+/* set() variants write the NOINCR single area (pdata used directly) */
+static void statistic_set_counter_inc(struct statistic *stat,
+				      s64 value, u64 total)
+{
+	*(u64*)stat->pdata = total;
+}
+
+static void statistic_set_counter_prod(struct statistic *stat,
+				       s64 value, u64 total)
+{
+	if (unlikely(value < 0))
+		value = -value;
+	*(u64*)stat->pdata = value * total;
+}
+
+static void statistic_merge_counter(struct statistic *stat,
+				    void *dst, void *src)
+{
+	*(u64*)dst += *(u64*)src;
+}
+
+/* emit "name value\n" into the data file buffers */
+static int statistic_fdata_counter(struct statistic *stat, const char *name,
+				   struct statistic_file_private *fpriv,
+				   void *data)
+{
+	struct sgrb_seg *seg;
+	seg = sgrb_seg_find(&fpriv->read_seg_lh, 128);
+	if (unlikely(!seg))
+		return -ENOMEM;
+	seg->offset += sprintf(seg->address + seg->offset, "%s %Lu\n",
+			       name, *(unsigned long long *)data);
+	return 0;
+}
+
+/* code concerned with utilisation indicator statistic */
+
+/*
+ * Reset a utilisation entry: clear the sample count and accumulator and
+ * preset min/max with extreme sentinels (close to S64_MAX / S64_MIN) so
+ * that the first reported sample always replaces them.
+ */
+static void statistic_reset_util(struct statistic *stat, void *ptr)
+{
+	struct statistic_entry_util *util = ptr;
+	util->num = 0;
+	util->acc = 0;
+	util->min = (~0ULL >> 1) - 1;
+	util->max = -(~0ULL >> 1) + 1;
+}
+
+static void statistic_add_util(struct statistic *stat, int cpu,
+			       s64 value, u64 incr)
+{
+	struct statistic_entry_util *util = stat->pdata->ptrs[cpu];
+	util->num += incr;
+	util->acc += value * incr;
+	if (unlikely(value < util->min))
+		util->min = value;
+	if (unlikely(value > util->max))
+		util->max = value;
+}
+
+static void statistic_set_util(struct statistic *stat, s64 value, u64 total)
+{
+	struct statistic_entry_util *util;
+	util = (struct statistic_entry_util *) stat->pdata;
+	util->num = total;
+	util->acc = value * total;
+	if (unlikely(value < util->min))
+		util->min = value;
+	if (unlikely(value > util->max))
+		util->max = value;
+}
+
+static void statistic_merge_util(struct statistic *stat, void *_dst, void *_src)
+{
+	struct statistic_entry_util *dst = _dst, *src = _src;
+	dst->num += src->num;
+	dst->acc += src->acc;
+	if (unlikely(src->min < dst->min))
+		dst->min = src->min;
+	if (unlikely(src->max > dst->max))
+		dst->max = src->max;
+}
+
+/*
+ * Format a utilisation entry as
+ * "<name> <num> <min> <avg_whole>.<avg_frac> <max>\n", where the average
+ * (acc / num) is rounded to three decimal places.  If no samples were
+ * taken, all derived values are reported as 0.
+ * Returns 0 on success, -ENOMEM if no buffer segment is available.
+ *
+ * NOTE(review): do_div() requires an unsigned 64-bit dividend, but
+ * 'whole' and 'decimal' are derived from the signed accumulator
+ * util->acc and may be negative - verify do_div() usage for negative
+ * accumulated values.
+ */
+static int statistic_fdata_util(struct statistic *stat, const char *name,
+				struct statistic_file_private *fpriv,
+				void *data)
+{
+	struct sgrb_seg *seg;
+	struct statistic_entry_util *util = data;
+	unsigned long long whole = 0;
+	signed long long min = 0, max = 0, decimal = 0, last_digit;
+
+	seg = sgrb_seg_find(&fpriv->read_seg_lh, 128);
+	if (unlikely(!seg))
+		return -ENOMEM;
+	if (likely(util->num)) {
+		whole = util->acc;
+		do_div(whole, util->num);
+		/* compute the fractional part to 4 digits, then round the
+		 * last digit away to get 3 decimal places */
+		decimal = util->acc * 10000;
+		do_div(decimal, util->num);
+		decimal -= whole * 10000;
+		if (decimal < 0)
+			decimal = -decimal;
+		last_digit = decimal;
+		do_div(last_digit, 10);
+		last_digit = decimal - last_digit * 10;
+		if (last_digit >= 5)
+			decimal += 10;
+		do_div(decimal, 10);
+		min = util->min;
+		max = util->max;
+	}
+	seg->offset += sprintf(seg->address + seg->offset,
+			       "%s %Lu %Ld %Ld.%03lld %Ld\n", name,
+			       (unsigned long long)util->num,
+			       (signed long long)min, whole, decimal,
+			       (signed long long)max);
+	return 0;
+}
+
+/* code concerned with histogram statistics */
+
+static void * statistic_alloc_histogram(struct statistic *stat, size_t size,
+					gfp_t flags, int node)
+{
+	return kmalloc_node(size * (stat->u.histogram.last_index + 1),
+			    flags, node);
+}
+
+static inline s64 statistic_histogram_calc_value_lin(struct statistic *stat,
+						     int i)
+{
+	return stat->u.histogram.range_min +
+		stat->u.histogram.base_interval * i;
+}
+
+static inline s64 statistic_histogram_calc_value_log2(struct statistic *stat,
+						      int i)
+{
+	return stat->u.histogram.range_min +
+		(i ? (stat->u.histogram.base_interval << (i - 1)) : 0);
+}
+
+static inline s64 statistic_histogram_calc_value(struct statistic *stat, int i)
+{
+	if (stat->type == STATISTIC_TYPE_HISTOGRAM_LIN)
+		return statistic_histogram_calc_value_lin(stat, i);
+	else
+		return statistic_histogram_calc_value_log2(stat, i);
+}
+
+static inline int statistic_histogram_calc_index_lin(struct statistic *stat,
+						 s64 value)
+{
+	unsigned long long i = value - stat->u.histogram.range_min;
+	do_div(i, stat->u.histogram.base_interval);
+	return i;
+}
+
+/*
+ * Map a value to its log2 histogram bucket by linearly scanning the
+ * bucket upper bounds.  Values beyond the last bound fall into the
+ * overflow bucket at last_index.  O(last_index) per sample.
+ */
+static inline int statistic_histogram_calc_index_log2(struct statistic *stat,
+						      s64 value)
+{
+	unsigned long long i;
+	for (i = 0;
+	     i < stat->u.histogram.last_index &&
+	     value > statistic_histogram_calc_value_log2(stat, i);
+	     i++);
+	return i;
+}
+
+static inline int statistic_histogram_calc_index(struct statistic *stat,
+						 s64 value)
+{
+	if (stat->type == STATISTIC_TYPE_HISTOGRAM_LIN)
+		return statistic_histogram_calc_index_lin(stat, value);
+	else
+		return statistic_histogram_calc_index_log2(stat, value);
+}
+
+static void statistic_reset_histogram(struct statistic *stat, void *ptr)
+{
+	memset(ptr, 0, (stat->u.histogram.last_index + 1) * sizeof(u64));
+}
+
+static void statistic_add_histogram_lin(struct statistic *stat, int cpu,
+					s64 value, u64 incr)
+{
+	int i = statistic_histogram_calc_index_lin(stat, value);
+	((u64*)stat->pdata->ptrs[cpu])[i] += incr;
+}
+
+static void statistic_add_histogram_log2(struct statistic *stat, int cpu,
+					 s64 value, u64 incr)
+{
+	int i = statistic_histogram_calc_index_log2(stat, value);
+	((u64*)stat->pdata->ptrs[cpu])[i] += incr;
+}
+
+static void statistic_set_histogram_lin(struct statistic *stat,
+					s64 value, u64 total)
+{
+	int i = statistic_histogram_calc_index_lin(stat, value);
+	((u64*)stat->pdata)[i] = total;
+}
+
+static void statistic_set_histogram_log2(struct statistic *stat,
+					 s64 value, u64 total)
+{
+	int i = statistic_histogram_calc_index_log2(stat, value);
+	((u64*)stat->pdata)[i] = total;
+}
+
+static void statistic_merge_histogram(struct statistic *stat,
+				      void *_dst, void *_src)
+{
+	u64 *dst = _dst, *src = _src;
+	int i;
+	for (i = 0; i <= stat->u.histogram.last_index; i++)
+		dst[i] += src[i];
+}
+
+static inline int statistic_fdata_histogram_line(const char *name,
+					struct statistic_file_private *private,
+					const char *prefix, s64 bound, u64 hits)
+{
+	struct sgrb_seg *seg;
+	seg = sgrb_seg_find(&private->read_seg_lh, 256);
+	if (unlikely(!seg))
+		return -ENOMEM;
+	seg->offset += sprintf(seg->address + seg->offset, "%s %s%Ld %Lu\n",
+			       name, prefix, (signed long long)bound,
+			       (unsigned long long)hits);
+	return 0;
+}
+
+/*
+ * Emit one "<name> <= <bound> <hits>" line per regular bucket, followed
+ * by a final "<name> > <bound> <hits>" line for the overflow bucket.
+ * Note that after the loop i == last_index, so the last statement
+ * deliberately reads the overflow slot data[last_index].
+ * Returns 0 on success or -ENOMEM from the line formatter.
+ */
+static int statistic_fdata_histogram(struct statistic *stat, const char *name,
+				     struct statistic_file_private *fpriv,
+				     void *data)
+{
+	int i, retval;
+	s64 bound = 0;
+	for (i = 0; i < (stat->u.histogram.last_index); i++) {
+		bound = statistic_histogram_calc_value(stat, i);
+		retval = statistic_fdata_histogram_line(name, fpriv, "<=",
+							bound, ((u64*)data)[i]);
+		if (unlikely(retval))
+			return retval;
+	}
+	return statistic_fdata_histogram_line(name, fpriv, ">",
+					      bound, ((u64*)data)[i]);
+}
+
+static int statistic_fdef_histogram(struct statistic *stat, char *line)
+{
+	return sprintf(line, " range_min=%Li entries=%Li base_interval=%Lu",
+		       (signed long long)stat->u.histogram.range_min,
+		       (unsigned long long)(stat->u.histogram.last_index + 1),
+		       (unsigned long long)stat->u.histogram.base_interval);
+}
+
+static match_table_t statistic_match_histogram = {
+	{1, "entries=%u"},
+	{2, "base_interval=%s"},
+	{3, "range_min=%s"},
+	{9, NULL}
+};
+
+/*
+ * Parse a user-supplied histogram definition ("entries=", "base_interval="
+ * and "range_min=" tokens).  Fix over previous version: the return values
+ * of match_int()/match_s64() were ignored, so a malformed number would
+ * set a got_* flag and store an *uninitialized* local into the statistic
+ * definition.  Now a value is only accepted if its parse succeeded.
+ */
+static int statistic_parse_histogram(struct statistic *stat,
+				     struct statistic_info *info,
+				     int type, char *def)
+{
+	char *p;
+	substring_t args[MAX_OPT_ARGS];
+	int token, got_entries = 0, got_interval = 0, got_range = 0;
+	u32 entries, base_interval;
+	s64 range_min;
+
+	while ((p = strsep(&def, " ")) != NULL) {
+		if (!*p)
+			continue;
+		token = match_token(p, statistic_match_histogram, args);
+		switch (token) {
+		case 1:
+			if (!match_int(&args[0], &entries))
+				got_entries = 1;
+			break;
+		case 2:
+			if (!match_int(&args[0], &base_interval))
+				got_interval = 1;
+			break;
+		case 3:
+			/* assumes match_s64() follows the match_int()
+			 * convention of returning 0 on success - confirm */
+			if (!match_s64(&args[0], &range_min, 0))
+				got_range = 1;
+			break;
+		}
+	}
+	/* switching the histogram type requires a complete definition */
+	if (unlikely(type != stat->type &&
+		     !(got_entries && got_interval && got_range)))
+		return -EINVAL;
+	statistic_transition(stat, info, STATISTIC_STATE_UNCONFIGURED);
+	if (got_entries)
+		stat->u.histogram.last_index = entries - 1;
+	if (got_interval)
+		stat->u.histogram.base_interval = base_interval;
+	if (got_range)
+		stat->u.histogram.range_min = range_min;
+	return 0;
+}
+
+/* code concerned with histograms (discrete value) statistics */
+
+/*
+ * Allocate and initialize the head structure of a sparse statistic.
+ * Fix over previous version: the kmalloc_node() result was dereferenced
+ * without a NULL check; now allocation failure is propagated to the
+ * caller as NULL.
+ */
+static void * statistic_alloc_sparse(struct statistic *stat, size_t size,
+				     gfp_t flags, int node)
+{
+	struct statistic_sparse_list *slist = kmalloc_node(size, flags, node);
+
+	if (unlikely(!slist))
+		return NULL;
+	INIT_LIST_HEAD(&slist->entry_lh);
+	slist->entries_max = stat->u.sparse.entries_max;
+	return slist;
+}
+
+/*
+ * Release all list entries of a sparse statistic and clear its
+ * bookkeeping.  This routine doubles as the reset operation in
+ * statistic_discs[] ("reset equals free"), which is why hits_missed
+ * and the entry count are zeroed here as well.
+ */
+static void statistic_free_sparse(struct statistic *stat, void *ptr)
+{
+	struct statistic_entry_sparse *entry, *tmp;
+	struct statistic_sparse_list *slist = ptr;
+	list_for_each_entry_safe(entry, tmp, &slist->entry_lh, list) {
+		list_del(&entry->list);
+		kfree(entry);
+	}
+	slist->hits_missed = 0;
+	slist->entries = 0;
+}
+
+/*
+ * Keep the sparse list sorted by descending hit count after @entry's
+ * hits have been increased: scan backwards from @entry for the first
+ * element with at least as many hits, then move @entry right behind it
+ * (or to the list head if the scan ran off the front).  If @entry is
+ * already in place, the list is left untouched.
+ */
+static inline void statistic_add_sparse_sort(struct list_head *head,
+					struct statistic_entry_sparse *entry)
+{
+	struct statistic_entry_sparse *sort =
+		list_prepare_entry(entry, head, list);
+
+	list_for_each_entry_continue_reverse(sort, head, list)
+		if (likely(sort->hits >= entry->hits))
+			break;
+	if (unlikely(sort->list.next != &entry->list &&
+		     (&sort->list == head || sort->hits >= entry->hits)))
+		list_move(&entry->list, &sort->list);
+}
+
+static inline int statistic_add_sparse_new(struct statistic_sparse_list *slist,
+					   s64 value, u64 incr)
+{
+	struct statistic_entry_sparse *entry;
+
+	if (unlikely(slist->entries == slist->entries_max))
+		return -ENOMEM;
+	entry = kmalloc(sizeof(struct statistic_entry_sparse), GFP_ATOMIC);
+	if (unlikely(!entry))
+		return -ENOMEM;
+	entry->value = value;
+	entry->hits = incr;
+	slist->entries++;
+	list_add_tail(&entry->list, &slist->entry_lh);
+	return 0;
+}
+
+static inline void _statistic_add_sparse(struct statistic_sparse_list *slist,
+					 s64 value, u64 incr)
+{
+	struct list_head *head = &slist->entry_lh;
+	struct statistic_entry_sparse *entry;
+
+	list_for_each_entry(entry, head, list) {
+		if (likely(entry->value == value)) {
+			entry->hits += incr;
+			statistic_add_sparse_sort(head, entry);
+			return;
+		}
+	}
+	if (unlikely(statistic_add_sparse_new(slist, value, incr)))
+		slist->hits_missed += incr;
+}
+
+static void statistic_add_sparse(struct statistic *stat, int cpu,
+				 s64 value, u64 incr)
+{
+	struct statistic_sparse_list *slist = stat->pdata->ptrs[cpu];
+	_statistic_add_sparse(slist, value, incr);
+}
+
+static void statistic_set_sparse(struct statistic *stat, s64 value, u64 total)
+{
+	struct statistic_sparse_list *slist = (struct statistic_sparse_list *)
+						stat->pdata;
+	struct list_head *head = &slist->entry_lh;
+	struct statistic_entry_sparse *entry;
+
+	list_for_each_entry(entry, head, list) {
+		if (likely(entry->value == value)) {
+			entry->hits = total;
+			statistic_add_sparse_sort(head, entry);
+			return;
+		}
+	}
+	if (unlikely(statistic_add_sparse_new(slist, value, total)))
+		slist->hits_missed += total;
+}
+
+static void statistic_merge_sparse(struct statistic *stat,
+				   void *_dst, void *_src)
+{
+	struct statistic_sparse_list *dst = _dst, *src = _src;
+	struct statistic_entry_sparse *entry;
+	dst->hits_missed += src->hits_missed;
+	list_for_each_entry(entry, &src->entry_lh, list)
+		_statistic_add_sparse(dst, entry->value, entry->hits);
+}
+
+/*
+ * Format a sparse statistic: one "missed" summary line followed by one
+ * "<name> 0x<value> <hits>" line per recorded discriminator.
+ * Returns 0 on success, -ENOMEM if a buffer segment cannot be obtained.
+ *
+ * Fixes over previous version: hits_missed was printed as "0x%Lu" - a
+ * hex prefix on a decimal conversion; and entry->value was passed to
+ * %Lx with a signed cast (format/argument type mismatch).
+ */
+static int statistic_fdata_sparse(struct statistic *stat, const char *name,
+				  struct statistic_file_private *fpriv,
+				  void *data)
+{
+	struct sgrb_seg *seg;
+	struct statistic_sparse_list *slist = data;
+	struct statistic_entry_sparse *entry;
+
+	seg = sgrb_seg_find(&fpriv->read_seg_lh, 256);
+	if (unlikely(!seg))
+		return -ENOMEM;
+	seg->offset += sprintf(seg->address + seg->offset, "%s missed %Lu\n",
+			       name, (unsigned long long)slist->hits_missed);
+	list_for_each_entry(entry, &slist->entry_lh, list) {
+		seg = sgrb_seg_find(&fpriv->read_seg_lh, 256);
+		if (unlikely(!seg))
+			return -ENOMEM;
+		seg->offset += sprintf(seg->address + seg->offset,
+				       "%s 0x%Lx %Lu\n", name,
+				       (unsigned long long)entry->value,
+				       (unsigned long long)entry->hits);
+	}
+	return 0;
+}
+
+static int statistic_fdef_sparse(struct statistic *stat, char *line)
+{
+	return sprintf(line, " entries=%u", stat->u.sparse.entries_max);
+}
+
+static match_table_t statistic_match_sparse = {
+	{1, "entries=%u"},
+	{9, NULL}
+};
+
+static int statistic_parse_sparse(struct statistic *stat,
+				  struct statistic_info *info,
+				  int type, char *def)
+{
+	char *p;
+	substring_t args[MAX_OPT_ARGS];
+
+	while ((p = strsep(&def, " ")) != NULL) {
+		if (!*p)
+			continue;
+		if (match_token(p, statistic_match_sparse, args) == 1) {
+			statistic_transition(stat, info,
+					     STATISTIC_STATE_UNCONFIGURED);
+			match_int(&args[0], &stat->u.sparse.entries_max);
+			return 0;
+		}
+	}
+	return -EINVAL;
+}
+
+/* code mostly concerned with managing statistics */
+
+static struct statistic_discipline statistic_discs[] = {
+	{ /* STATISTIC_TYPE_COUNTER_INC */
+	  NULL,
+	  statistic_alloc_generic,
+	  NULL,
+	  statistic_reset_counter,
+	  statistic_merge_counter,
+	  statistic_fdata_counter,
+	  NULL,
+	  statistic_add_counter_inc,
+	  statistic_set_counter_inc,
+	  "counter_inc", sizeof(u64)
+	},
+	{ /* STATISTIC_TYPE_COUNTER_PROD */
+	  NULL,
+	  statistic_alloc_generic,
+	  NULL,
+	  statistic_reset_counter,
+	  statistic_merge_counter,
+	  statistic_fdata_counter,
+	  NULL,
+	  statistic_add_counter_prod,
+	  statistic_set_counter_prod,
+	  "counter_prod", sizeof(u64)
+	},
+	{ /* STATISTIC_TYPE_UTIL */
+	  NULL,
+	  statistic_alloc_generic,
+	  NULL,
+	  statistic_reset_util,
+	  statistic_merge_util,
+	  statistic_fdata_util,
+	  NULL,
+	  statistic_add_util,
+	  statistic_set_util,
+	  "utilisation", sizeof(struct statistic_entry_util)
+	},
+	{ /* STATISTIC_TYPE_HISTOGRAM_LIN */
+	  statistic_parse_histogram,
+	  statistic_alloc_histogram,
+	  NULL,
+	  statistic_reset_histogram,
+	  statistic_merge_histogram,
+	  statistic_fdata_histogram,
+	  statistic_fdef_histogram,
+	  statistic_add_histogram_lin,
+	  statistic_set_histogram_lin,
+	  "histogram_lin", sizeof(u64)
+	},
+	{ /* STATISTIC_TYPE_HISTOGRAM_LOG2 */
+	  statistic_parse_histogram,
+	  statistic_alloc_histogram,
+	  NULL,
+	  statistic_reset_histogram,
+	  statistic_merge_histogram,
+	  statistic_fdata_histogram,
+	  statistic_fdef_histogram,
+	  statistic_add_histogram_log2,
+	  statistic_set_histogram_log2,
+	  "histogram_log2", sizeof(u64)
+	},
+	{ /* STATISTIC_TYPE_SPARSE */
+	  statistic_parse_sparse,
+	  statistic_alloc_sparse,
+	  statistic_free_sparse,
+	  statistic_free_sparse,	/* reset equals free */
+	  statistic_merge_sparse,
+	  statistic_fdata_sparse,
+	  statistic_fdef_sparse,
+	  statistic_add_sparse,
+	  statistic_set_sparse,
+	  "sparse", sizeof(struct statistic_sparse_list)
+	},
+	{ /* STATISTIC_TYPE_NONE */ }
+};
+
+postcore_initcall(statistic_init);
+module_exit(statistic_exit);
+
+MODULE_LICENSE("GPL");
diff -Nurp a/lib/Makefile b/lib/Makefile
--- a/lib/Makefile	2006-05-19 15:44:27.000000000 +0200
+++ b/lib/Makefile	2006-05-19 16:02:23.000000000 +0200
@@ -47,6 +47,8 @@ obj-$(CONFIG_TEXTSEARCH_KMP) += ts_kmp.o
 obj-$(CONFIG_TEXTSEARCH_BM) += ts_bm.o
 obj-$(CONFIG_TEXTSEARCH_FSM) += ts_fsm.o
 
+obj-$(CONFIG_STATISTICS) += statistic.o
+
 obj-$(CONFIG_SWIOTLB) += swiotlb.o
 
 hostprogs-y	:= gen_crc32table
diff -Nurp a/arch/s390/Kconfig b/arch/s390/Kconfig
--- a/arch/s390/Kconfig	2006-05-19 15:44:22.000000000 +0200
+++ b/arch/s390/Kconfig	2006-05-19 16:02:23.000000000 +0200
@@ -474,8 +474,14 @@ source "drivers/net/Kconfig"
 
 source "fs/Kconfig"
 
+menu "Instrumentation Support"
+
 source "arch/s390/oprofile/Kconfig"
 
+source "lib/Kconfig.statistic"
+
+endmenu
+
 source "arch/s390/Kconfig.debug"
 
 source "security/Kconfig"
diff -Nurp a/lib/Kconfig.statistic b/lib/Kconfig.statistic
--- a/lib/Kconfig.statistic	1970-01-01 01:00:00.000000000 +0100
+++ b/lib/Kconfig.statistic	2006-05-19 16:02:23.000000000 +0200
@@ -0,0 +1,11 @@
+config STATISTICS
+	bool "Statistics infrastructure"
+	depends on DEBUG_FS
+	help
+	  The statistics infrastructure provides a debug-fs based user interface
+	  for statistics of kernel components, that is, usually device drivers.
+	  Statistics are available for components that have been instrumented to
+	  feed data into the statistics infrastructure.
+	  This feature is useful for performance measurements or performance
+	  debugging.
+	  If in doubt, say "N".
diff -Nurp a/arch/s390/oprofile/Kconfig b/arch/s390/oprofile/Kconfig
--- a/arch/s390/oprofile/Kconfig	2006-03-20 06:53:29.000000000 +0100
+++ b/arch/s390/oprofile/Kconfig	2006-05-19 16:02:23.000000000 +0200
@@ -1,6 +1,3 @@
-
-menu "Profiling support"
-
 config PROFILING
 	bool "Profiling support"
 	help
@@ -18,5 +15,3 @@ config OPROFILE
 
 	  If unsure, say N.
 
-endmenu
-
diff -Nurp a/MAINTAINERS b/MAINTAINERS
--- a/MAINTAINERS	2006-05-19 15:44:32.000000000 +0200
+++ b/MAINTAINERS	2006-05-19 16:02:23.000000000 +0200
@@ -2608,6 +2608,13 @@ STARMODE RADIO IP (STRIP) PROTOCOL DRIVE
 W:	http://mosquitonet.Stanford.EDU/strip.html
 S:	Unsupported ?
 
+STATISTICS INFRASTRUCTURE
+P:	Martin Peschke
+M:	mpeschke@de.ibm.com
+M:	linux390@de.ibm.com
+W:	http://www.ibm.com/developerworks/linux/linux390/
+S:	Supported
+
 STRADIS MPEG-2 DECODER DRIVER
 P:	Nathan Laredo
 M:	laredo@gnu.org



^ permalink raw reply	[flat|nested] 16+ messages in thread
* Re: [patch 5/6] statistics infrastructure
@ 2005-12-16 12:27 Martin Peschke
  0 siblings, 0 replies; 16+ messages in thread
From: Martin Peschke @ 2005-12-16 12:27 UTC (permalink / raw)
  To: ak; +Cc: akpm, linux-kernel

..oops, should have made sure that my mailer does line breaks 
appropriately. Getting it right this time, sorry ...




Andi Kleen wrote:

 > Locks and indirect function calls?
 > It seems very wrong to me to make such heavy weight statistic
 > functions. Most likely you will disturb the performance whatever is
 > being counted badly.


Well, that's a tradeoff between flexibility/function and performance.

I don't have any reliable numbers ready at hand. At least, doing I/O to 
my SCSI devices with enabled statistics didn't feel bad.

Here is the rationale for both the indirect function call and the lock:

If a statistic is disabled (that's the default) neither is locking done 
nor is a function called indirectly. So far so good, I would say.

If a statistic is enabled the lock for this entity is grabbed and one 
indirect function call is done. Anything else is inlined. I use granular 
per-interface (per-entity, for example per-LUN or per-HBA) locking.

The indirect function call allows customization of the ways data 
processing is done for particular statistics.

For example, one could deflate a histogram of latencies into a counter 
providing the total of latency measurements, that is the total of 
requests observed; or inflate a statistic the other way around if 
required. Another example: one can make a statistic gather data for 
recurring periods of time, like megabytes per seconds instead of just 
the total amount of bytes transferred, or like queue utilization per 
whatever-unit-of-time instead of just an overall utilization.

A statistic that feeds on request sizes can be setup to provide the 
following "views":
- number of requests observed (counter)
- number of requests per unit of time (history based on a counter)
- number of bytes transferred (counter)
- number of bytes transferred per unit of time = transfer rate (history 
based on a counter)
- traffic pattern (histogram for discrete request sizes or for ranges of 
request sizes)
- raw measurement data gathered
- etc.

As a device driver programmer I might pick a "view" a user is interested 
in. My pick might miss by a mile. I simply don't know for sure beforehand.

The indirect function call could be replaced by a switch statement. 
Not sure whether this is less critical and more acceptable than indirect 
function calls. Might be architecture dependent.

We can get rid of the indirect function call (or an alternative switch 
statement) if the vote is against this level of flexibility.
Then it would be solely up to the exploiter to define once and for all 
whether a particular sort of data is shown as a simple counter, a 
histogram, a fill level indicator, this history-type statistic thing in 
a ringbuffer etc. This might be fine for a considerable number of cases.

The lock is there to avoid trouble with concurrent updates to a 
statistic. If per-CPU data was used, concurrent updates are fine as long 
as they are done on different CPUs. Precautions for concurrent updates 
to the same per-CPU is still needed, though.

The current interface allows to use the lock this way:

lock(stat_x->interface);
statistics_inc_nolock(stat_x, y);
statistics_inc_nolock(stat_m, n);
statistics_add_nolock(stat_a, e, f);
unlock(stat_x->interface);

Because we hold the same lock when creating output for users, coherency 
of several statistics of a single entity can be achieved if statistic 
updates are done within one critical section as shown above.

The lock is also used to make sure that updates to a statistic don't 
happen while the setup of a statistic is changed by users. If we get rid 
of the indirect function call, some of these setup changes go away, 
anyway. Other cases, like statistic resets or inflating a 5-counter 
histogram to a 25-counter histogram, don't go away. If I can figure out 
how to reallocate, say, an array of counters for a histogram without 
holding a lock while updates happen... Maybe I could temporarily turn off 
a statistic.



 > Take a look at many other subsystems - they do per CPU counters etc.
 > to make this all fast.

I am looking into per CPU data.

But, is this really required for _all_ statistics? I see that it makes 
sense to have per CPU optimizations for very critical components, like 
parts of VM. But there are still a lot of do-it-yourself type statistics 
around that use an atomic_t, for example, without implementing it per CPU.

Then, I am not sure yet whether per CPU data is feasible for histograms 
and other more complex statistics. I have got to find out.

I tried to write the code in a way that allows adding other statistic 
types, like counters, histograms and so on, with moderate effort. Maybe I 
can use the internal interface to plug in some discipline based on per 
CPU counters...



 > But it's still unclear why it would need such an heavyweight
 > infrastructure. Normally it's not that bad to reimplemented on the
 > fly. Maybe some common code can be refactored out of that, but
 > probably not too much.
 >
 > [... lots of other code snipped ... ]
 >
 > Looks all very very overdesigned to me. How about you just start
 > with a minimum specification and describe what you want to do?

As a device driver programmer I don't want to reinvent the wheel when 
coding statistics. I would prefer to use a few and easy to use library 
functions. I don't want to worry about getting my personal wheel being 
functional. I'd rather use my time to worry about which kind of data is 
really needed and which is not.

I'd like to provide a tool that can be customized at run time to a 
certain degree because it might not be acceptable for customers to 
install private kernels in order to get tuned statistics.
As a device driver programmer I can make an educated guess, at best, 
about certain parameters that impact the processing of statistic data.
Users might know better whether they need to focus on latencies from 2 
ms up to 64 ms or  from 100 ms up to 500 ms, because this kind of 
decisions depends on the environment to be measured, e.g. devices attached.

In a device driver, I don't want to spent much thought about the 
statistic's user interface. Particularly not if the statistic is a 
little bit more complex than a simple counter. Would be really nice to 
have a user interface that looks the same for all exploiters, i.e. to 
have common output formats for counters, histograms, fill level 
indicators etc.

Martin


^ permalink raw reply	[flat|nested] 16+ messages in thread
* [patch 5/6] statistics infrastructure
@ 2005-12-14 16:46 Martin Peschke
  2005-12-14 18:38 ` Andi Kleen
  0 siblings, 1 reply; 16+ messages in thread
From: Martin Peschke @ 2005-12-14 16:46 UTC (permalink / raw)
  To: linux-kernel; +Cc: akpm

... posting was too big and bounced, next try

[patch 5/6] statistics infrastructure

This patch adds statistics infrastructure as common code.

In accordance to pending changes for other archs I introduced an
"Instrumentation Support" menu, where I added an entry for the statistics
infrastructure. I didn't bother to add this entry for other architectures.
Please do so if you are interested in exploiting statistics on non-s390
architectures.

Signed-off-by: Martin Peschke <mp3@de.ibm.com>
---

  MAINTAINERS                |    7
  arch/s390/Kconfig          |    6
  arch/s390/oprofile/Kconfig |    5
  include/linux/statistic.h  |  449 +++++++++
  lib/Kconfig.statistic      |   12
  lib/Makefile               |    1
  lib/statistic.c            | 2183 +++++++++++++++++++++++++++++++++++++++++++++
  7 files changed, 2658 insertions(+), 5 deletions(-)

diff -Nurp e/arch/s390/Kconfig f/arch/s390/Kconfig
--- e/arch/s390/Kconfig	2005-10-28 02:02:08.000000000 +0200
+++ f/arch/s390/Kconfig	2005-12-14 14:22:54.000000000 +0100
@@ -481,8 +481,14 @@ source "drivers/net/Kconfig"

  source "fs/Kconfig"

+menu "Instrumentation Support"
+
  source "arch/s390/oprofile/Kconfig"

+source "lib/Kconfig.statistic"
+
+endmenu
+
  source "arch/s390/Kconfig.debug"

  source "security/Kconfig"
diff -Nurp e/arch/s390/oprofile/Kconfig f/arch/s390/oprofile/Kconfig
--- e/arch/s390/oprofile/Kconfig	2005-10-28 02:02:08.000000000 +0200
+++ f/arch/s390/oprofile/Kconfig	2005-12-14 14:22:54.000000000 +0100
@@ -1,6 +1,3 @@
-
-menu "Profiling support"
-
  config PROFILING
  	bool "Profiling support"
  	help
@@ -18,5 +15,3 @@ config OPROFILE

  	  If unsure, say N.

-endmenu
-
diff -Nurp e/include/linux/statistic.h f/include/linux/statistic.h
--- e/include/linux/statistic.h	1970-01-01 01:00:00.000000000 +0100
+++ f/include/linux/statistic.h	2005-12-14 14:22:54.000000000 +0100
@@ -0,0 +1,449 @@
+/*
+ * include/linux/statistic.h
+ *
+ * Statistics facility
+ *
+ * (C) Copyright IBM Corp. 2005
+ *
+ * Author(s): Martin Peschke <mp3@de.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#ifndef STATISTIC_H
+#define STATISTIC_H
+
+#define STATISTIC_H_REVISION "$Revision: 1.5 $"
+
+#include <linux/fs.h>
+#include <linux/types.h>
+#include <linux/spinlock.h>
+#include <linux/sgrb.h>
+
+#define STATISTIC_ROOT_DIR	"statistics"
+
+#define STATISTIC_FILENAME_DATA	"data"
+#define STATISTIC_FILENAME_DEF	"definition"
+
+#define STATISTIC_NAME_SIZE	64
+
+#define STATISTIC_RANGE_MIN	-0x7fffffffffffffffLL
+#define STATISTIC_RANGE_MAX	 0x7ffffffffffffffeLL
+
+enum {
+	STATISTIC_DEF_NAME,
+	STATISTIC_DEF_UNIT,
+	STATISTIC_DEF_TYPE_VALUE,
+	STATISTIC_DEF_TYPE_RANGE,
+	STATISTIC_DEF_TYPE_ARRAY,
+	STATISTIC_DEF_TYPE_LIST,
+	STATISTIC_DEF_TYPE_RAW,
+	STATISTIC_DEF_TYPE_HISTORY,
+	STATISTIC_DEF_ON,
+	STATISTIC_DEF_OFF,
+	STATISTIC_DEF_STARTED,
+	STATISTIC_DEF_STOPPED,
+	STATISTIC_DEF_RANGEMIN,
+	STATISTIC_DEF_RANGEMAX,
+	STATISTIC_DEF_SCALE_LIN,
+	STATISTIC_DEF_SCALE_LOG2,
+	STATISTIC_DEF_ENTRIESMAX,
+	STATISTIC_DEF_BASEINT,
+	STATISTIC_DEF_HITSMISSED,
+	STATISTIC_DEF_HITSOUT,
+	STATISTIC_DEF_RESET,
+	STATISTIC_DEF_MODE_INC,
+	STATISTIC_DEF_MODE_PROD,
+	STATISTIC_DEF_MODE_RANGE,
+	STATISTIC_DEF_PERIOD,
+	STATISTIC_DEF_VOID,
+};
+
+struct statistic;
+struct statistic_file_private;
+
+typedef void (statistic_release_fn) (struct statistic *);
+typedef void (statistic_reset_fn) (struct statistic *);
+typedef int (statistic_format_data_fn)
+		(struct statistic *, struct statistic_file_private *);
+typedef int (statistic_format_def_fn) (struct statistic *, char *);
+typedef u64 (statistic_add_fn) (struct statistic *, s64, u64);
+
+struct statistic_entry_list {
+	struct list_head	list;
+	s64			value;
+	u64			hits;
+};
+
+struct statistic_entry_raw {
+	u64 clock;
+	u64 serial;
+	s64 value;
+	u64 incr;
+};
+
+struct statistic_entry_range {
+	u32 res;
+	u32 num;	/* FIXME: better 64 bit; do_div can't deal with it) */
+	s64 acc;
+	s64 min;
+	s64 max;
+};
+
+struct statistic {
+	struct list_head		list;
+	struct statistic_interface	*interface;
+	struct statistic		**stat_ptr;
+	statistic_release_fn		*release;
+	statistic_reset_fn		*reset;
+	statistic_format_data_fn	*format_data;
+	statistic_format_def_fn		*format_def;
+	statistic_add_fn		*add;
+	char	name[STATISTIC_NAME_SIZE];
+	char	units[STATISTIC_NAME_SIZE];
+	u8	type;
+	u8	on;
+	u64	started;
+	u64	stopped;
+	u64	age;
+	s64 	range_min;
+	s64 	range_max;
+	u64	hits_out_of_range;
+	union {
+		struct {
+			/* data */
+			u64 hits;
+			/* user-writeable */
+			int mode;
+		} value;
+		struct {
+			/* data */
+			struct statistic_entry_range range;
+		} range;
+		struct {
+			/* data */
+			u64 *hits;
+			/* user-writeable */
+			u32 base_interval;
+			u8 scale;
+			/* internal */
+			u32 entries;
+		} array;
+		struct {
+			/* data */
+			struct list_head entry_lh;
+			/* user-writeable */
+			u32 entries_max;
+			/* informational for user */
+			u64 hits_missed;
+			/* internal */
+			u32 entries;
+		} list;
+		struct {
+			/* data */
+			struct sgrb rb;
+			/* user-writeable */
+			u32 entries_max;
+			/* internal */
+			u64 next_serial;
+		} raw;
+		struct {
+			/* data */
+			struct sgrb rb;
+			/* user-writeable */
+			u32 entries_max;
+			int mode;
+			u64 period;
+			/* internal */
+			u64 checkpoint;
+			u64 window;
+			u8 entry_size;
+		} history;
+	} data;
+};
+
+struct statistic_interface {
+	struct list_head	list;
+	struct dentry		*debugfs_dir;
+	struct dentry		*data_file;
+	struct dentry		*def_file;
+	struct list_head	statistic_lh;
+	struct semaphore	sem;
+	spinlock_t		lock;
+};
+
+struct statistic_file_private {
+	struct list_head read_seg_lh;
+	struct list_head write_seg_lh;
+	size_t write_seg_total_size;
+};
+
+struct statistic_global_data {
+	struct dentry		*root_dir;
+	struct list_head	interface_lh;
+	struct semaphore	sem;
+};
+
+#ifdef CONFIG_STATISTICS
+
+#define statistic_lock(interface, flags)	\
+		spin_lock_irqsave(&(interface)->lock, flags)
+#define statistic_unlock(interface, flags)	\
+		spin_unlock_irqrestore(&(interface)->lock, flags)
+
+extern int statistic_interface_create(struct statistic_interface **,
+				      const char *);
+extern int statistic_interface_remove(struct statistic_interface **);
+
+extern int statistic_create(struct statistic **, struct statistic_interface *,
+			    const char *, const char *);
+extern int statistic_remove(struct statistic **);
+
+extern int statistic_define_value(struct statistic *, s64, s64, int);
+extern int statistic_define_range(struct statistic *, s64, s64);
+extern int statistic_define_array(struct statistic *, s64, s64, u32, u8);
+extern int statistic_define_list(struct statistic *, s64, s64, u32);
+extern int statistic_define_raw(struct statistic *, s64, s64, u32);
+extern int statistic_define_history(struct statistic *, s64, s64, u32, u64,
+				    int);
+
+extern int statistic_start(struct statistic *);
+extern int statistic_stop(struct statistic *);
+extern void statistic_reset(struct statistic *);
+
+/**
+ * statistic_add - update statistic with (discriminator, increment) pair
+ * @stat: statistic
+ * @value: discriminator
+ * @incr: increment
+ *
+ * The actual processing of (discriminator, increment) is determined by
+ * the definition applied to the statistic. See the descriptions of the
+ * statistic_define_*() routines for details.
+ *
+ * This variant grabs the lock and should be used when there is _no_ need
+ * to make a bunch of updates to various statistics of an interface,
+ * including the statistic this update is reported for, atomic
+ * in order to be meaningful (get the next coherent state of several
+ * statistics).
+ *
+ * On success, the return value is dependent on which type of accumulation
+ * has been applied through the recent definition. Usually, returns the
+ * updated total of increments reported for this discriminator, if the
+ * defined type of accumulation does this kind of computation.
+ *
+ * If the struct statistic pointer provided by the caller
+ * is NULL (unused), this routine fails, and 0 is returned.
+ *
+ * If some required memory could not be allocated this routine fails,
+ * and 0 is returned.
+ *
+ * If the discriminator is not valid (out of range), this routine fails,
+ * and 0 is returned.
+ *
+ * NOTE(review): despite the NULL claim above, this inline dereferences
+ * @stat unconditionally - confirm whether callers may pass NULL.
+ */
+static inline u64 statistic_add(struct statistic *stat, s64 value, u64 incr)
+{
+	unsigned long flags;
+	/* NOTE(review): declared int, but stat->add() and this function
+	 * return u64 - the result may be truncated */
+	int retval;
+
+	if (stat->on != STATISTIC_DEF_ON)
+		return 0;
+
+	statistic_lock(stat->interface, flags);
+	retval = stat->add(stat, value, incr);
+	statistic_unlock(stat->interface, flags);
+
+	return retval;
+}
+
+/**
+ * statistic_add_nolock - a statistic_add() variant
+ * @stat: statistic
+ * @value: discriminator
+ * @incr: increment
+ *
+ * Same purpose and behaviour as statistic_add(). See there for details.
+ *
+ * Only difference to statistic_add():
+ * Lock management is up to the exploiter. Basically, we give exploiters
+ * the option to ensure data consistency across all statistics attached
+ * to a parent interface by adding several calls to this routine into one
+ * critical section protected by stat->interface->lock.
+ */
+static inline u64 statistic_add_nolock(struct statistic *stat, s64 value,
+				       u64 incr)
+{
+	/* NULL check implements the documented "pointer is NULL" contract */
+	if (!stat || stat->on != STATISTIC_DEF_ON)
+		return 0;
+
+#ifdef DEBUG
+	assert_spin_locked(&stat->interface->lock);
+#endif
+
+	return stat->add(stat, value, incr);
+}
+
+/**
+ * statistic_inc - a statistic_add() variant
+ * @stat: statistic
+ * @value: discriminator
+ *
+ * Same purpose and behaviour as statistic_add(). See there for details.
+ * Difference: Increment defaults to 1.
+ */
+static inline u64 statistic_inc(struct statistic *stat, s64 value)
+{
+	unsigned long flags;
+	u64 retval;	/* u64, not int: stat->add() returns u64 - avoid truncation */
+
+	/* NULL check implements the documented "pointer is NULL" contract */
+	if (!stat || stat->on != STATISTIC_DEF_ON)
+		return 0;
+
+	statistic_lock(stat->interface, flags);
+	retval = stat->add(stat, value, 1);
+	statistic_unlock(stat->interface, flags);
+
+	return retval;
+}
+
+/**
+ * statistic_inc_nolock - a statistic_add_nolock() variant
+ * @stat: statistic
+ * @value: discriminator
+ *
+ * Same purpose and behaviour as statistic_add_nolock(). See there for details.
+ * Difference: Increment defaults to 1.
+ * Caller must hold stat->interface->lock.
+ */
+static inline u64 statistic_inc_nolock(struct statistic *stat, s64 value)
+{
+	/* NULL check implements the documented "pointer is NULL" contract */
+	if (!stat || stat->on != STATISTIC_DEF_ON)
+		return 0;
+
+#ifdef DEBUG
+	assert_spin_locked(&stat->interface->lock);
+#endif
+
+	return stat->add(stat, value, 1);
+}
+
+#else /* CONFIG_STATISTICS */
+
+#define statistic_lock(interface, flags)	do { } while (0)
+#define statistic_unlock(interface, flags)	do { } while (0)
+
+/*
+ * CONFIG_STATISTICS=n: every entry point collapses to a no-op returning 0,
+ * so exploiting drivers need no #ifdefs of their own.
+ */
+static inline int statistic_interface_create(
+				struct statistic_interface **interface_ptr,
+				const char *name)
+{
+	return 0;
+}
+
+static inline int statistic_interface_remove(
+				struct statistic_interface **interface_ptr)
+{
+	return 0;
+}
+
+static inline int statistic_create(struct statistic **stat_ptr,
+				   struct statistic_interface *interface,
+				   const char *name, const char *units)
+{
+	return 0;
+}
+
+static inline int statistic_remove(struct statistic **stat_ptr)
+{
+	return 0;
+}
+
+
+static inline int statistic_define_value(struct statistic *stat, s64 range_min,
+					 s64 range_max, int mode)
+{
+	return 0;
+}
+
+static inline int statistic_define_range(struct statistic *stat, s64 range_min,
+					 s64 range_max)
+{
+	return 0;
+}
+
+static inline int statistic_define_array(struct statistic *stat, s64 range_min,
+					 s64 range_max, u32 base_interval,
+					 u8 scale)
+{
+	return 0;
+}
+
+static inline int statistic_define_list(struct statistic *stat, s64 range_min,
+					s64 range_max, u32 entries_max)
+{
+	return 0;
+}
+
+static inline int statistic_define_raw(struct statistic *stat, s64 range_min,
+				       s64 range_max, u32 entries_max)
+{
+	return 0;
+}
+
+static inline int statistic_define_history(struct statistic *stat,
+					   s64 range_min, s64 range_max,
+					   u32 entries_max, u64 period,
+					   int mode)
+{
+	return 0;
+}
+
+
+static inline int statistic_start(struct statistic *stat)
+{
+	return 0;
+}
+
+static inline int statistic_stop(struct statistic *stat)
+{
+	return 0;
+}
+
+static inline void statistic_reset(struct statistic *stat)
+{
+}
+
+static inline u64 statistic_add(struct statistic *stat, s64 value, u64 incr)
+{
+	return 0;
+}
+
+static inline u64 statistic_add_nolock(struct statistic *stat, s64 value,
+				       u64 incr)
+{
+	return 0;
+}
+
+static inline u64 statistic_inc(struct statistic *stat, s64 value)
+{
+	return 0;
+}
+
+static inline u64 statistic_inc_nolock(struct statistic *stat, s64 value)
+{
+	return 0;
+}
+
+#endif /* CONFIG_STATISTICS */
+
+#endif /* STATISTIC_H */
diff -Nurp e/lib/Kconfig.statistic f/lib/Kconfig.statistic
--- e/lib/Kconfig.statistic	1970-01-01 01:00:00.000000000 +0100
+++ f/lib/Kconfig.statistic	2005-12-14 14:22:54.000000000 +0100
@@ -0,0 +1,12 @@
+config STATISTICS
+	bool "Statistics infrastructure"
+	depends on DEBUG_FS
+	select SGRB
+	help
+	  The statistics infrastructure provides a debug-fs based user interface
+	  for statistics of kernel components, that is, usually device drivers.
+	  Statistics are available for components that have been instrumented to
+	  feed data into the statistics infrastructure.
+	  This feature is useful for performance measurements or performance
+	  debugging.
+	  If in doubt, say "N".
diff -Nurp e/lib/Makefile f/lib/Makefile
--- e/lib/Makefile	2005-12-14 13:26:51.000000000 +0100
+++ f/lib/Makefile	2005-12-14 14:30:13.000000000 +0100
@@ -47,6 +47,7 @@ obj-$(CONFIG_TEXTSEARCH_FSM) += ts_fsm.o
  obj-$(CONFIG_SWIOTLB) += swiotlb.o

  obj-$(CONFIG_SGRB) += sgrb.o
+obj-$(CONFIG_STATISTICS) += statistic.o

  hostprogs-y	:= gen_crc32table
  clean-files	:= crc32table.h
diff -Nurp e/lib/statistic.c f/lib/statistic.c
--- e/lib/statistic.c	1970-01-01 01:00:00.000000000 +0100
+++ f/lib/statistic.c	2005-12-14 14:22:55.000000000 +0100
@@ -0,0 +1,2183 @@
+/*
+ *  lib/statistic.c
+ *    statistics facility
+ *
+ *    Copyright (C) 2005 IBM Deutschland Entwicklung GmbH,
+ *                       IBM Corporation
+ *
+ *    Author(s): Martin Peschke (mp3@de.ibm.com),
+ *
+ *    Bugreports to: <Linux390@de.ibm.com>
+ *
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ *
+ *    todos:
+ *	- see locking rules; one tbd remaining
+ *	- define a set of agreed names or a naming scheme for
+ *	  consistency and comparability across exploiters
+ *	  (e.g. similar statistic names for latencies of dasd driver
+ *	  and zfcp driver); this entails an agreement about granularities
+ *	  as well (e.g. separate statistic for read/write/no-data commands);
+ *	  a common set of unit strings would be nice then, too, of course
+ *	  (e.g. "seconds", "milliseconds", "microseconds", ...)
+ *	- seq_file use might simplify this code
+ *
+ *
+ *    another bunch of ideas being pondered:
+ *	- provide a (perl?) script for automatic reformatting and processing of
+ *	  the contents of "data" files (for generating fancy tables, diagrams
+ *	  in ASCII-art, XML-output ready to be imported into the OpenOffice
+ *	  spreadsheet/diagram tool, ... you name it); a generic script that
+ *	  takes hints from "definition" files into account would basically
+ *	  suffice for all exploiters
+ *	- slim down struct statistic and move on/off/reset/started/stopped
+ *        etc. up to struct statistic_interface???
+ *	  (Is there a need to turn individual statistic on and off etc; or
+ *	  is it more handy and sufficient to allow that only for the entirety
+ *	  of all statistic attached to an interface?)
+ *	- some user-configurable that allows to release unused resources
+ *	  of stopped statistic; or release on stop / allocate on start?
+ *	- perf. opt. of array: table lookup of values, binary search for values
+ *	- another statistic discipline based on some sort of tree, but
+ *	  similar in semantics to list discipline (for high-perf. histograms of
+ *	  discrete values)
+ *	- use list entries (visible in data file) for hits_out_of_range/
+ *	  hits_missed instead of meta data values (visible in definition file)
+ *	  on analogy to first and last entries of array discipline (<=range_min,
+ *	  >range_max)???
+ *	- allow for more than a single "view" on data at the same time by
+ *	  providing the capability to attach several (a list of) "definitions"
+ *	  to a struct statistic
+ *	  (e.g. show histogram of requests sizes and history of megabytes/sec.
+ *	  at the same time)
+ *	- group similar statistics in classes and allow for redefinitions
+ *	  per group
+ *	  (e.g. [automagically?] group all list disciplines of request sizes
+ *	  gathered by zfcp in order to allow for a single-operation redefinition
+ *	  of range_max for all of them)
+ *	- multi-dimensional statistic (combination of two or more
+ *	  characteristics/discriminators); worth the effort??
+ *	  (e.g. a matrix of occurrences for latencies of requests of
+ *	  particular sizes)
+ *	- allow exploiters to register a callback with every struct statistic
+ *	  (or statistic interface?) in order to be able to do another
+ *	  statitics update when the user reads the data file; would be useful
+ *	  for gathering statistic data about any ongoing condition
+ *	- have exploiters always provide the best granularity possible
+ *	  (like nanoseconds instead of milliseconds) in order to keep
+ *	  flexibility, and have the statitics user interface handle any
+ *	  desired computation (like from nanoseconds to milliseconds)
+ *	- allow user to choose hex/oct/dec representation of numbers
+ *	- make "history" an extra option that allows it to combine with
+ *	  any other type/discipline??? (history request size lists???)
+ *	- split out the definition of range_min/range_max from existing
+ *	  definition functions
+ *
+ *
+ *    locking rules
+ *	- We grab a global semaphore on calls to statistic_interface_create() /
+ *	  statistic_interface_remove() to make sure various exploiters do not
+ *	  interfere with each other by corrupting internal global data (list).
+ *	- Once an interface has been created, it is assumed that the exploiter
+ *	  serialises any other setup or closure business related to statistics
+ *	  attached to the same interface. No internal locking for this purpose!
+ *	- data reading vs. data gathering vs. redefinition (incl. on/off/reset):
+ *	  We hold the interface's spinlock to make sure that statistics' meta
+ *	  data as well as their data is not being messed with concurrently,
+ *	  and coherent on reading (concurrent updates shall not interfere).
+ *	- data gathering vs. removal of statistic: It is assumed that the
+ *	  exploiter makes sure that data gathering has ceased prior to removing
+ *	  a statistic.
+ *	- multiple related data updates in the scope of a single interface:
+ *	  We provide *_nolock variants of the statistic_inc() /
+ *	  statistic_add() routines, and thus allow exploiters to manage
+ *	  locking during updates. This way multiple updates can be made in
+ *	  an atomic fashion. Exploiters are encouraged to make use of this
+ *	  if an atomic update of more than one statistic is required to
+ *	  generate the next valid state as to the coherence of related
+ *	  statistics.
+ *	- touching files by user vs. removal of anything: tbd
+ *
+ *    known bugs:
+ *      - Statistics of type "history" are continuously updated even if being
+ *	  turned off. Fix: implement statistic_stop_history() and move the
+ *	  beforementioned enforced update - inactivity case - there from
+ *	  statistic_format_data_history().
+ */
+
+#define STATISTIC_C_REVISION "$Revision: 1.5 $"
+
+#include <linux/fs.h>
+#include <linux/debugfs.h>
+#include <linux/module.h>
+#include <linux/list.h>
+#include <linux/parser.h>
+#include <linux/time.h>
+#include <linux/spinlock.h>
+#include <linux/sched.h>
+
+#include <asm/bug.h>
+#include <asm/uaccess.h>
+#include <asm/semaphore.h>
+
+#include <linux/sgrb.h>
+#include <linux/statistic.h>
+
+extern void tod_to_timeval(__u64, struct timespec *);
+
+struct statistic_global_data statistic_globals;
+
+static int statistic_interface_generic_close(struct inode *inode,
+					     struct file *file);
+static ssize_t statistic_interface_generic_read(struct file *, char __user *,
+						size_t, loff_t *);
+static ssize_t statistic_interface_generic_write(struct file *,
+					const char __user *, size_t, loff_t *);
+
+static int statistic_interface_def_open(struct inode *, struct file *);
+static int statistic_interface_def_close(struct inode *, struct file *);
+
+static int statistic_interface_data_open(struct inode *, struct file *);
+
+/*
+ * File operations for the per-interface "definition" (read/write) and
+ * "data" (read-only) debugfs files.
+ * NOTE(review): both tables could be static (and const) unless they are
+ * referenced from another translation unit - confirm.
+ */
+struct file_operations statistic_def_file_ops = {
+	.owner		= THIS_MODULE,
+	.read		= statistic_interface_generic_read,
+	.write		= statistic_interface_generic_write,
+	.open		= statistic_interface_def_open,
+	.release	= statistic_interface_def_close,
+};
+
+struct file_operations statistic_data_file_ops = {
+	.owner		= THIS_MODULE,
+	.read		= statistic_interface_generic_read,
+	.open		= statistic_interface_data_open,
+	.release	= statistic_interface_generic_close,
+};
+
+/*
+ * FIXME:
+ * Is there any way to get rid of statistic_strings by merging it somehow into
+ * statistic_def?
+ *
+ * Invariant: entry order must match the STATISTIC_DEF_* token values of the
+ * statistic_def match table below - statistic_format_def() indexes this
+ * array directly with stat->on and stat->type.
+ */
+static char * statistic_strings[] = {
+	"name=",
+	"units=",
+	"type=value",
+	"type=range",
+	"type=array",
+	"type=list",
+	"type=raw",
+	"type=history",
+	"on=1",
+	"on=0",
+	"started=",
+	"stopped=",
+	"range_min=",
+	"range_max=",
+	"scale=lin",
+	"scale=log2",
+	"entries_max=",
+	"base_interval=",
+	"hits_missed=",
+	"hits_out_of_range=",
+	"data=",
+	"mode=increments",
+	"mode=products",
+	"mode=range",
+	"period=",
+	NULL
+};
+
+/* Token table for parsing user writes to the "definition" file. */
+static match_table_t statistic_def = {
+	{STATISTIC_DEF_NAME, "name=%s"},
+	{STATISTIC_DEF_UNIT, "units="},
+	{STATISTIC_DEF_TYPE_VALUE, "type=value"},
+	{STATISTIC_DEF_TYPE_RANGE, "type=range"},
+	{STATISTIC_DEF_TYPE_ARRAY, "type=array"},
+	{STATISTIC_DEF_TYPE_LIST, "type=list"},
+	{STATISTIC_DEF_TYPE_RAW, "type=raw"},
+	{STATISTIC_DEF_TYPE_HISTORY, "type=history"},
+	{STATISTIC_DEF_ON, "on=1"},
+	{STATISTIC_DEF_OFF, "on=0"},
+	{STATISTIC_DEF_STARTED, "started="},
+	{STATISTIC_DEF_STOPPED, "stopped="},
+	{STATISTIC_DEF_RANGEMIN, "range_min=%s"},
+	{STATISTIC_DEF_RANGEMAX, "range_max=%s"},
+	{STATISTIC_DEF_SCALE_LIN, "scale=lin"},
+	{STATISTIC_DEF_SCALE_LOG2, "scale=log2"},
+	{STATISTIC_DEF_ENTRIESMAX, "entries_max=%u"},
+	{STATISTIC_DEF_BASEINT, "base_interval=%s"},
+	{STATISTIC_DEF_HITSMISSED, "hits_missed="},
+	{STATISTIC_DEF_HITSOUT, "hits_out_of_range="},
+	{STATISTIC_DEF_RESET, "data=reset"},
+	{STATISTIC_DEF_MODE_INC, "mode=increments"},
+	{STATISTIC_DEF_MODE_PROD, "mode=products"},
+	{STATISTIC_DEF_MODE_RANGE, "mode=range"},
+	{STATISTIC_DEF_PERIOD, "period=%s"},
+	{STATISTIC_DEF_VOID, NULL}
+};
+
+/* code concerned with module matters */
+
+/*
+ * statistic_init - set up global state and the debugfs root directory.
+ * Returns 0 on success, -ENOMEM if the debugfs root cannot be created.
+ */
+int __init statistic_init(void)
+{
+	sema_init(&statistic_globals.sem, 1);
+	INIT_LIST_HEAD(&statistic_globals.interface_lh);
+	statistic_globals.root_dir = debugfs_create_dir(STATISTIC_ROOT_DIR, NULL);
+	/* debugfs_create_dir() returns NULL on failure; fail init early
+	 * instead of letting later interface creation dereference NULL */
+	if (!statistic_globals.root_dir)
+		return -ENOMEM;
+	return 0;
+}
+
+/* Tear down the debugfs root directory on module unload. */
+void __exit statistic_exit(void)
+{
+	/*
+	 * FIXME: any need to cleanup any statistic possibly still allocated?
+	 * (would only concern leftovers of exploiters - someone elses problem?)
+	 */
+	debugfs_remove(statistic_globals.root_dir);
+}
+
+/* code mostly concerned with accounting */
+
+/* Turn the statistic on and stamp the start time; caller holds the
+ * interface lock. Returns the previous on/off state. */
+static inline int statistic_start_nolock(struct statistic *stat)
+{
+	int retval = stat->on;
+
+	stat->on = STATISTIC_DEF_ON;
+	stat->started = sched_clock();
+
+	return retval;
+}
+
+/**
+ * statistic_start - enable statistic for data gathering
+ * @stat: statistic to be enabled
+ *
+ * Start data gathering without discarding old data.
+ * Function is both available to exploiting device drivers as well as to
+ * the user through the "definition" file.
+ *
+ * On success, returns the previous on/off state.
+ */
+int statistic_start(struct statistic *stat)
+{
+	unsigned long flags;
+	int retval;
+
+	/* locked wrapper around statistic_start_nolock() */
+	statistic_lock(stat->interface, flags);
+	retval = statistic_start_nolock(stat);
+	statistic_unlock(stat->interface, flags);
+
+	return retval;
+}
+
+/* Turn the statistic off and stamp the stop time; caller holds the
+ * interface lock. Returns the previous on/off state. */
+static inline int statistic_stop_nolock(struct statistic *stat)
+{
+	int retval = stat->on;
+
+	stat->on = STATISTIC_DEF_OFF;
+	stat->stopped = sched_clock();
+
+	return retval;
+}
+
+/**
+ * statistic_stop - disable statistic for data gathering
+ * @stat: statistic to be disabled
+ *
+ * Stop data gathering without discarding old data.
+ * Function is both available to exploiting device drivers as well as to
+ * the user through the "definition" file.
+ *
+ * On success, returns the previous on/off state.
+ */
+int statistic_stop(struct statistic *stat)
+{
+	unsigned long flags;
+	int retval;
+
+	/* locked wrapper around statistic_stop_nolock() */
+	statistic_lock(stat->interface, flags);
+	retval = statistic_stop_nolock(stat);
+	statistic_unlock(stat->interface, flags);
+
+	return retval;
+}
+
+/* Discard gathered data via the discipline's reset hook; caller holds the
+ * interface lock. NOTE(review): hits_missed (exposed in the definition
+ * file) is not cleared here - confirm whether that is intentional. */
+static inline void statistic_reset_nolock(struct statistic *stat)
+{
+	stat->reset(stat);
+	stat->hits_out_of_range = 0;
+	stat->age = sched_clock();
+}
+
+/**
+ * statistic_reset - discard data gathered so far
+ * @stat: statistic to be reset
+ *
+ * Discard any gathered data without changing the on/off state.
+ * Function is both available to exploiting device drivers as well as to
+ * the user through the "definition" file.
+ */
+void statistic_reset(struct statistic *stat)
+{
+	unsigned long flags;
+
+	/* locked wrapper around statistic_reset_nolock() */
+	statistic_lock(stat->interface, flags);
+	statistic_reset_nolock(stat);
+	statistic_unlock(stat->interface, flags);
+}
+
+/**
+ * statistic_create - create a statistic and attach it to a given interface
+ * @stat_ptr: reference to struct statistic pointer
+ * @interface_ptr: struct statistic_interface pointer
+ * @name: name of statistic to be created and as seen in "data" and
+ *        "definition" files
+ * @units: string describing the units of the (discriminator, value) pairs
+ *	   that are the raw data delivered by the exploiter to
+ *	   the statistics facility for every update to a statistic,
+ *         exported through the definition file for users' information,
+ *	   suggested format "unit1/unit2" (no blanks!)
+ *
+ * Create a statistic, which - after being defined and enabled - is ready
+ * to capture and compute data provided by the exploiter. A line in the
+ * interface's "definition" file will hold specifics about the named statistic.
+ * The statistic is defined as type "value" by default for the convenience
+ * of the statistics code which can now rely on a statistic to have some
+ * valid settings all the time.
+ *
+ * On success, 0 is returned, and the struct statistic pointer
+ * provided by the caller points to a newly allocated struct.
+ *
+ * If the struct statistic pointer provided by the caller
+ * is not NULL (used), this routine fails, the struct statistic
+ * pointer is not changed, and -EINVAL is returned.
+ *
+ * If some required memory could not be allocated this routine fails,
+ * the struct statistic pointer is not changed, and -ENOMEM is returned.
+ */
+int statistic_create(struct statistic **stat_ptr,
+		     struct statistic_interface *interface,
+		     const char *name, const char *units)
+{
+	struct statistic *stat;
+	unsigned long flags;
+
+	if (*stat_ptr || !interface)
+		return -EINVAL;
+
+	/* kzalloc replaces open-coded kmalloc + memset */
+	stat = kzalloc(sizeof(struct statistic), GFP_KERNEL);
+	if (!stat)
+		return -ENOMEM;
+
+	stat->interface = interface;
+	strlcpy(stat->name, name, sizeof(stat->name));
+	strlcpy(stat->units, units, sizeof(stat->units));
+	/* default definition: plain counter over the full range, stopped */
+	statistic_define_value(stat, STATISTIC_RANGE_MIN, STATISTIC_RANGE_MAX,
+				STATISTIC_DEF_MODE_INC);
+	statistic_stop_nolock(stat);
+	stat->started = stat->stopped;
+	stat->stat_ptr = stat_ptr;
+
+	statistic_lock(interface, flags);
+	list_add_tail(&stat->list, &interface->statistic_lh);
+	*stat_ptr = stat;
+	statistic_unlock(interface, flags);
+
+	return 0;
+}
+
+/**
+ * statistic_remove - remove given statistic
+ * @stat_ptr: reference to struct statistic pointer
+ *
+ * Remove statistic along with its recent data and definition.
+ *
+ * On success, 0 is returned and the struct statistic pointer
+ * provided by the caller is set to NULL.
+ *
+ * If the struct statistic pointer provided by the caller
+ * is NULL (unused), this routine fails, the struct statistic
+ * pointer is not changed, and -EINVAL is returned.
+ */
+int statistic_remove(struct statistic **stat_ptr)
+{
+	struct statistic *stat = *stat_ptr;
+	unsigned long flags;
+
+	if (!stat)
+		return -EINVAL;
+
+	/*
+	 * Keep a local copy of the statistic: the original code set
+	 * *stat_ptr = NULL and kfree'd it, then dereferenced
+	 * (*stat_ptr)->interface in statistic_unlock() - a NULL
+	 * dereference and use-after-free. Free only after unlocking.
+	 */
+	statistic_lock(stat->interface, flags);
+	if (stat->release)
+		stat->release(stat);
+	list_del(&stat->list);
+	*stat_ptr = NULL;
+	statistic_unlock(stat->interface, flags);
+	kfree(stat);
+
+	return 0;
+}
+
+/*
+ * Format one statistic's "definition" file line into a read segment:
+ * name, on/off, type, range, type-specific extras, hit counters and
+ * timestamps. Returns 0 or -ENOMEM.
+ * GFP_ATOMIC is used - presumably called under the interface spinlock;
+ * confirm against the (not visible) read path.
+ */
+static int statistic_format_def(struct statistic *stat,
+				struct statistic_file_private *private)
+{
+	struct sgrb_seg *seg;
+	char t0[TIMESTAMP_SIZE], t1[TIMESTAMP_SIZE], t2[TIMESTAMP_SIZE];
+
+	seg = sgrb_seg_find(&private->read_seg_lh, 1024, GFP_ATOMIC);
+	if (!seg)
+		return -ENOMEM;
+
+	/* common prefix; stat->on/stat->type index statistic_strings[] */
+	seg->offset += sprintf(seg->address + seg->offset,
+				"%s%s %s %s %s%lld %s%lld",
+				statistic_strings[STATISTIC_DEF_NAME],
+				stat->name,
+				statistic_strings[stat->on],
+				statistic_strings[stat->type],
+				statistic_strings[STATISTIC_DEF_RANGEMIN],
+				(long long signed)stat->range_min,
+				statistic_strings[STATISTIC_DEF_RANGEMAX],
+				(long long signed)stat->range_max);
+
+	/* optional per-discipline extras (e.g. mode= for type=value) */
+	if (stat->format_def)
+		seg->offset += stat->format_def(stat,
+						seg->address + seg->offset);
+
+	nsec_to_timestamp(t0, stat->age);
+	nsec_to_timestamp(t1, stat->started);
+	nsec_to_timestamp(t2, stat->stopped);
+
+	seg->offset += sprintf(seg->address + seg->offset,
+				" %s%llu %s%s %s%s %s%s %s%s\n\n",
+				statistic_strings[STATISTIC_DEF_HITSOUT],
+				(long long unsigned)stat->hits_out_of_range,
+				statistic_strings[STATISTIC_DEF_RESET], t0,
+				statistic_strings[STATISTIC_DEF_STARTED], t1,
+				statistic_strings[STATISTIC_DEF_STOPPED], t2,
+				statistic_strings[STATISTIC_DEF_UNIT],
+				stat->units);
+
+	return 0;
+}
+
+/* code concerned with single value statistics */
+
+/* reset hook for type=value: clear the single counter */
+static void statistic_reset_value(struct statistic *stat)
+{
+	stat->data.value.hits = 0;
+}
+
+/*
+ * Print one "name[suffix] total" line for a single-value counter.
+ * Fix: print the @value argument instead of hard-coding
+ * stat->data.value.hits - the only visible caller passes exactly
+ * stat->data.value.hits, so behaviour is unchanged there, but the
+ * parameter was silently ignored for any other caller.
+ */
+static inline void _statistic_format_data_value(struct statistic *stat,
+				struct sgrb_seg *seg, char *s, u64 value)
+{
+	seg->offset += sprintf(seg->address + seg->offset, "%s%s %llu\n",
+				stat->name, s,
+				(unsigned long long)value);
+}
+
+/* format_data hook for type=value: emit the counter into a read segment */
+static int statistic_format_data_value(struct statistic *stat,
+				       struct statistic_file_private *private)
+{
+	struct sgrb_seg *seg;
+
+	seg = sgrb_seg_find(&private->read_seg_lh, 128, GFP_ATOMIC);
+	if (!seg)
+		return -ENOMEM;
+
+	_statistic_format_data_value(stat, seg, "", stat->data.value.hits);
+	return 0;
+}
+
+/* format_def hook for type=value: append " mode=..." to the definition line */
+static int statistic_format_def_value(struct statistic *stat, char *line)
+{
+	return sprintf(line, " %s", statistic_strings[stat->data.value.mode]);
+}
+
+/* mode=increments: total += incr (discriminator deliberately unused) */
+static inline u64 _statistic_add_value_increments(s64 *single, s64 value,
+						  u64 incr)
+{
+	return (*single += incr);
+}
+
+/* mode=products: total += |discriminator| * incr */
+static inline u64 _statistic_add_value_products(s64 *single, s64 value,
+						u64 incr)
+{
+	if (value < 0)
+		value = -value;
+	return (*single += value * incr);
+}
+
+/* add hook for type=value, mode=increments; out-of-range hits are counted
+ * separately and return 0 */
+static u64 statistic_add_value_increments(struct statistic *stat, s64 value,
+					  u64 incr)
+{
+	if (value < stat->range_min || value > stat->range_max) {
+		stat->hits_out_of_range++;
+		return 0;
+	}
+	return _statistic_add_value_increments(
+			&stat->data.value.hits, value, incr);
+}
+
+/* add hook for type=value, mode=products; same range handling as above */
+static u64 statistic_add_value_products(struct statistic *stat, s64 value,
+					u64 incr)
+{
+	if (value < stat->range_min || value > stat->range_max) {
+		stat->hits_out_of_range++;
+		return 0;
+	}
+	return _statistic_add_value_products(
+			&stat->data.value.hits, value, incr);
+}
+
+/**
+ * statistic_define_value - instantiate statistic as single value (counter)
+ * @stat: statistic to be defined
+ * @range_min: lower bound of discriminators
+ * @range_max: upper bound of discriminators
+ * @mode: determines how to use the counter
+ *
+ * Depending on the mode parameter, accumulation is done as
+ *
+ * a) <total> += <increment N>				(mode=increments)
+ * b) <total> += ABS(<discriminator N>) * <increment N>	(mode=products)
+ *
+ * The output format of a single value statistic found in the "data" file is:
+ * <statistic name> <total>
+ *
+ * This (re)definition function is available both to exploiting device drivers
+ * and to the user through the "definition" file. Device driver programmers
+ * might find it user-friendly to provide a default definition for a
+ * particular statistic by calling this or a related function. A previous
+ * definition is replaced by the new one. In addition, the statistic must be
+ * started in order to make it gather data. A line in the interface's "definition"
+ * file holds specifics about the named statistic.
+ *
+ * Returns 0 on success, or -EINVAL if @mode is neither
+ * STATISTIC_DEF_MODE_INC nor STATISTIC_DEF_MODE_PROD.
+ */
+int statistic_define_value(struct statistic *stat, s64 range_min, s64 range_max,
+			   int mode)
+{
+	unsigned long flags;
+
+	if (mode != STATISTIC_DEF_MODE_INC &&
+	    mode != STATISTIC_DEF_MODE_PROD)
+		return -EINVAL;
+
+	statistic_lock(stat->interface, flags);
+
+	/* release resources of the previous discipline, if any */
+	if (stat->release)
+		stat->release(stat);
+
+	stat->type = STATISTIC_DEF_TYPE_VALUE;
+	stat->range_min = range_min;
+	stat->range_max = range_max;
+	stat->data.value.mode = mode;
+
+	/* install type=value hooks */
+	stat->release = NULL;
+	stat->reset = statistic_reset_value;
+	stat->format_data = statistic_format_data_value;
+	stat->format_def = statistic_format_def_value;
+	if (mode == STATISTIC_DEF_MODE_INC)
+		stat->add = statistic_add_value_increments;
+	else
+		stat->add = statistic_add_value_products;
+
+	statistic_reset_nolock(stat);
+
+	statistic_unlock(stat->interface, flags);
+
+	return 0;
+}
+
+/* code concerned with range statistic */
+
+/* Initialise a range accumulator; min/max start as out-of-range sentinels
+ * (range_max + 1 / range_min - 1) so the first update always wins. */
+static inline void statistic_init_range(struct statistic_entry_range *range,
+					s64 range_min, s64 range_max)
+{
+	range->num = 0;
+	range->acc = 0;
+	range->min = range_max + 1;
+	range->max = range_min - 1;
+}
+
+/* reset hook for type=range */
+static void statistic_reset_range(struct statistic *stat)
+{
+	statistic_init_range(
+		&stat->data.range.range, stat->range_min, stat->range_max);
+}
+
+/*
+ * Print "name[suffix] num min avg.ddd max" with the average rounded to
+ * three decimal places (computed to four digits, then rounded and
+ * divided by ten).
+ * NOTE(review): do_div() expects an unsigned 64-bit dividend; `decimal`
+ * is signed here and range->acc may be negative - confirm behaviour for
+ * negative accumulators.
+ */
+static inline void _statistic_format_data_range(struct statistic *stat,
+	struct sgrb_seg *seg, char *s, struct statistic_entry_range *range)
+{
+	long long unsigned whole = 0;
+	long long signed min = 0, max = 0, decimal = 0, last_digit;
+
+	if (range->num) {
+		whole = range->acc;
+		do_div(whole, range->num);
+		decimal  = range->acc * 10000;
+		do_div(decimal, range->num);
+		decimal -= whole * 10000;
+		if (decimal < 0)
+			decimal = -decimal;
+		last_digit = decimal;
+		do_div(last_digit, 10);
+		last_digit = decimal - last_digit * 10;
+		if (last_digit >= 5)
+			decimal += 10;
+		do_div(decimal, 10);
+		min = range->min;
+		max = range->max;
+	}
+
+	seg->offset += sprintf(seg->address + seg->offset,
+				"%s%s %llu %lld %lld.%03lld %lld\n",
+				stat->name, s,
+				(long long unsigned)range->num,
+				(long long signed)min,
+				whole, decimal,
+				(long long signed)max);
+}
+
+/* format_data hook for type=range: emit num/min/avg/max into a read segment */
+static int statistic_format_data_range(struct statistic *stat,
+				       struct statistic_file_private *private)
+{
+	struct sgrb_seg *seg;
+
+	seg = sgrb_seg_find(&private->read_seg_lh, 128, GFP_ATOMIC);
+	if (!seg)
+		return -ENOMEM;
+
+	_statistic_format_data_range(
+		stat, seg, "", &stat->data.range.range);
+	return 0;
+}
+
+/* Fold (value, incr) into a range accumulator: incr occurrences of value
+ * are counted towards num/acc, and min/max are widened. Returns num. */
+static inline u64 _statistic_add_range(struct statistic_entry_range *range,
+				       s64 value, u64 incr)
+{
+	range->num += incr;
+	range->acc += value * incr;
+	if (value < range->min)
+		range->min = value;
+	if (value > range->max)
+		range->max = value;
+	return range->num;
+}
+
+/* add hook for type=range; out-of-range hits are counted separately */
+static u64 statistic_add_range(struct statistic *stat, s64 value, u64 incr)
+{
+	if (value < stat->range_min || value > stat->range_max) {
+		stat->hits_out_of_range++;
+		return 0;
+	}
+	return _statistic_add_range(&stat->data.range.range, value, incr);
+}
+
+/**
+ * statistic_define_range - instantiate statistic as small set of values
+ *    that describe a range of observed discriminators
+ * @stat: statistic to be defined
+ * @range_min: lower bound of discriminators
+ * @range_max: upper bound of discriminators
+ *
+ * Determines that the statistic provides the minimum, average and maximum
+ * of the numbers reported by the exploiter. Besides the number of updates
+ * is counted. Statistic events with increments larger than 1 are counted
+ * as multiple occurrences of a particular discriminator with regard to
+ * the computation of the average.
+ * For example, this statistic type could be used as a fill level or
+ * utilisation indicator for queues.
+ *
+ * The output format of a range statistic found in the "data" file is:
+ * <statistic name> <total of increments> <minimum> <average> <maximum>
+ *
+ * This (re)definition function is available both to exploiting device drivers
+ * and to the user through the "definition" file. Device driver programmers
+ * might find it user-friendly to provide a default definition for a
+ * particular statistic by calling this or a related function. A previous
+ * definition is replaced by the new one. In addition, the statistic must be
+ * started in order to make it gather data. A line in the interface's "definition"
+ * file holds specifics about the named statistic.
+ *
+ * This routine always succeeds and returns 0.
+ */
+int statistic_define_range(struct statistic *stat, s64 range_min, s64 range_max)
+{
+	unsigned long flags;
+
+	statistic_lock(stat->interface, flags);
+
+	/* release resources of the previous discipline, if any */
+	if (stat->release)
+		stat->release(stat);
+
+	stat->type = STATISTIC_DEF_TYPE_RANGE;
+	stat->range_min = range_min;
+	stat->range_max = range_max;
+
+	/* install type=range hooks */
+	stat->release = NULL;
+	stat->reset = statistic_reset_range;
+	stat->format_data = statistic_format_data_range;
+	stat->format_def = NULL;
+	stat->add = statistic_add_range;
+
+	statistic_reset_nolock(stat);
+
+	statistic_unlock(stat->interface, flags);
+
+	return 0;
+}
+
+/* code concerned with fixed array statistics */
+
+static inline s64 statistic_array_calc_value_lin(struct statistic *stat,
+						 int index)
+{
+	return stat->range_min + (stat->data.array.base_interval * index);
+}
+
+static inline s64 statistic_array_calc_value_log2(struct statistic *stat,
+						  int index)
+{
+	return stat->range_min +
+		(index ? (stat->data.array.base_interval << (index - 1)) : 0);
+}
+
+static inline s64 statistic_array_calc_value(struct statistic *stat, int index)
+{
+	if (stat->data.array.scale == STATISTIC_DEF_SCALE_LIN)
+		return statistic_array_calc_value_lin(stat, index);
+	else
+		return statistic_array_calc_value_log2(stat, index);
+}
+
+/*
+ * Map a discriminator to its linear-scale slot number.
+ * Assumes value >= stat->range_min and base_interval != 0 -- presumably
+ * guaranteed by the definition/range-check code; TODO confirm.
+ */
+static inline int statistic_array_calc_index_lin(struct statistic *stat,
+						 s64 value)
+{
+	/* do_div() needs an unsigned 64 bit dividend and divides in place */
+	unsigned long long index = value - stat->range_min;
+	do_div(index, stat->data.array.base_interval);
+	return index;
+}
+
+/* map a discriminator to its log2-scale slot, capping at the last slot */
+static inline int statistic_array_calc_index_log2(struct statistic *stat,
+						  s64 value)
+{
+	unsigned long long index = 0;
+
+	while (index < (stat->data.array.entries - 1) &&
+	       value > statistic_array_calc_value_log2(stat, index))
+		index++;
+	return index;
+}
+
+/* dispatch slot lookup on the scale chosen at definition time */
+static inline int statistic_array_calc_index(struct statistic *stat, s64 value)
+{
+	return (stat->data.array.scale == STATISTIC_DEF_SCALE_LIN) ?
+		statistic_array_calc_index_lin(stat, value) :
+		statistic_array_calc_index_log2(stat, value);
+}
+
+/*
+ * Count the intervals implied by the current definition and allocate the
+ * hit-counter array. Returns 0, -EINVAL (fewer than 2 intervals) or -ENOMEM.
+ */
+static inline int statistic_alloc_array(struct statistic *stat)
+{
+	int i, size;
+
+	/*
+	 * Count intervals up to and including range_max; the extra slot
+	 * (i++) catches all hits above range_max.
+	 * NOTE(review): nothing bounds this loop except the s64 comparison;
+	 * presumably callers keep range/base_interval sane -- confirm.
+	 */
+	for (i = 0;
+	     statistic_array_calc_value(stat, i) <= stat->range_max;
+	     i++);
+	i++;
+	if (i < 2) {
+		/*
+		 * FIX: ""%s"" was two adjacent string literals concatenated
+		 * around %s, printing no quotes at all; escape them instead.
+		 * Also tag the message with an explicit log level.
+		 */
+		printk(KERN_WARNING
+		       "statistic: definition of \"%s\" as type=array failed "
+		       "because it would comprise less than 2 intervals\n",
+		       stat->name);
+		return -EINVAL;
+	}
+	stat->data.array.entries = i;
+
+	size = stat->data.array.entries * sizeof(u64);
+	stat->data.array.hits = kmalloc(size, GFP_KERNEL);
+	if (!stat->data.array.hits) {
+		printk(KERN_WARNING
+		       "statistic: definition of \"%s\" as type=array failed "
+		       "due to memory constraints\n", stat->name);
+		return -ENOMEM;
+	}
+	return 0;
+}
+
+/* free the counter array; NULL marks the statistic as unbacked */
+static void statistic_release_array(struct statistic *stat)
+{
+	kfree(stat->data.array.hits);
+	stat->data.array.hits = NULL;
+}
+
+/* zero all interval counters; the array geometry stays as defined */
+static void statistic_reset_array(struct statistic *stat)
+{
+	int size = stat->data.array.entries * sizeof(u64);
+	memset(stat->data.array.hits, 0, size);
+}
+
+/*
+ * Append one "<name> <prefix><boundary> <hits>" line to the read buffer.
+ * Returns 0, or -ENOMEM if no output segment could be obtained.
+ */
+static inline int statistic_format_data_array_line(struct statistic *stat,
+					struct statistic_file_private *private,
+					int i, const char *prefix,u64 value)
+{
+	struct sgrb_seg *seg;
+
+	/* 256 bytes comfortably hold one formatted line */
+	seg = sgrb_seg_find(&private->read_seg_lh, 256, GFP_ATOMIC);
+	if (!seg)
+		return -ENOMEM;
+
+	seg->offset += sprintf(seg->address + seg->offset,
+				"%s %s%lld %llu\n", stat->name,
+				prefix, (long long signed)value,
+				(long long unsigned)stat->data.array.hits[i]);
+	return 0;
+}
+
+/*
+ * Emit one "<=" line per bounded interval, then a final ">" line for the
+ * slot that catches everything above the last boundary.
+ */
+static int statistic_format_data_array(struct statistic *stat,
+				       struct statistic_file_private *private)
+{
+	int i;
+	int retval;
+
+	for (i = 0; i < (stat->data.array.entries - 1); i++) {
+		retval = statistic_format_data_array_line(
+				stat, private, i, "<=",
+				statistic_array_calc_value(stat, i));
+		if (retval)
+			return retval;
+	}
+	/* i now indexes the overflow slot; its boundary is the previous one */
+	retval = statistic_format_data_array_line(
+			stat, private, i, ">",
+			statistic_array_calc_value(stat, i - 1));
+	return retval;
+}
+
+/* append array-specific settings to a "definition" file line */
+static int statistic_format_def_array(struct statistic *stat, char *line)
+{
+	return  sprintf(line,
+			" %s%llu %s",
+			statistic_strings[STATISTIC_DEF_BASEINT],
+			(long long unsigned)stat->data.array.base_interval,
+			statistic_strings[stat->data.array.scale]);
+}
+
+/* bump the counter of the linear-scale interval @value falls into */
+static u64 statistic_add_array_lin(struct statistic *stat, s64 value, u64 incr)
+{
+	u64 *slot;
+
+	slot = &stat->data.array.hits[statistic_array_calc_index_lin(stat,
+								     value)];
+	*slot += incr;
+	return *slot;
+}
+
+/* bump the counter of the log2-scale interval @value falls into */
+static u64 statistic_add_array_log2(struct statistic *stat, s64 value, u64 incr)
+{
+	u64 *slot;
+
+	slot = &stat->data.array.hits[statistic_array_calc_index_log2(stat,
+								      value)];
+	*slot += incr;
+	return *slot;
+}
+
+/**
+ * statistic_define_array - instantiate statistic as fixed size array of
+ *    interval/counter pairs (histogram for intervals)
+ * @stat: statistic to be defined
+ * @range_min: lower bound of discriminators
+ * @range_max: upper bound of discriminators
+ * @base_interval: width of intervals between two discriminators (linear scale);
+ *         	   starting width of intervals (logarithmic scale, base 2)
+ * @scale: scale applied to discriminators (linear/logarithmic)
+ *
+ * Determines that the statistic maintains a counter for each interval
+ * as determined by the above parameters. These counters hold the total
+ * of the increments applicable to particular intervals. The first interval
+ * is determined by (<the smallest s64 value>, range_min). The last interval
+ * is (range_max, <the largest s64 value>). That means, this statistic
+ * discipline is capable of giving account of hits out of the specified range.
+ * Basically, the function implemented by this statistic discipline is a
+ * histogram for intervals.
+ *
+ * The output format of a fixed-size array statistic found in the "data" file
+ * is:
+ *
+ * <statistic name> "<="<discriminator 0> <total of increments for interval
+ *                                          (smallest s64, discriminator 0)>
+ * <statistic name> "<="<discriminator 1> <total of increments for interval
+ *                                          (discriminator 0, discriminator 1)>
+ * ...
+ * <statistic name> "<="<discriminator N> <total of increments for interval
+ *                                         (discriminator N-1, discriminator N)>
+ * <statistic name> ">"<discriminator N> <total of increments for interval
+ *                                         (discriminator N, largest s64)>
+ *
+ * This (re)definition function is available both to exploiting device drivers
+ * and to the user through the "definition" file. Device driver programmers
+ * might find it user-friendly to provide a default definition for a
+ * particular statistic by calling this or a related function. A previous
+ * definition is replaced by the new one. In addition, the statistic must be
+ * started in order to make it gather data. A line in the interface's "definition"
+ * file holds specifics about the named statistic.
+ *
+ * On success, 0 is returned.
+ *
+ * If some required memory could not be allocated this routine fails,
+ * the previous definition is preserved, and -ENOMEM is returned.
+ *
+ * If an invalid definition has been specified, the definition is not altered,
+ * and -EINVAL is returned.
+ */
+int statistic_define_array(struct statistic *stat, s64 range_min, s64 range_max,
+			   u32 base_interval, u8 scale)
+{
+	unsigned long flags;
+	int retval;
+	struct statistic *tmp;
+
+	if (range_min > range_max)
+		return -EINVAL;
+
+	if (scale != STATISTIC_DEF_SCALE_LIN &&
+	    scale != STATISTIC_DEF_SCALE_LOG2)
+		return -EINVAL;
+
+	/*
+	 * Build the new definition in a scratch copy first, so a failed
+	 * allocation leaves the previous definition fully intact.
+	 */
+	tmp = kmalloc(sizeof(struct statistic), GFP_KERNEL);
+	if (!tmp)
+		return -ENOMEM;
+	memcpy(tmp, stat, sizeof(struct statistic));
+
+	tmp->type = STATISTIC_DEF_TYPE_ARRAY;
+	tmp->range_min = range_min;
+	tmp->range_max = range_max;
+	tmp->data.array.base_interval = base_interval;
+	tmp->data.array.scale = scale;
+
+	tmp->release = statistic_release_array;
+	tmp->reset = statistic_reset_array;
+	tmp->format_data = statistic_format_data_array;
+	tmp->format_def = statistic_format_def_array;
+ 	if (scale == STATISTIC_DEF_SCALE_LIN)
+		tmp->add = statistic_add_array_lin;
+ 	else
+		tmp->add = statistic_add_array_log2;
+
+	/* allocates tmp->data.array.hits; may return -EINVAL or -ENOMEM */
+	retval = statistic_alloc_array(tmp);
+	if (retval) {
+		kfree(tmp);
+		return retval;
+	}
+
+	/* swap in the new definition atomically w.r.t. updates */
+	statistic_lock(stat->interface, flags);
+	if (stat->release)
+		stat->release(stat);
+	memcpy(stat, tmp, sizeof(struct statistic));
+	statistic_reset_nolock(stat);
+ 	statistic_unlock(stat->interface, flags);
+
+	kfree(tmp);
+	return 0;
+}
+
+/* code concerned with adaptable list statistics */
+
+/* tear down all discriminator/counter pairs of a list statistic */
+static void statistic_release_list(struct statistic *stat)
+{
+	struct list_head *head = &stat->data.list.entry_lh;
+	struct statistic_entry_list *entry, *next;
+
+	list_for_each_entry_safe(entry, next, head, list) {
+		list_del(&entry->list);
+		kfree(entry);
+	}
+}
+
+/* drop all entries and clear the bookkeeping counters */
+static void statistic_reset_list(struct statistic *stat)
+{
+	statistic_release_list(stat);
+	stat->data.list.hits_missed = 0;
+	stat->data.list.entries = 0;
+}
+
+/*
+ * Emit one "<name> <discriminator> <hits>" line per list entry.
+ * Returns 0, or -ENOMEM if no output segment could be obtained.
+ */
+static int statistic_format_data_list(struct statistic *stat,
+				      struct statistic_file_private *private)
+{
+	struct sgrb_seg *seg;
+	struct statistic_entry_list *entry;
+
+	list_for_each_entry(entry, &stat->data.list.entry_lh, list) {
+		seg = sgrb_seg_find(&private->read_seg_lh, 256, GFP_ATOMIC);
+		if (!seg)
+			return -ENOMEM;
+		/*
+		 * FIX: %llx consumes an unsigned long long; the cast used to
+		 * be (long long signed), mismatching the format specifier.
+		 */
+		seg->offset += sprintf(seg->address + seg->offset,
+					"%s 0x%llx %llu\n",
+					stat->name,
+					(long long unsigned)entry->value,
+					(long long unsigned)entry->hits);
+	}
+	return 0;
+}
+
+/* append list-specific settings (cap and misses) to a "definition" line */
+static int statistic_format_def_list(struct statistic *stat, char *line)
+{
+	return sprintf(line,
+			" %s%u %s%llu",
+			statistic_strings[STATISTIC_DEF_ENTRIESMAX],
+			stat->data.list.entries_max,
+			statistic_strings[STATISTIC_DEF_HITSMISSED],
+			(long long unsigned)stat->data.list.hits_missed);
+}
+
+/*
+ * Keep the list ordered by descending hit count: scan backwards from
+ * @entry for the first element with at least as many hits, then move
+ * @entry right behind it (or to the list head if none qualifies).
+ * NOTE(review): correctness of the final condition hinges on
+ * list_for_each_entry_continue_reverse() leaving @sort at the head
+ * sentinel when the scan is exhausted -- confirm against list.h.
+ */
+static inline void statistic_add_list_sort(struct list_head *head,
+					   struct statistic_entry_list *entry)
+{
+	struct statistic_entry_list *sort =
+		list_prepare_entry(entry, head, list);
+
+	list_for_each_entry_continue_reverse(sort, head, list)
+		if (sort->hits >= entry->hits)
+			break;
+	/* only move if @entry is not already directly behind @sort */
+	if (sort->list.next != &entry->list &&
+	    (&sort->list == head || sort->hits >= entry->hits))
+		list_move(&entry->list, &sort->list);
+}
+
+/*
+ * Append a fresh discriminator/counter pair. Fails with -ENOMEM when the
+ * configured cap is reached or the atomic allocation fails.
+ */
+static inline int statistic_add_list_new(struct statistic *stat, s64 value,
+					 u64 incr)
+{
+	struct statistic_entry_list *entry;
+
+	if (stat->data.list.entries == stat->data.list.entries_max)
+		return -ENOMEM;
+
+	entry = kmalloc(sizeof(struct statistic_entry_list), GFP_ATOMIC);
+	if (!entry)
+		return -ENOMEM;
+
+	entry->value = value;
+	entry->hits = incr;
+	stat->data.list.entries++;
+	/* new entries start at the tail; sorting moves them up on hits */
+	list_add_tail(&entry->list, &stat->data.list.entry_lh);
+	return 0;
+}
+
+/*
+ * Account @incr hits for discriminator @value. Out-of-range values and
+ * values that cannot be added (cap reached, OOM) are counted separately
+ * and reported through the "definition" file; 0 is returned for them.
+ */
+static u64 statistic_add_list(struct statistic *stat, s64 value, u64 incr)
+{
+	struct statistic_entry_list *entry;
+	struct list_head *head = &stat->data.list.entry_lh;
+
+	if (value < stat->range_min || value > stat->range_max) {
+		stat->hits_out_of_range++;
+		return 0;
+	}
+
+	/* linear scan; the list is kept sorted by hits, so frequent
+	 * discriminators are found early */
+	list_for_each_entry(entry, head, list) {
+		if (entry->value == value) {
+			entry->hits += incr;
+			statistic_add_list_sort(head, entry);
+			return entry->hits;
+		}
+	}
+	if (statistic_add_list_new(stat, value, incr)) {
+		stat->data.list.hits_missed++;
+		return 0;
+	} else
+		return incr;
+}
+
+/**
+ * statistic_define_list - instantiate statistic as adaptable-size list of
+ *    discriminator/counter pairs (histogram for discrete values)
+ * @stat: statistic to be defined
+ * @range_min: lower bound of discriminators
+ * @range_max: upper bound of discriminators
+ * @entries_max: limits the list size
+ *
+ * Determines that the statistic maintains a counter for each discrete
+ * discriminator that is reported along with increments by the exploiter.
+ * These counters hold the total of the increments applicable to particular
+ * discriminators. Hits that were out of the specified range, or that would have
+ * required list entries beyond the specified maximum, are discarded and their
+ * numbers are visible through the "definition" file.
+ * Basically, the function implemented by this statistic discipline is a
+ * histogram for discrete values. Which values make it into the histogram is
+ * determined by their order of appearance (first come, first served).
+ *
+ * The output format of an adaptable-size list statistic found in the "data"
+ * file is:
+ *
+ * <statistic name> <discriminator 0> <total of increments for discriminator 0>
+ * <statistic name> <discriminator 1> <total of increments for discriminator 1>
+ * ...
+ * <statistic name> <discriminator N> <total of increments for discriminator N>
+ *
+ * This (re)definition function is available both to exploiting device drivers
+ * and to the user through the "definition" file. Device driver programmers
+ * might find it user-friendly to provide a default definition for a
+ * particular statistic by calling this or a related function. A previous
+ * definition is replaced by the new one. In addition, the statistic must be
+ * started in order to make it gather data. A line in the interface's "definition"
+ * file holds specifics about the named statistic.
+ *
+ * This routine always succeeds and returns 0.
+ */
+int statistic_define_list(struct statistic *stat, s64 range_min, s64 range_max,
+			  u32 entries_max)
+{
+	unsigned long flags;
+
+	/* serialise against concurrent updates and redefinition */
+	statistic_lock(stat->interface, flags);
+
+	/* free resources held by any previous definition */
+	if (stat->release)
+		stat->release(stat);
+
+	stat->type = STATISTIC_DEF_TYPE_LIST;
+	stat->range_min = range_min;
+	stat->range_max = range_max;
+	stat->data.list.entries_max = entries_max;
+	INIT_LIST_HEAD(&stat->data.list.entry_lh);
+
+	stat->release = statistic_release_list;
+	stat->reset = statistic_reset_list;
+	stat->format_data = statistic_format_data_list;
+	stat->format_def = statistic_format_def_list;
+	stat->add = statistic_add_list;
+
+	/* entries are allocated lazily on first hit; nothing else to set up */
+	statistic_reset_nolock(stat);
+
+	statistic_unlock(stat->interface, flags);
+
+	return 0;
+}
+
+/* code concerned with raw, timestamped statistic events */
+
+/* free the ringbuffer pages backing the raw event history */
+static void statistic_release_raw(struct statistic *stat)
+{
+	sgrb_release(&stat->data.raw.rb);
+}
+
+/* discard all recorded events and restart serial numbering at 0 */
+static void statistic_reset_raw(struct statistic *stat)
+{
+	sgrb_reset(&stat->data.raw.rb);
+	stat->data.raw.next_serial = 0;
+}
+
+/*
+ * Walk the event ringbuffer oldest-first without consuming it and emit
+ * one "<name> <timestamp> <serial> <value> <incr>" line per event.
+ * NOTE(review): assumes nsec_to_timestamp() writes at most TIMESTAMP_SIZE
+ * bytes including the terminator -- confirm against its definition.
+ */
+static int statistic_format_data_raw(struct statistic *stat,
+				     struct statistic_file_private *private)
+{
+	struct sgrb_seg *seg;
+	struct sgrb_ptr ptr;
+	struct statistic_entry_raw *entry;
+	char t[TIMESTAMP_SIZE];
+
+	/* iterate via a private cursor so the ringbuffer stays intact */
+	sgrb_ptr_copy(&ptr, &stat->data.raw.rb.first);
+	while ((entry = sgrb_consume_nodelete(&stat->data.raw.rb, &ptr))) {
+		seg = sgrb_seg_find(&private->read_seg_lh, 256, GFP_ATOMIC);
+		if (!seg)
+			return -ENOMEM;
+		nsec_to_timestamp(t, entry->clock);
+		seg->offset += sprintf(seg->address + seg->offset,
+					"%s %s %llu %lld %llu\n",
+					stat->name, t,
+					(long long unsigned)entry->serial,
+					(long long signed)entry->value,
+					(long long unsigned)entry->incr);
+	}
+	return 0;
+}
+
+/* append the ringbuffer capacity to a "definition" file line */
+static int statistic_format_def_raw(struct statistic *stat, char *line)
+{
+	return sprintf(line,
+			" %s%u",
+			statistic_strings[STATISTIC_DEF_ENTRIESMAX],
+			stat->data.raw.entries_max);
+}
+
+/*
+ * Record one raw event (timestamp, serial, discriminator, increment),
+ * overwriting the oldest entry when the ringbuffer is full.
+ * NOTE(review): the produced entry is used unchecked -- presumably
+ * sgrb_produce_overwrite() cannot return NULL once the ringbuffer has
+ * been allocated; confirm against the sgrb implementation.
+ */
+static u64 statistic_add_raw(struct statistic *stat, s64 value, u64 incr)
+{
+	struct statistic_entry_raw *entry;
+
+	if (value < stat->range_min || value > stat->range_max) {
+		stat->hits_out_of_range++;
+		return 0;
+	}
+
+	entry = sgrb_produce_overwrite(&stat->data.raw.rb);
+
+	entry->clock = sched_clock();
+	entry->serial = stat->data.raw.next_serial++;
+	entry->value = value;
+	entry->incr = incr;
+
+	return incr;
+}
+
+/**
+ * statistic_define_raw - instantiate statistic as a record of incremental
+ *    updates as they have happened (history of statistic events)
+ * @stat: statistic to be defined
+ * @range_min: lower bound of discriminators
+ * @range_max: upper bound of discriminators
+ * @entries_max: maximum number of entries in ringbuffer
+ *
+ * Determines that the statistic does not maintain counters for any increments,
+ * but that it accumulates the reported updates in the form of a history of
+ * these updates. Updates out of the specified range are dropped, though their
+ * total number is readable through the "definition" file. Besides, once the
+ * allocated pages are completely occupied, new entries are written over the
+ * oldest ones (ringbuffer). Each entry is tagged with a unique serial number
+ * and a timestamp. A single entry consumes 32 bytes.
+ * This discipline might be used to relate (seldom) statistic events to other
+ * (debugging?) events, or to do calculations on raw data in user space.
+ *
+ * The output format of a "raw" statistic found in the "data" file is:
+ *
+ * <statistic name> <timestamp> <serial> <discriminator> <increment>
+ *
+ * This (re)definition function is available both to exploiting device drivers
+ * and to the user through the "definition" file. Device driver programmers
+ * might find it user-friendly to provide a default definition for a
+ * particular statistic by calling this or a related function. A previous
+ * definition is replaced by the new one. In addition, the statistic must be
+ * started in order to make it gather data. A line in the interface's "definition"
+ * file holds specifics about the named statistic.
+ *
+ * On success, 0 is returned.
+ *
+ * If some required memory could not be allocated this routine fails,
+ * the previous definition is preserved, and -ENOMEM is returned.
+ */
+int statistic_define_raw(struct statistic *stat, s64 range_min, s64 range_max,
+			 u32 entries_max)
+{
+	unsigned long flags;
+	int retval;
+	struct statistic *tmp;
+
+	/*
+	 * Build the new definition in a scratch copy first, so a failed
+	 * allocation leaves the previous definition fully intact.
+	 * NOTE(review): unlike statistic_define_array(), no check for
+	 * range_min > range_max is made here -- confirm this is intended.
+	 */
+	tmp = kmalloc(sizeof(struct statistic), GFP_KERNEL);
+	if (!tmp)
+		return -ENOMEM;
+	memcpy(tmp, stat, sizeof(struct statistic));
+
+	tmp->type = STATISTIC_DEF_TYPE_RAW;
+	tmp->range_min = range_min;
+	tmp->range_max = range_max;
+	tmp->data.raw.entries_max = entries_max;
+
+	tmp->release = statistic_release_raw;
+	tmp->reset = statistic_reset_raw;
+	tmp->format_data = statistic_format_data_raw;
+	tmp->format_def = statistic_format_def_raw;
+	tmp->add = statistic_add_raw;
+
+	tmp->data.raw.next_serial = 0;
+	/*
+	 * sgrb_alloc initialises list head for us. We must not use pointers
+	 * to the stat's list through tmp in order to avoid list corruption.
+	 */
+	retval = sgrb_alloc(&tmp->data.raw.rb,
+			    sizeof(struct statistic_entry_raw),
+			    tmp->data.raw.entries_max, PAGE_SIZE, GFP_KERNEL);
+	if (retval) {
+		printk("statistic: definition of ""%s"" failed due to memory "
+		       "constraints\n", stat->name);
+		kfree(tmp);
+		return retval;
+	}
+
+	/* swap in the new definition atomically w.r.t. updates */
+	statistic_lock(stat->interface, flags);
+	if (stat->release)
+		stat->release(stat);
+	memcpy(stat, tmp, sizeof(struct statistic));
+	/*
+	 * The new ringbuffer's list is attached to tmp. memcpy() has left
+	 * some loose ends pointing to the soon to be kfree()-ed tmp.
+	 * So we need to migrate the list from tmp to stat here,
+	 * even if this looks like nasty poking inside ringbuffers.
+	 */
+	INIT_LIST_HEAD(&stat->data.raw.rb.seg_lh);
+	list_splice(&tmp->data.raw.rb.seg_lh, &stat->data.raw.rb.seg_lh);
+	statistic_reset_nolock(stat);
+ 	statistic_unlock(stat->interface, flags);
+
+	kfree(tmp);
+	return 0;
+}
+
+/* code concerned with history statistics */
+
+/* free the ringbuffer pages backing the history */
+static void statistic_release_history(struct statistic *stat)
+{
+	sgrb_release(&stat->data.history.rb);
+}
+
+/* discard all history entries and restart period accounting from "now" */
+static void statistic_reset_history(struct statistic *stat)
+{
+	sgrb_reset(&stat->data.history.rb);
+	stat->data.history.checkpoint = sched_clock();
+}
+
+/*
+ * Return the accumulation entry covering "now", creating (and zeroing)
+ * entries for all periods that elapsed since the last checkpoint. If the
+ * clock did not advance past the checkpoint, the newest entry is reused.
+ */
+static inline void * statistic_add_history_entry(struct statistic *stat)
+{
+	u64 now, then, window, period, checkp, elapsed;
+	void *entry = NULL;
+
+	window = stat->data.history.window;
+	period = stat->data.history.period;
+	checkp = stat->data.history.checkpoint;
+
+	now = sched_clock();
+	if (now <= checkp)
+		entry = sgrb_entry(&stat->data.history.rb.last);
+	else	{
+		then = checkp;
+		elapsed = now - then;
+		/*
+		 * FIXME: replace loops by formula for supposedly
+		 * improved performance
+		 *  - would require something like do_div64_64()
+		 */
+#if 0
+		if (elapsed > window)
+			then = (now - window) + (period - elapsed % period);
+#endif
+
+		/*
+		 * After long inactivity, skip ahead window-wise first, then
+		 * period-wise, so at most one window of entries is created.
+		 */
+		if (elapsed > window) {
+			for (; then < now - 2 * window; then += window);
+			for (; then < now - window; then += period);
+		}
+		/* create one zeroed entry per elapsed period */
+		for (; then < now; then += period) {
+			entry = sgrb_produce_overwrite(&stat->data.history.rb);
+			memset(entry, 0, stat->data.history.rb.entry_size);
+		}
+		/* range-mode entries need min/max seeded, not just zeroes */
+		if (stat->data.history.mode == STATISTIC_DEF_MODE_RANGE)
+			statistic_init_range(
+				(struct statistic_entry_range *)entry,
+				stat->range_min,
+				stat->range_max);
+		stat->data.history.checkpoint = then;
+	}
+	return entry;
+}
+
+/*
+ * Emit one timestamped line per history entry, oldest first. The first
+ * entry's timestamp is derived by stepping back (entries - 1) periods
+ * from the current checkpoint.
+ */
+static inline int statistic_format_data_history(struct statistic *stat,
+					struct statistic_file_private *private)
+{
+	struct sgrb_seg *seg;
+	struct sgrb_ptr ptr;
+	void *entry;
+	u64 time;
+	char t[TIMESTAMP_SIZE + 1];
+
+	/* enforce update for time of inactivity */
+	statistic_add_history_entry(stat);
+	time = stat->data.history.checkpoint -
+	       stat->data.history.period *
+			(stat->data.history.rb.entries - 1);
+	/* iterate via a private cursor so the ringbuffer stays intact */
+	sgrb_ptr_copy(&ptr, &stat->data.history.rb.first);
+	while ((entry = sgrb_consume_nodelete(&stat->data.history.rb, &ptr))) {
+		seg = sgrb_seg_find(&private->read_seg_lh, 256, GFP_ATOMIC);
+		if (!seg)
+			return -ENOMEM;
+		/* leading blank separates the timestamp from the name */
+		t[0] = ' ';
+		nsec_to_timestamp(&t[1], time);
+		switch (stat->data.history.mode) {
+		case STATISTIC_DEF_MODE_INC :
+		case STATISTIC_DEF_MODE_PROD :
+			_statistic_format_data_value(
+				stat, seg, t, *(u64*)entry);
+			break;
+		case STATISTIC_DEF_MODE_RANGE :
+			_statistic_format_data_range(
+				stat, seg, t,
+				(struct statistic_entry_range *)entry);
+			break;
+		default :
+			break;
+		}
+		time += stat->data.history.period;
+	}
+	return 0;
+}
+
+/*
+ * Append history-specific settings to a "definition" file line.
+ * The period is stored in nanoseconds but reported in microseconds.
+ */
+static inline int statistic_format_def_history(struct statistic *stat,
+					       char *line)
+{
+	unsigned long long period = stat->data.history.period;
+
+	do_div(period, 1000);
+	return sprintf(line,
+			" %s%u %s %s%llu",
+			statistic_strings[STATISTIC_DEF_ENTRIESMAX],
+			stat->data.history.entries_max,
+	    		statistic_strings[stat->data.history.mode],
+			statistic_strings[STATISTIC_DEF_PERIOD],
+			period);
+}
+
+/* accumulate increments only (mode=increments) into the current period */
+static u64 statistic_add_history_increments(struct statistic *stat, s64 value,
+					    u64 incr)
+{
+	if (value < stat->range_min || value > stat->range_max) {
+		stat->hits_out_of_range++;
+		return 0;
+	}
+	return _statistic_add_value_increments(
+			(s64*) statistic_add_history_entry(stat), value, incr);
+}
+
+/* accumulate value*increment products (mode=products) into the period */
+static u64 statistic_add_history_products(struct statistic *stat, s64 value,
+					  u64 incr)
+{
+	if (value < stat->range_min || value > stat->range_max) {
+		stat->hits_out_of_range++;
+		return 0;
+	}
+	return _statistic_add_value_products(
+			(s64*) statistic_add_history_entry(stat), value, incr);
+}
+
+/* accumulate min/avg/max range data (mode=range) into the current period */
+static u64 statistic_add_history_range(struct statistic *stat, s64 value,
+				       u64 incr)
+{
+	if (value < stat->range_min || value > stat->range_max) {
+		stat->hits_out_of_range++;
+		return 0;
+	}
+	return _statistic_add_range(
+			(struct statistic_entry_range *)
+				statistic_add_history_entry(stat),
+			value, incr);
+}
+
+/**
+ * statistic_define_history - instantiate statistic as a history
+ *    that accumulates all updates per defined period of time
+ * @stat: statistic to be defined
+ * @range_min: lower bound of discriminators
+ * @range_max: upper bound of discriminators
+ * @entries_max: number of entries in the history buffer
+ * @period: time to elapse for each entry in history
+ * @mode: accumulate increments only, or products of discriminator and increment
+ *
+ * Determines that the statistic does maintain data per fixed period
+ * of time. All updates within a particular period of time are added up. When
+ * that period has passed, the next entry in the history buffer is used to start
+ * over with accumulation. The history buffer is a ringbuffer. That is, the
+ * oldest entry is replaced by the newest, if the history buffer has been filled
+ * up. Updates out of the specified range are dropped, though their total number
+ * is readable through the "definition" file. Each entry is tagged with a
+ * timestamp. Each entry consumes 8 bytes (mode=increments, mode=products)
+ * or 32 bytes (mode=range).
+ * This statistic discipline basically is other basic disciplines enhanced by
+ * another dimension, time.
+ *
+ * Depending on the definition, accumulation is done as
+ *
+ * a) mode=increments	: see statistic_define_value()
+ * b) mode=products	: see statistic_define_value()
+ * c) mode=range	: see statistic_define_range()
+ *
+ * The output format of a "history" statistic found in the "data" file is
+ * similar to those documented for the underlying basic types (see mode
+ * parameter), with the exception of a timestamp inserted after the
+ * <statistic name> field that makes it possible to identify entries of a
+ * particular age (the timestamp marks the end of each period).
+ *
+ * This (re)definition function is available both to exploiting device drivers
+ * and to the user through the "definition" file. Device driver programmers
+ * might find it user-friendly to provide a default definition for a
+ * particular statistic by calling this or a related function. A previous
+ * definition is replaced by the new one. In addition, the statistic must be
+ * started in order to make it gather data. A line in the interface's "definition"
+ * file holds specifics about the named statistic.
+ *
+ * On success, 0 is returned.
+ *
+ * If some required memory could not be allocated this routine fails,
+ * the previous definition is preserved, and -ENOMEM is returned.
+ *
+ * If an invalid definition has been specified, the definition is not altered,
+ * and -EINVAL is returned.
+ */
+int statistic_define_history(struct statistic *stat, s64 range_min,
+		     s64 range_max, u32 entries_max, u64 period, int mode)
+{
+	unsigned long flags;
+	int retval;
+	struct statistic *tmp;
+	int entry_size;
+
+	if (mode != STATISTIC_DEF_MODE_INC &&
+	    mode != STATISTIC_DEF_MODE_PROD &&
+	    mode != STATISTIC_DEF_MODE_RANGE)
+		return -EINVAL;
+
+	/*
+	 * Build the new definition in a scratch copy first, so a failed
+	 * allocation leaves the previous definition fully intact.
+	 */
+	tmp = kmalloc(sizeof(struct statistic), GFP_KERNEL);
+	if (!tmp)
+		return -ENOMEM;
+	memcpy(tmp, stat, sizeof(struct statistic));
+
+	period *= 1000;	/* microseconds to nanoseconds */
+
+	tmp->type = STATISTIC_DEF_TYPE_HISTORY;
+	tmp->range_min = range_min;
+	tmp->range_max = range_max;
+	tmp->data.history.entries_max = entries_max;
+	tmp->data.history.mode = mode;
+	tmp->data.history.period = period;
+	tmp->data.history.window = entries_max * period;
+
+	tmp->release = statistic_release_history;
+	tmp->reset = statistic_reset_history;
+	tmp->format_data = statistic_format_data_history;
+	tmp->format_def = statistic_format_def_history;
+	if (mode == STATISTIC_DEF_MODE_INC)
+		tmp->add = statistic_add_history_increments;
+	else if (mode == STATISTIC_DEF_MODE_PROD)
+		tmp->add = statistic_add_history_products;
+	else
+		tmp->add = statistic_add_history_range;
+
+	/*
+	 * FIX: this used to read (STATISTIC_DEF_MODE_RANGE ? ... : ...),
+	 * a non-zero constant condition that always selected the larger
+	 * range entry size regardless of mode; compare against @mode.
+	 */
+	entry_size = (mode == STATISTIC_DEF_MODE_RANGE ?
+		      sizeof(struct statistic_entry_range) :
+		      sizeof(s64));
+	/*
+	 * sgrb_alloc initialises list head for us. We must not use pointers
+	 * to the stat's list through tmp in order to avoid list corruption.
+	 */
+	retval = sgrb_alloc(&tmp->data.history.rb, entry_size,
+			    tmp->data.history.entries_max,
+			    PAGE_SIZE, GFP_KERNEL);
+	if (retval) {
+		printk(KERN_WARNING
+		       "statistic: definition of \"%s\" failed due to memory "
+		       "constraints\n", stat->name);
+		kfree(tmp);
+		return retval;
+	}
+
+	/* swap in the new definition atomically w.r.t. updates */
+	statistic_lock(stat->interface, flags);
+	if (stat->release)
+		stat->release(stat);
+	memcpy(stat, tmp, sizeof(struct statistic));
+	/*
+	 * The new ringbuffer's list is attached to tmp. memcpy() has left
+	 * some loose ends pointing to the soon to be kfree()-ed tmp.
+	 * So we need to migrate the list from tmp to stat here,
+	 * even if this looks like nasty poking inside ringbuffers.
+	 */
+	INIT_LIST_HEAD(&stat->data.history.rb.seg_lh);
+	list_splice(&tmp->data.history.rb.seg_lh,
+		    &stat->data.history.rb.seg_lh);
+	statistic_reset_nolock(stat);
+	statistic_unlock(stat->interface, flags);
+
+	kfree(tmp);
+	return 0;
+}
+
+/* code concerned with user interface */
+
+/**
+ * statistic_interface_create - create debugfs files for a collection of
+ *    related statistics
+ * @interface_ptr: reference to struct statistic_interface pointer
+ * @name: name of debugfs directory to be created
+ *
+ * Create a debugfs directory in "statistics" as well as the "data" and
+ * "definition" files. Creating this user interface is prequisite for
+ * attaching statistics to an interface.
+ *
+ * On success, 0 is returned and the struct statistic_interface pointer
+ * provided by the caller points to a newly allocated struct.
+ *
+ * If the struct statistic_interface pointer provided by the caller
+ * is not NULL (used), this routine fails, the struct statistic_interface
+ * pointer is not changed, and -EINVAL is returned.
+ *
+ * If some required memory could not be allocated, or the creation
+ * of debugfs entries failed, this routine fails, the struct
+ * statistic_interface pointer is not changed, and -ENOMEM is returned.
+ */
+int statistic_interface_create(struct statistic_interface **interface_ptr,
+			       const char *name)
+{
+	struct statistic_interface *interface;
+	int retval;
+
+	if (*interface_ptr)
+		return -EINVAL;
+
+	interface = kmalloc(sizeof(struct statistic_interface), GFP_KERNEL);
+	if (!interface)
+		return -ENOMEM;
+	memset(interface, 0, sizeof(struct statistic_interface));
+	INIT_LIST_HEAD(&interface->statistic_lh);
+	spin_lock_init(&interface->lock);
+
+	/* serialise against other interface creation/removal */
+	down(&statistic_globals.sem);
+
+	interface->debugfs_dir = debugfs_create_dir(
+					name, statistic_globals.root_dir);
+	if (!interface->debugfs_dir) {
+		retval = -ENOMEM;
+		goto failed_dir;
+	}
+
+	interface->data_file = debugfs_create_file(
+					STATISTIC_FILENAME_DATA,
+					S_IFREG | S_IRUSR,
+					interface->debugfs_dir,
+					(void*) interface,
+					&statistic_data_file_ops);
+	if (!interface->data_file) {
+		retval = -ENOMEM;
+		goto failed_data;
+	}
+
+	interface->def_file = debugfs_create_file(
+					STATISTIC_FILENAME_DEF,
+					S_IFREG | S_IRUSR | S_IWUSR,
+					interface->debugfs_dir,
+					(void*) interface,
+					&statistic_def_file_ops);
+	if (!interface->def_file) {
+		retval = -ENOMEM;
+		goto failed_def;
+	}
+
+	list_add_tail(&interface->list, &statistic_globals.interface_lh);
+	*interface_ptr = interface;
+	retval = 0;
+	goto out;
+
+failed_def:
+	debugfs_remove(interface->data_file);
+
+failed_data:
+	/* FIX: the directory used to be leaked on these error paths */
+	debugfs_remove(interface->debugfs_dir);
+
+failed_dir:
+	kfree(interface);
+	interface = NULL;
+
+out:
+	up(&statistic_globals.sem);
+
+	return retval;
+}
+
+/**
+ * statistic_interface_remove - remove debugfs files for statistics
+ * @interface_ptr: reference to struct statistic_interface pointer
+ *
+ * Remove a debugfs directory in "statistics" along with its "data" and
+ * "definition" files. Removing this user interface also causes the removal
+ * of all statistics attached to the interface.
+ *
+ * On success, 0 is returned and the struct statistic_interface pointer
+ * provided by the caller is set to NULL.
+ *
+ * If the struct statistic_interface pointer provided by the caller
+ * is NULL (unused), this routine fails, the struct statistic_interface
+ * pointer is not changed, and -EINVAL is returned.
+ */
+int statistic_interface_remove(struct statistic_interface **interface_ptr)
+{
+	struct statistic_interface *interface = *interface_ptr;
+	struct statistic *stat, *tmp;
+
+	if (!interface)
+		return -EINVAL;
+
+	/* serialise against other interface creation/removal */
+	down(&statistic_globals.sem);
+
+	/* detach and destroy all statistics still attached to us */
+	list_for_each_entry_safe(stat, tmp, &interface->statistic_lh, list)
+		statistic_remove(stat->stat_ptr);
+
+	debugfs_remove(interface->data_file);
+	debugfs_remove(interface->def_file);
+	debugfs_remove(interface->debugfs_dir);
+
+	list_del(&interface->list);
+	kfree(interface);
+	/* mark the caller's handle as unused again */
+	*interface_ptr = NULL;
+
+	up(&statistic_globals.sem);
+
+	return 0;
+}
+
+/*
+ * Common open helper for the "data" and "definition" files: fetch the
+ * owning interface from the inode and allocate per-open buffer state.
+ * Returns 0 or -ENOMEM.
+ */
+static int statistic_interface_generic_open(struct inode *inode,
+		struct file *file, struct statistic_interface **interface,
+		struct statistic_file_private **private)
+{
+	*interface = (struct statistic_interface *) inode->u.generic_ip;
+	/*
+	 * FIX: this used to be BUG_ON(!interface), which tests the address
+	 * of the caller's pointer and can never trigger; test the value
+	 * fetched from the inode instead.
+	 */
+	BUG_ON(!*interface);
+
+	*private = kmalloc(sizeof(struct statistic_file_private), GFP_KERNEL);
+	if (!(*private))
+		return -ENOMEM;
+
+	memset(*private, 0, sizeof(struct statistic_file_private));
+	INIT_LIST_HEAD(&(*private)->read_seg_lh);
+	INIT_LIST_HEAD(&(*private)->write_seg_lh);
+	file->private_data = *private;
+	return 0;
+}
+
+/* common release helper: free all per-open read and write buffers */
+static int statistic_interface_generic_close(struct inode *inode,
+					     struct file *file)
+{
+	struct statistic_file_private *private = file->private_data;
+
+	BUG_ON(!private);
+
+	sgrb_seg_release_all(&private->read_seg_lh);
+	sgrb_seg_release_all(&private->write_seg_lh);
+
+	kfree(private);
+	return 0;
+}
+
+/*
+ * Copy formatted output from the per-open segment list to user space,
+ * honouring *offset so that consecutive reads continue where the last
+ * one stopped.
+ * NOTE(review): after a copy, pos advances by seg_transfer + seg_offset
+ * rather than by the full segment size; this is only consistent because
+ * a partially copied segment exhausts len and ends the loop -- verify.
+ */
+static ssize_t statistic_interface_generic_read(struct file *file,
+				char __user *buf, size_t len, loff_t *offset)
+{
+	struct statistic_file_private *private;
+	struct sgrb_seg *seg;
+	size_t seg_offset, seg_residual, seg_transfer;
+	size_t transfered = 0;
+	loff_t pos = 0;
+
+	private = (struct statistic_file_private *) file->private_data;
+	BUG_ON(!private);
+
+	list_for_each_entry(seg, &private->read_seg_lh, list) {
+		if (!len)
+			break;
+		/* does the requested offset fall into this segment? */
+		if (*offset >= pos  &&
+		    *offset <= (pos + seg->offset)) {
+			seg_offset = *offset - pos;
+			seg_residual = seg->offset - seg_offset;
+			seg_transfer = min(len, seg_residual);
+			if (copy_to_user(buf + transfered,
+					 seg->address + seg_offset,
+					 seg_transfer))
+				return -EFAULT;
+			transfered += seg_transfer;
+			*offset += seg_transfer;
+			pos += seg_transfer + seg_offset;
+			len -= seg_transfer;
+		} else
+			pos += seg->offset;
+	}
+	return transfered;
+}
+
+static ssize_t statistic_interface_generic_write(struct file *file,
+			const char __user *buf, size_t len, loff_t *offset)
+{
+	struct statistic_file_private *private;
+	struct sgrb_seg *seg;
+	size_t seg_residual, seg_transfer;
+	size_t transfered = 0;
+
+	private = (struct statistic_file_private *) file->private_data;
+	BUG_ON(!private);
+
+	/* only sequential appends are supported - no sparse writes */
+	if (*offset != private->write_seg_total_size)
+		return -EPIPE;
+
+	while (len) {
+		/* find (or allocate) a segment with at least one free byte */
+		seg = sgrb_seg_find(&private->write_seg_lh, 1, GFP_KERNEL);
+		if (!seg)
+			return -ENOMEM;
+		seg_residual = seg->size - seg->offset;
+		seg_transfer = min(len, seg_residual);
+		if (copy_from_user(seg->address + seg->offset,
+				   buf + transfered,
+				   seg_transfer))
+			/*
+			 * NOTE(review): data copied in earlier iterations
+			 * stays accounted; close() will still parse it.
+			 */
+			return -EFAULT;
+		private->write_seg_total_size += seg_transfer;
+		seg->offset += seg_transfer;
+		transfered += seg_transfer;
+		*offset += seg_transfer;
+		len -= seg_transfer;
+	}
+	return transfered;
+}
+
+/*
+ * open() handler for the "definitions" debugfs file: snapshot the current
+ * definition of every statistic on this interface into the per-open read
+ * buffer, holding statistic_lock so the view is consistent.
+ */
+static int statistic_interface_def_open(struct inode *inode, struct file *file)
+{
+	struct statistic_interface *interface;
+	struct statistic_file_private *private;
+	struct statistic *stat;
+	unsigned long flags;
+	int retval = 0;
+
+	retval = statistic_interface_generic_open(
+			inode, file, &interface, &private);
+	if (retval)
+		return retval;
+
+	statistic_lock(interface, flags);
+	list_for_each_entry(stat, &interface->statistic_lh, list) {
+		retval = statistic_format_def(stat, private);
+		if (retval)
+			break;
+	}
+	statistic_unlock(interface, flags);
+	/*
+	 * Clean up only after dropping the lock:
+	 * statistic_interface_def_close() parses pending definition lines
+	 * and statistic_parse_definitions_line() takes statistic_lock
+	 * itself, so calling it with the lock held would deadlock.
+	 */
+	if (retval)
+		statistic_interface_def_close(inode, file);
+	return retval;
+}
+
+/*
+ * Parse one blank-separated line of definition keywords written to the
+ * "definitions" file.  A line without a name may only carry the global
+ * on/off/reset commands; a named line may additionally redefine the
+ * statistic's type and parameters.  Returns 0 or a negative error code
+ * (1 for a NULL line).
+ */
+static int statistic_parse_definitions_line(
+			struct statistic_interface *interface, char *def)
+{
+	struct statistic *stat;
+	char *p;
+	substring_t args[MAX_OPT_ARGS];
+	int token, type = 0, scale = 0;
+	u32 entries_max, base_interval;
+	s64 range_min, range_max;
+	u64 period = 0;
+	int mode = 0;
+	char *name = NULL;
+	int retval = 0;
+	unsigned long flags;
+	int found = 0;
+	int got_type = 0, got_scale = 0, got_entries_max = 0,
+	    got_range_min = 0, got_range_max = 0,
+	    got_base_interval = 0, got_period = 0, got_mode = 0, got_reset = 0,
+	    got_on = 0, got_off = 0, redefinition = 0, new_type = 0,
+	    was_on = 0;
+
+	if (!def)
+		return 1;
+
+	/* tokenise the line and record which keywords were present */
+	while ((p = strsep(&def, " ")) != NULL) {
+		token = match_token(p, statistic_def, args);
+		switch (token) {
+		case STATISTIC_DEF_NAME :
+			if (!name)
+				name = match_strdup(&args[0]);
+			break;
+		case STATISTIC_DEF_TYPE_VALUE :
+		case STATISTIC_DEF_TYPE_RANGE :
+		case STATISTIC_DEF_TYPE_ARRAY :
+		case STATISTIC_DEF_TYPE_LIST :
+		case STATISTIC_DEF_TYPE_RAW :
+		case STATISTIC_DEF_TYPE_HISTORY :
+			type = token;
+			got_type = 1;
+			break;
+		case STATISTIC_DEF_ON :
+			got_on = 1;
+			break;
+		case STATISTIC_DEF_OFF :
+			got_off = 1;
+			break;
+		case STATISTIC_DEF_SCALE_LIN :
+		case STATISTIC_DEF_SCALE_LOG2 :
+			scale = token;
+			got_scale = 1;
+			break;
+		case STATISTIC_DEF_ENTRIESMAX :
+			match_int(&args[0], &entries_max);
+			got_entries_max = 1;
+			break;
+		case STATISTIC_DEF_RANGEMIN :
+			match_s64(&args[0], &range_min, 0);
+			got_range_min = 1;
+			break;
+		case STATISTIC_DEF_RANGEMAX :
+			match_s64(&args[0], &range_max, 0);
+			got_range_max = 1;
+			break;
+		case STATISTIC_DEF_BASEINT :
+			match_int(&args[0], &base_interval);
+			got_base_interval = 1;
+			break;
+		case STATISTIC_DEF_PERIOD :
+			match_u64(&args[0], &period, 0);
+			got_period = 1;
+			break;
+		case STATISTIC_DEF_MODE_INC :
+		case STATISTIC_DEF_MODE_PROD :
+		case STATISTIC_DEF_MODE_RANGE :
+			mode = token;
+			got_mode = 1;
+			break;
+		case STATISTIC_DEF_RESET :
+			got_reset = 1;
+			break;
+		default :
+			break;
+		}
+	}
+
+	/* any of these keywords means the statistic is being redefined */
+	redefinition =  got_type | got_mode | got_scale | got_entries_max |
+			got_range_min | got_range_max |
+			got_base_interval | got_period;
+
+	if (!name) {
+		/* without a name, only global on/off/reset are allowed */
+		if (redefinition)
+			goto out;
+		statistic_lock(interface, flags);
+		if (got_on)
+			list_for_each_entry(stat, &interface->statistic_lh, list)
+				statistic_start_nolock(stat);
+		if (got_off)
+			list_for_each_entry(stat, &interface->statistic_lh, list)
+				statistic_stop_nolock(stat);
+		if (got_reset) {
+			list_for_each_entry(stat, &interface->statistic_lh, list)
+				statistic_reset_nolock(stat);
+		}
+		statistic_unlock(interface, flags);
+		goto out;
+	}
+
+	/*
+	 * Look the statistic up by name.  The list cursor is only valid
+	 * when the entry was actually found; comparing stat->name after a
+	 * completed loop would dereference the list head instead of a
+	 * struct statistic.
+	 */
+	statistic_lock(interface, flags);
+	list_for_each_entry(stat, &interface->statistic_lh, list) {
+		if (!strcmp(stat->name, name)) {
+			found = 1;
+			break;
+		}
+	}
+	statistic_unlock(interface, flags);
+	if (!found)
+		goto out;
+	/*
+	 * NOTE(review): stat is used below after dropping the lock;
+	 * assumes no concurrent statistic_remove() - confirm.
+	 */
+
+	if (!redefinition) {
+		if (got_on)
+			statistic_start(stat);
+		if (got_off)
+			statistic_stop(stat);
+		if (got_reset)
+			statistic_reset(stat);
+		goto out;
+	}
+
+	/* remember whether to restart the statistic after redefinition */
+	if (statistic_stop(stat) == STATISTIC_DEF_ON)
+		was_on = 1;
+
+	if (!got_type)
+		type = stat->type;
+	else if (type != stat->type)
+		new_type = 1;
+
+	/* unspecified parameters are inherited from the old definition */
+	if (!got_range_min)
+		range_min = stat->range_min;
+	if (!got_range_max)
+		range_max = stat->range_max;
+
+	switch (type) {
+	case STATISTIC_DEF_TYPE_VALUE :
+		if (new_type && !got_mode) {
+			retval = -EINVAL;
+			break;
+		}
+		if (!got_mode)
+			mode = stat->data.value.mode;
+		retval = statistic_define_value(
+				stat, range_min, range_max, mode);
+		break;
+	case STATISTIC_DEF_TYPE_RANGE :
+		retval = statistic_define_range(
+				stat, range_min, range_max);
+		break;
+	case STATISTIC_DEF_TYPE_ARRAY :
+		if (new_type && (!got_base_interval || !got_scale)) {
+			retval = -EINVAL;
+			break;
+		}
+		if (!got_base_interval)
+			base_interval = stat->data.array.base_interval;
+		if (!got_scale)
+			scale = stat->data.array.scale;
+		retval = statistic_define_array(
+				stat, range_min, range_max, base_interval, scale);
+		break;
+	case STATISTIC_DEF_TYPE_LIST :
+		if (new_type && !got_entries_max) {
+			retval = -EINVAL;
+			break;
+		}
+		if (!got_entries_max)
+			entries_max = stat->data.list.entries_max;
+		retval = statistic_define_list(
+				stat, range_min, range_max, entries_max);
+		break;
+	case STATISTIC_DEF_TYPE_RAW :
+		if (new_type && !got_entries_max) {
+			retval = -EINVAL;
+			break;
+		}
+		if (!got_entries_max)
+			entries_max = stat->data.raw.entries_max;
+		retval = statistic_define_raw(
+				stat, range_min, range_max, entries_max);
+		break;
+	case STATISTIC_DEF_TYPE_HISTORY :
+		if (new_type && (!got_entries_max || !got_period ||
+				 !got_mode)) {
+			retval = -EINVAL;
+			break;
+		}
+		if (!got_entries_max)
+			entries_max = stat->data.history.entries_max;
+		if (!got_period)
+			period = stat->data.history.period;
+		if (!got_mode)
+			mode = stat->data.history.mode;
+		retval = statistic_define_history(
+				stat, range_min, range_max, entries_max,
+				period, mode);
+		break;
+	default :
+		retval = -EINVAL;
+	}
+
+	if (got_on || was_on)
+		statistic_start(stat);
+
+out:
+	kfree(name);
+	return retval;
+}
+
+static void statistic_interface_def_close_parse(
+				struct statistic_interface *interface,
+				struct list_head *line_lh, size_t *line_size)
+{
+	struct sgrb_seg *seg, *tmp;
+	char *buf;
+	int offset = 0;
+
+	if (!*line_size)
+		return;
+
+	/* +2: trailing blank separator plus NUL terminator */
+	buf = kmalloc(*line_size + 2, GFP_KERNEL);
+	if (!buf)
+		/*
+		 * FIXME:
+		 * Does it make sense to indicate -ENOMEM on close()?
+		 * Should we omit other new settings because we could not
+		 * process this line of definitions?
+		 */
+		return;
+	buf[*line_size] = ' ';
+	buf[*line_size + 1] = '\0';
+	*line_size = 0;
+
+	/* gather the line fragments into one contiguous string,
+	 * releasing each fragment descriptor as it is consumed */
+	list_for_each_entry_safe(seg, tmp, line_lh, list) {
+		memcpy(buf + offset, seg->address, seg->size);
+		offset += seg->size;
+		list_del(&seg->list);
+		kfree(seg);
+	}
+
+	statistic_parse_definitions_line(interface, buf);
+
+	kfree(buf);
+}
+
+/*
+ * release() handler for the "definitions" file: split everything written
+ * to this file into newline-terminated lines and hand each complete line
+ * to the definition parser, then free the per-open buffers.
+ */
+static int statistic_interface_def_close(struct inode *inode, struct file *file)
+{
+	struct statistic_interface *interface;
+	struct statistic_file_private *private;
+	struct sgrb_seg *seg, *seg_nl;
+	int offset;
+	struct list_head line_lh;
+	char *nl;
+	size_t line_size = 0;
+
+	INIT_LIST_HEAD(&line_lh);
+	interface = (struct statistic_interface *) inode->u.generic_ip;
+	private = (struct statistic_file_private *) file->private_data;
+
+	list_for_each_entry(seg, &private->write_seg_lh, list) {
+		for (offset = 0; offset < seg->offset; offset += seg_nl->size) {
+			seg_nl = kmalloc(sizeof(struct sgrb_seg), GFP_KERNEL);
+			if (!seg_nl)
+				/*
+				 * Stop scanning this segment; "continue"
+				 * would evaluate the loop increment with an
+				 * uninitialized or stale seg_nl pointer.
+				 * FIXME:
+				 * Should we omit other new settings because
+				 * we could not process this line of
+				 * definitions?
+				 */
+				break;
+			seg_nl->address = seg->address + offset;
+			nl = strnchr(seg_nl->address,
+				     seg->offset - offset, '\n');
+			if (nl) {
+				/* line length, excluding the newline */
+				seg_nl->offset = nl - seg_nl->address;
+				if (seg_nl->offset)
+					seg_nl->offset--;
+			} else
+				seg_nl->offset = seg->offset - offset;
+			seg_nl->size = seg_nl->offset + 1;
+			line_size += seg_nl->size;
+			list_add_tail(&seg_nl->list, &line_lh);
+			if (nl)
+				statistic_interface_def_close_parse(
+					interface, &line_lh, &line_size);
+		}
+	}
+	/* flush a trailing line that was not newline-terminated */
+	if (!list_empty(&line_lh))
+		statistic_interface_def_close_parse(
+			interface, &line_lh, &line_size);
+
+	return statistic_interface_generic_close(inode, file);
+}
+
+static int statistic_interface_data_open(struct inode *inode, struct file *file)
+{
+	struct statistic_interface *interface;
+	struct statistic_file_private *private;
+	struct statistic *stat;
+	unsigned long flags;
+	int retval = 0;
+
+	retval = statistic_interface_generic_open(
+			inode, file, &interface, &private);
+	if (retval)
+		return retval;
+
+	/*
+	 * Format the data of all statistics attached to this interface
+	 * into the per-open read buffer under statistic_lock, so read()
+	 * sees a consistent snapshot.
+	 */
+	statistic_lock(interface, flags);
+	list_for_each_entry(stat, &interface->statistic_lh, list) {
+		if (stat->format_data) {
+			retval = stat->format_data(stat, private);
+			if (retval) {
+				/*
+				 * NOTE(review): generic_close only frees
+				 * buffers and appears not to take
+				 * statistic_lock (via sgrb_seg_release_all)
+				 * - confirm it is safe under the lock.
+				 */
+				statistic_interface_generic_close(inode, file);
+				break;
+			}
+		}
+	}
+	statistic_unlock(interface, flags);
+
+	return retval;
+}
+
+/* bring the statistics facility up early during boot; exit hook pairs
+ * with it for the (modular) unload case */
+postcore_initcall(statistic_init);
+module_exit(statistic_exit);
+MODULE_LICENSE("GPL");
+
+/* public API exported to exploiters of the statistics facility */
+EXPORT_SYMBOL_GPL(statistic_interface_create);
+EXPORT_SYMBOL_GPL(statistic_interface_remove);
+EXPORT_SYMBOL_GPL(statistic_create);
+EXPORT_SYMBOL_GPL(statistic_remove);
+EXPORT_SYMBOL_GPL(statistic_define_value);
+EXPORT_SYMBOL_GPL(statistic_define_range);
+EXPORT_SYMBOL_GPL(statistic_define_array);
+EXPORT_SYMBOL_GPL(statistic_define_list);
+EXPORT_SYMBOL_GPL(statistic_define_raw);
+EXPORT_SYMBOL_GPL(statistic_define_history);
+EXPORT_SYMBOL_GPL(statistic_start);
+EXPORT_SYMBOL_GPL(statistic_stop);
+EXPORT_SYMBOL_GPL(statistic_reset);
diff -Nurp e/MAINTAINERS f/MAINTAINERS
--- e/MAINTAINERS	2005-12-14 12:51:52.000000000 +0100
+++ f/MAINTAINERS	2005-12-14 14:22:55.000000000 +0100
@@ -2464,6 +2464,13 @@ STARMODE RADIO IP (STRIP) PROTOCOL DRIVE
  W:	http://mosquitonet.Stanford.EDU/strip.html
  S:	Unsupported ?

+STATISTICS INFRASTRUCTURE
+P:	Martin Peschke
+M:	mp3@de.ibm.com
+M:	linux390@de.ibm.com
+W:	http://oss.software.ibm.com/developerworks/opensource/linux390
+S:	Supported
+
  STRADIS MPEG-2 DECODER DRIVER
  P:	Nathan Laredo
  M:	laredo@gnu.org

^ permalink raw reply	[flat|nested] 16+ messages in thread

end of thread, other threads:[~2006-05-30 19:15 UTC | newest]

Thread overview: 16+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2006-05-24 12:33 [Patch 5/6] statistics infrastructure Martin Peschke
2006-05-24 22:57 ` Andrew Morton
2006-05-29 22:17   ` Martin Peschke
2006-05-30  1:17     ` Andrew Morton
2006-05-29 22:17   ` [Patch] statistics infrastructure - update 1 Martin Peschke
2006-05-30  8:07     ` Heiko Carstens
2006-05-30 11:22       ` Martin Peschke
2006-05-30 17:17   ` [Patch 5/6] statistics infrastructure Martin Peschke
2006-05-30 19:19     ` Andrew Morton
2006-05-25  8:05 ` Nikita Danilov
2006-05-30 11:35   ` Martin Peschke
  -- strict thread matches above, loose matches on Subject: below --
2006-05-19 16:13 Martin Peschke
2005-12-16 12:27 [patch " Martin Peschke
2005-12-14 16:46 Martin Peschke
2005-12-14 18:38 ` Andi Kleen
2005-12-16  1:00   ` Martin Peschke

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox