All of lore.kernel.org
 help / color / mirror / Atom feed
From: "Renato S. Yamane" <renatoyamane@mandic.com.br>
To: Thomas Gleixner <tglx@linutronix.de>
Cc: "Renato S. Yamane" <renatoyamane@mandic.com.br>,
	linux-kernel@vger.kernel.org, alan-jenkins@tuffmail.co.uk,
	devzero@web.de, mingo@elte.hu
Subject: Re: Kernel Linux 2.6.23.16 hangs when run updatedb
Date: Tue, 11 Mar 2008 11:33:47 -0300	[thread overview]
Message-ID: <47D6984B.5040108@mandic.com.br> (raw)
In-Reply-To: <alpine.LFD.1.00.0803110803280.3781@apollo.tec.linutronix.de>

[-- Attachment #1: Type: text/plain, Size: 1068 bytes --]

Hi Thomas,


Thomas Gleixner wrote:
> Renato, some questions:
> 1) is this fully reproducible with updatedb ?

Yes, this crash is fully reproducible.
All time wich I turn-on my laptopt I need kill find daemon in my Debian 
Etch. If I don't do that, my laptop hangs.

> 2) are you sure that this is the first stacktrace you captured, there
> might be some BUG before that which scrolled out of sight. Any chance
> to use a serial console?

I can't use scroll, because none key/mouse work after crash.
And, sorry I can't use a serial console. Any other idea?

> 3) Can you please recompile the kernel with CONFIF_DEBUG_INFO set
> and then run the following addresses from the backtrace through
> addr2line with the new vmlinux:
> # addr2line -e vmlinux 0xc013dad9 0xc0107c3b

Yes, I try compile 2.6.24.3 without any patch.

> 4) Looking at your .config it seems you have some more patches applied
> aside of the .16 stable. Can you please upload a full patch queue
> somewhere ?

I attached all patchs used in my 2.6.23.16.

Regards,
Renato S. Yamane


================

[-- Attachment #2: enable-4k-stacks-default-2.6.23.patch --]
[-- Type: text/x-diff, Size: 352 bytes --]

--- linux-2.6.20.orig/arch/i386/Kconfig.debug 
+++ linux-2.6.20/arch/i386/Kconfig.debug 
@@ -59,6 +59,7 @@
 config 4KSTACKS
 	bool "Use 4Kb for kernel stacks instead of 8Kb"
 	depends on DEBUG_KERNEL
+	default y
 	help
 	  If you say Y here the kernel will use a 4Kb stacksize for the
 	  kernel stack attached to each process/thread. This facilitates

[-- Attachment #3: genetic-cfq-sched-2.6.23.patch --]
[-- Type: text/x-diff, Size: 14766 bytes --]

Genetic I/O CFQ scheduler support.

Signed-off-by: Miguel Boton <mboton@gmail.com>

Index: linux.2.6.23/block/cfq-iosched.c
===================================================================
--- linux.2.6.23.orig/block/cfq-iosched.c
+++ linux.2.6.23/block/cfq-iosched.c
@@ -11,25 +11,45 @@
 #include <linux/elevator.h>
 #include <linux/rbtree.h>
 #include <linux/ioprio.h>
+#include <linux/genetic.h>
+#include <linux/random.h>
+#include <linux/debugfs.h>
 
 /*
  * tunables
  */
-static const int cfq_quantum = 4;		/* max queue in one round of service */
-static const int cfq_fifo_expire[2] = { HZ / 4, HZ / 8 };
-static const int cfq_back_max = 16 * 1024;	/* maximum backwards seek, in KiB */
-static const int cfq_back_penalty = 2;		/* penalty of a backwards seek */
-
-static const int cfq_slice_sync = HZ / 10;
-static int cfq_slice_async = HZ / 25;
-static const int cfq_slice_async_rq = 2;
-static int cfq_slice_idle = HZ / 125;
+#define CFQ_QUANTUM		4
+#define CFQ_FIFO_EXPIRE_ASYNC	HZ / 4
+#define CFQ_FIFO_EXPIRE_SYNC	HZ / 8
+#define CFQ_BACK_MAX		16 * 1024
+#define CFQ_BACK_PENALTY	2
+
+#define CFQ_SLICE_SYNC		HZ / 10
+#define CFQ_SLICE_ASYNC		HZ / 25
+#define CFQ_SLICE_ASYNC_RQ	2
+#define CFQ_SLICE_IDLE		HZ / 125
+
+/* max queue in one round of service */
+static int cfq_quantum		= CFQ_QUANTUM;
+static int cfq_fifo_expire[2]	=
+		{ CFQ_FIFO_EXPIRE_ASYNC, CFQ_FIFO_EXPIRE_SYNC };
+/* maximum backwards seek, in KiB */
+static int cfq_back_max		= CFQ_BACK_MAX;
+/* penalty of a backwards seek */
+static int cfq_back_penalty	= CFQ_BACK_PENALTY;
+
+static int cfq_slice_sync	= CFQ_SLICE_SYNC;
+static int cfq_slice_async	= CFQ_SLICE_ASYNC;
+static int cfq_slice_async_rq	= CFQ_SLICE_ASYNC_RQ;
+static int cfq_slice_idle	= CFQ_SLICE_IDLE;
 
 /*
  * grace period before allowing idle class to get disk access
  */
 #define CFQ_IDLE_GRACE		(HZ / 10)
 
+static int cfq_idle_grace	= CFQ_IDLE_GRACE;
+
 /*
  * below this threshold, we consider thinktime immediate
  */
@@ -115,6 +131,10 @@
 	unsigned int cfq_slice_idle;
 
 	struct list_head cic_list;
+
+#ifdef CONFIG_GENETIC_IOSCHED_CFQ
+	struct list_head data_list;
+#endif
 };
 
 /*
@@ -197,6 +217,126 @@
 CFQ_CFQQ_FNS(sync);
 #undef CFQ_CFQQ_FNS
 
+#ifdef CONFIG_GENETIC_IOSCHED_CFQ
+
+struct disk_stats_snapshot *cfq_stats_snapshot;
+
+extern void disk_stats_snapshot(phenotype_t *pt);
+#ifdef CONFIG_FINGERPRINTING
+extern void disk_get_fingerprint(phenotype_t *pt);
+extern void disk_update_fingerprint(phenotype_t *pt);
+extern void *cfq_create_genes(phenotype_t *pt);
+#endif
+
+static void cfq_num_ops_create_child(genetic_child_t *child);
+static void cfq_throughput_create_child(genetic_child_t *child);
+static void cfq_latency_create_child(genetic_child_t *child);
+static void cfq_general_create_child(genetic_child_t *child);
+
+static void cfq_general_set_child_genes(void *in_genes);
+
+static void cfq_num_ops_calc_fitness(genetic_child_t *child);
+static void cfq_throughput_calc_fitness(genetic_child_t *child);
+static void cfq_latency_calc_fitness(genetic_child_t *child);
+
+static void cfq_general_calc_post_fitness(phenotype_t *in_pt);
+
+static void cfq_shift_mutation_rate(phenotype_t *in_pt);
+
+struct genetic_ops cfq_num_ops_genetic_ops = {
+	.create_child = cfq_num_ops_create_child,
+	.calc_fitness = cfq_num_ops_calc_fitness,
+};
+
+struct genetic_ops cfq_throughput_genetic_ops = {
+	.create_child = cfq_throughput_create_child,
+	.calc_fitness = cfq_throughput_calc_fitness,
+};
+
+struct genetic_ops cfq_latency_genetic_ops = {
+	.create_child = cfq_latency_create_child,
+	.calc_fitness = cfq_latency_calc_fitness,
+};
+
+struct genetic_ops cfq_general_genetic_ops = {
+	.create_child = cfq_general_create_child,
+	.set_child_genes = cfq_general_set_child_genes,
+	.combine_genes = genetic_generic_combine_genes,
+	.mutate_child = genetic_generic_mutate_child,
+	.calc_post_fitness = cfq_general_calc_post_fitness,
+	.take_snapshot = disk_stats_snapshot,
+	.shift_mutation_rate = cfq_shift_mutation_rate,
+	.gene_show = genetic_generic_gene_show,
+#ifdef CONFIG_FINGERPRINTING
+	.get_fingerprint = disk_get_fingerprint,
+	.update_fingerprint = disk_update_fingerprint,
+	.create_top_genes = cfq_create_genes,
+	.top_fitness_show = fingerprint_top_fitness_show,
+	.snapshot_show = fingerprint_snapshot_show,
+	.state_show = fingerprint_state_show,
+#endif
+};
+
+#define CFQ_NUM_CHILDREN		8
+
+#define CFQ_NUM_OPS_UID			1
+#define CFQ_NUM_OPS_NUM_GENES		0
+
+#define CFQ_THROUGHPUT_UID		2
+#define CFQ_THROUGHPUT_NUM_GENES	0
+
+#define CFQ_LATENCY_UID			4
+#define CFQ_LATENCY_NUM_GENES		0
+
+#define CFQ_GENERAL_UID (CFQ_NUM_OPS_UID | CFQ_THROUGHPUT_UID | CFQ_LATENCY_UID)
+#define CFQ_GENERAL_NUM_GENES		11
+
+struct cfq_genes {
+	int cfq_quantum;
+	int cfq_fifo_expire_async;
+	int cfq_fifo_expire_sync;
+	int cfq_back_max;
+	int cfq_back_penalty;
+	int cfq_slice_sync;
+	int cfq_slice_async;
+	int cfq_slice_async_rq;
+	int cfq_slice_idle;
+	int cfq_idle_grace;
+	unsigned long nr_requests;
+};
+
+gene_param_t cfq_gene_param[CFQ_GENERAL_NUM_GENES] = {
+	{ "cfq_quantum",
+		CFQ_QUANTUM / 2, 3 * CFQ_QUANTUM / 2, CFQ_QUANTUM, 0 },
+	{ "cfq_fifo_expire_async",
+		CFQ_FIFO_EXPIRE_ASYNC / 2, 3 * CFQ_FIFO_EXPIRE_ASYNC / 2, CFQ_FIFO_EXPIRE_ASYNC, 0 },
+	{ "cfq_fifo_expire_sync",
+		CFQ_FIFO_EXPIRE_SYNC / 2, 3 * CFQ_FIFO_EXPIRE_SYNC / 2, CFQ_FIFO_EXPIRE_SYNC, 0 },
+	{ "cfq_back_max",
+		CFQ_BACK_MAX / 2, 3 * CFQ_BACK_MAX / 2, CFQ_BACK_MAX, 0 },
+	{ "cfq_back_penalty",
+		CFQ_BACK_PENALTY / 2, 3 * CFQ_BACK_PENALTY / 2, CFQ_BACK_PENALTY, 0 },
+	{ "cfq_slice_sync",
+		CFQ_SLICE_SYNC / 2, 3 * CFQ_SLICE_SYNC / 2, CFQ_SLICE_SYNC, 0 },
+	{ "cfq_slice_async",
+		CFQ_SLICE_ASYNC / 2, 3 * CFQ_SLICE_ASYNC / 2, CFQ_SLICE_ASYNC, 0 },
+	{ "cfq_slice_async_rq",
+		CFQ_SLICE_ASYNC_RQ / 2, 3 * CFQ_SLICE_ASYNC_RQ / 2, CFQ_SLICE_ASYNC_RQ, 0 },
+	{ "cfq_slice_idle",
+		CFQ_SLICE_IDLE / 2, 3 * CFQ_SLICE_IDLE / 2, CFQ_SLICE_IDLE, 0 },
+	{ "cfq_idle_grace",
+		CFQ_IDLE_GRACE / 2, 3 * CFQ_IDLE_GRACE / 2, CFQ_IDLE_GRACE, 0 },
+	{ "nr_requests",
+		BLKDEV_MIN_RQ, BLKDEV_MAX_RQ * 30, BLKDEV_MAX_RQ, genetic_generic_iterative_mutate_gene }
+};
+
+extern long long disk_num_ops_calc_fitness(genetic_child_t *child);
+extern long long disk_throughput_calc_fitness(genetic_child_t *child);
+extern long long disk_latency_calc_fitness(genetic_child_t *child);
+
+LIST_HEAD(cfq_data_list);
+#endif
+
 static void cfq_dispatch_insert(struct request_queue *, struct request *);
 static struct cfq_queue *cfq_get_queue(struct cfq_data *, int,
 				       struct task_struct *, gfp_t);
@@ -813,7 +942,7 @@
 		 * the grace period has passed or arm the idle grace
 		 * timer
 		 */
-		end = cfqd->last_end_request + CFQ_IDLE_GRACE;
+		end = cfqd->last_end_request + cfq_idle_grace;
 		if (time_before(jiffies, end)) {
 			mod_timer(&cfqd->idle_class_timer, end);
 			cfqq = NULL;
@@ -2045,7 +2174,7 @@
 	/*
 	 * race with a non-idle queue, reset timer
 	 */
-	end = cfqd->last_end_request + CFQ_IDLE_GRACE;
+	end = cfqd->last_end_request + cfq_idle_grace;
 	if (!time_after_eq(jiffies, end))
 		mod_timer(&cfqd->idle_class_timer, end);
 	else
@@ -2101,6 +2230,10 @@
 
 	cfq_shutdown_timer_wq(cfqd);
 
+#ifdef CONFIG_GENETIC_IOSCHED_CFQ
+	list_del(&cfqd->data_list);
+#endif
+
 	kfree(cfqd);
 }
 
@@ -2137,6 +2270,10 @@
 	cfqd->cfq_slice_async_rq = cfq_slice_async_rq;
 	cfqd->cfq_slice_idle = cfq_slice_idle;
 
+#ifdef CONFIG_GENETIC_IOSCHED_CFQ
+	list_add_tail(&cfqd->data_list, &cfq_data_list);
+#endif
+
 	return cfqd;
 }
 
@@ -2275,6 +2412,46 @@
 {
 	int ret;
 
+#ifdef CONFIG_GENETIC_IOSCHED_CFQ
+	genetic_t *genetic = NULL;
+
+	cfq_stats_snapshot = (struct disk_stats_snapshot *)
+		kmalloc(sizeof(struct disk_stats_snapshot), GFP_KERNEL);
+	if (!cfq_stats_snapshot)
+		panic("cfq: failed to malloc enough space");
+
+	ret = genetic_init(&genetic, CFQ_NUM_CHILDREN, 2 * HZ,
+			    1, "cfq-ioscheduler");
+	if (ret)
+		panic("cfq: failed to init genetic lib");
+
+	if (genetic_register_phenotype(genetic, &cfq_num_ops_genetic_ops,
+				       CFQ_NUM_CHILDREN, "num_ops",
+				       CFQ_NUM_OPS_NUM_GENES,
+				       CFQ_NUM_OPS_UID))
+		panic("cfq: failed to register num_ops phenotype");
+
+	if (genetic_register_phenotype(genetic, &cfq_throughput_genetic_ops,
+				       CFQ_NUM_CHILDREN, "throughput",
+				       CFQ_THROUGHPUT_NUM_GENES,
+				       CFQ_THROUGHPUT_UID))
+		panic("cfq: failed to register throughput phenotype");
+
+	if (genetic_register_phenotype(genetic, &cfq_latency_genetic_ops,
+				       CFQ_NUM_CHILDREN, "latency",
+				       CFQ_LATENCY_NUM_GENES,
+				       CFQ_LATENCY_UID))
+		panic("cfq: failed to register latency phenotype");
+
+	if (genetic_register_phenotype(genetic, &cfq_general_genetic_ops,
+				       CFQ_NUM_CHILDREN, "general",
+				       CFQ_GENERAL_NUM_GENES,
+				       CFQ_GENERAL_UID))
+	panic("cfq: failed to register general phenotype");
+
+	genetic_start(genetic);
+#endif
+
 	/*
 	 * could be 0 on HZ < 1000 setups
 	 */
@@ -2306,6 +2473,201 @@
 	cfq_slab_kill();
 }
 
+#ifdef CONFIG_GENETIC_IOSCHED_CFQ
+
+static void cfq_num_ops_create_child(genetic_child_t *child)
+{
+	BUG_ON(!child);
+
+	child->genes = 0;
+	child->gene_param = 0;
+	child->num_genes = CFQ_NUM_OPS_NUM_GENES;
+	child->stats_snapshot = cfq_stats_snapshot;
+}
+
+static void cfq_throughput_create_child(genetic_child_t *child)
+{
+	BUG_ON(!child);
+
+	child->genes = 0;
+	child->gene_param = 0;
+	child->num_genes = CFQ_THROUGHPUT_NUM_GENES;
+	child->stats_snapshot = cfq_stats_snapshot;
+}
+
+static void cfq_latency_create_child(genetic_child_t *child)
+{
+	BUG_ON(!child);
+
+	child->genes = 0;
+	child->gene_param = 0;
+	child->num_genes = CFQ_LATENCY_NUM_GENES;
+	child->stats_snapshot = cfq_stats_snapshot;
+}
+
+/* need to create the genes for the child */
+static void cfq_general_create_child(genetic_child_t *child)
+{
+	BUG_ON(!child);
+
+	child->genes = kmalloc(sizeof(struct cfq_genes), GFP_KERNEL);
+	if (!child->genes)
+	panic("cfq_general_create_child: error mallocing space");
+
+	child->gene_param = cfq_gene_param;
+	child->num_genes = CFQ_GENERAL_NUM_GENES;
+	child->stats_snapshot = cfq_stats_snapshot;
+
+	genetic_create_child_spread(child, CFQ_NUM_CHILDREN-1);
+
+	((struct cfq_genes *)child->genes)->nr_requests = BLKDEV_MAX_RQ;
+}
+
+static void cfq_shift_mutation_rate(phenotype_t *in_pt)
+{
+	struct list_head *p;
+	phenotype_t *pt;
+	int count = 0;
+	long rate = 0;
+
+	list_for_each(p, &in_pt->genetic->phenotype) {
+		pt = list_entry(p, phenotype_t, phenotype);
+
+		/* Look at everyone else that contributes to this
+		   phenotype */
+		if (pt->uid & CFQ_GENERAL_UID && pt->uid != CFQ_GENERAL_UID) {
+
+			switch (pt->uid) {
+			case CFQ_NUM_OPS_UID:
+			case CFQ_THROUGHPUT_UID:
+			case CFQ_LATENCY_UID:
+				rate += pt->mutation_rate;
+				count++;
+				break;
+			default:
+				BUG();
+			}
+		}
+	}
+
+	/* If we are a general phenotype that is made up of other
+	   phenotypes then we take the average */
+	if (count)
+		in_pt->mutation_rate = (rate / count);
+	else
+		BUG();
+}
+
+static void cfq_general_set_child_genes(void *in_genes)
+{
+	struct cfq_genes *genes = (struct cfq_genes *)in_genes;
+	struct list_head *d;
+	struct cfq_data *cfqd;
+
+	list_for_each(d, &cfq_data_list) {
+		cfqd = list_entry(d, struct cfq_data, data_list);
+
+		cfqd->cfq_quantum = genes->cfq_quantum;
+		cfqd->cfq_fifo_expire[0] = genes->cfq_fifo_expire_async;
+		cfqd->cfq_fifo_expire[1] = genes->cfq_fifo_expire_sync;
+		cfqd->cfq_back_max = genes->cfq_back_max;
+		cfqd->cfq_back_penalty = genes->cfq_back_penalty;
+		cfqd->cfq_slice[0] = genes->cfq_slice_async;
+		cfqd->cfq_slice[1] = genes->cfq_slice_sync;
+		cfqd->cfq_slice_async_rq = genes->cfq_slice_async_rq;
+		cfqd->cfq_slice_idle = genes->cfq_slice_idle;
+		cfqd->queue->nr_requests = genes->nr_requests;
+	}
+}
+
+static void cfq_num_ops_calc_fitness(genetic_child_t *child)
+{
+	child->fitness = disk_num_ops_calc_fitness(child);
+}
+
+static void cfq_throughput_calc_fitness(genetic_child_t *child)
+{
+	child->fitness = disk_throughput_calc_fitness(child);
+}
+
+static void cfq_latency_calc_fitness(genetic_child_t *child)
+{
+	child->fitness = disk_latency_calc_fitness(child);
+}
+
+/* Make the general the one that takes into account all the fitness
+ * routines, since these are the common genes that effect everything.
+ */
+static void cfq_general_calc_post_fitness(phenotype_t *in_pt)
+{
+	struct list_head *p;
+	phenotype_t *pt;
+	genetic_t *genetic = in_pt->genetic;
+	int ranking[CFQ_NUM_CHILDREN];
+	int weight = 1;
+	int i;
+
+	memset(ranking, 0, sizeof(ranking));
+
+	list_for_each(p, &genetic->phenotype) {
+		pt = list_entry(p, phenotype_t, phenotype);
+
+		/* Look at everyone else that contributes to this
+		   phenotype */
+		if (pt->uid & CFQ_GENERAL_UID && pt->uid != CFQ_GENERAL_UID) {
+
+			switch (pt->uid) {
+			case CFQ_NUM_OPS_UID:
+				weight = 2;
+				break;
+			case CFQ_THROUGHPUT_UID:
+				weight = 2;
+				break;
+			case CFQ_LATENCY_UID:
+				weight = 1;
+				break;
+			default:
+				BUG();
+			}
+
+			for (i = 0; i < pt->num_children; i++)
+				ranking[pt->child_ranking[i]->id] += (i * weight);
+		}
+	}
+
+	for (i = 0; i < in_pt->num_children; i++)
+		in_pt->child_ranking[i]->fitness = ranking[i];
+
+}
+
+#ifdef CONFIG_FINGERPRINTING
+void *cfq_create_genes(phenotype_t *pt)
+{
+	struct cfq_genes *genes = kmalloc(sizeof(struct cfq_genes), GFP_KERNEL);
+
+	if (!genes) {
+		printk(KERN_ERR "cfq_create_genes: unable to alloc space\n");
+		return 0;
+	}
+
+	/* at some point...make these intelligent depending on what
+	 * the workload is
+	 */
+	genes->cfq_quantum = CFQ_QUANTUM;
+	genes->cfq_back_max = CFQ_BACK_MAX;
+	genes->cfq_back_penalty = CFQ_BACK_PENALTY;
+	genes->cfq_slice_sync = CFQ_SLICE_SYNC;
+	genes->cfq_slice_async = CFQ_SLICE_ASYNC;
+	genes->cfq_slice_async_rq = CFQ_SLICE_ASYNC_RQ;
+	genes->cfq_idle_grace = CFQ_IDLE_GRACE;
+	genes->nr_requests = BLKDEV_MAX_RQ;
+
+	return (void *)genes;
+}
+#endif /* CONFIG_FINGERPRINTING */
+
+#endif
+
 module_init(cfq_init);
 module_exit(cfq_exit);
 
Index: linux.2.6.23/block/Kconfig.iosched
===================================================================
--- linux.2.6.23.orig/block/Kconfig.iosched
+++ linux.2.6.23/block/Kconfig.iosched
@@ -77,6 +77,15 @@
         anticipatory scheduler autonomically and will adapt tunables
         depending on the present workload.
 
+config GENETIC_IOSCHED_CFQ
+        bool "Genetic CFQ I/O scheduler (EXPERIMENTAL)"
+        depends on IOSCHED_CFQ && GENETIC_LIB && EXPERIMENTAL
+        default n
+        ---help---
+        This will use a genetic algorithm to tweak the tunables of the
+        CFQ scheduler autonomically and will adapt tunables
+        depending on the present workload.
+
 endmenu
 
 endif

[-- Attachment #4: genetic-io-sched-2.6.23.patch --]
[-- Type: text/x-diff, Size: 7733 bytes --]

Index: linux/block/genhd.c
===================================================================
--- linux.orig/block/genhd.c
+++ linux/block/genhd.c
@@ -30,6 +30,8 @@ static struct blk_major_name {
 	char name[16];
 } *major_names[BLKDEV_MAJOR_HASH_SIZE];
 
+LIST_HEAD(gendisks);
+
 /* index in the above - for now: assume no multimajor ranges */
 static inline int major_to_index(int major)
 {
@@ -395,19 +397,22 @@ static ssize_t disk_stats_read(struct ge
 		jiffies_to_msecs(disk_stat_read(disk, io_ticks)),
 		jiffies_to_msecs(disk_stat_read(disk, time_in_queue)));
 }
+
+#ifdef CONFIG_FINGERPRINTING
 static ssize_t disk_fp_read(struct gendisk * disk, char *page)
 {
-	return sprintf(page, "reads: %llx\n"
-		       "writes: %llx\n"
-		       "head_pos: %llx\n"
-		       "avg_dist: %llx\n"
-		       "avg_size: %llx\n",
+	return sprintf(page, "reads: %lld\n"
+		       "writes: %lld\n"
+		       "head_pos: %lld\n"
+		       "avg_dist: %lld\n"
+		       "avg_size: %lld\n",
 		       (unsigned long long)disk->fp_ss->reads,
 		       (unsigned long long)disk->fp_ss->writes,
 		       (unsigned long long)disk->fp_ss->head_pos,
 		       (unsigned long long)disk->fp_ss->avg_dist,
 		       (unsigned long long)disk->fp_ss->avg_size);
 }
+#endif
 
 static struct disk_attribute disk_attr_uevent = {
 	.attr = {.name = "uevent", .mode = S_IWUSR },
@@ -433,10 +438,13 @@ static struct disk_attribute disk_attr_s
 	.attr = {.name = "stat", .mode = S_IRUGO },
 	.show	= disk_stats_read
 };
+
+#ifdef CONFIG_FINGERPRINTING
 static struct disk_attribute disk_attr_fp = {
 	.attr = {.name = "fp", .mode = S_IRUGO },
 	.show	= disk_fp_read
 };
+#endif
 
 #ifdef CONFIG_FAIL_MAKE_REQUEST
 
@@ -476,7 +484,9 @@ static struct attribute * default_attrs[
 #ifdef CONFIG_FAIL_MAKE_REQUEST
 	&disk_attr_fail.attr,
 #endif
-	&disk_attr_fp.attr,
+#ifdef CONFIG_FINGERPRINTING
+        &disk_attr_fp.attr,
+#endif
 	NULL,
 };
 
@@ -485,6 +495,7 @@ static void disk_release(struct kobject 
 	struct gendisk *disk = to_disk(kobj);
 	kfree(disk->random);
 	kfree(disk->part);
+	list_del(&disk->gendisks);
 	free_disk_stats(disk);
 	kfree(disk);
 }
@@ -685,8 +696,9 @@ struct gendisk *alloc_disk_node(int mino
 		kobj_set_kset_s(disk,block_subsys);
 		kobject_init(&disk->kobj);
 		rand_initialize_disk(disk);
+		list_add_tail(&disk->gendisks, &gendisks);
 		INIT_WORK(&disk->async_notify,
 			media_change_notify_thread);
 	}
 
 	disk->fp_ss = kmalloc(sizeof(struct fp_snapshot), GFP_KERNEL);
Index: linux/block/ll_rw_blk.c
===================================================================
--- linux.orig/block/ll_rw_blk.c
+++ linux/block/ll_rw_blk.c
@@ -21,6 +21,7 @@
 #include <linux/string.h>
 #include <linux/init.h>
 #include <linux/bootmem.h>	/* for max_pfn/max_low_pfn */
+#include <linux/genetic.h>
 #include <linux/completion.h>
 #include <linux/slab.h>
 #include <linux/swap.h>
@@ -2694,6 +2695,143 @@ static inline void add_request(request_q
 	__elv_add_request(q, req, ELEVATOR_INSERT_SORT, 0);
 }
  
+#if defined(CONFIG_GENETIC_IOSCHED_AS) || \
+    defined(CONFIG_GENETIC_IOSCHED_DEADLINE) || \
+    defined(CONFIG_GENETIC_IOSCHED_CFQ)
+extern struct list_head gendisks;
+
+void disk_stats_snapshot(phenotype_t * pt)
+{
+	struct list_head * d;
+	struct gendisk *disk;
+	struct disk_stats_snapshot * ss = (struct disk_stats_snapshot *)pt->child_ranking[0]->stats_snapshot;
+
+	memset(ss, 0, sizeof(struct disk_stats_snapshot));
+
+	list_for_each(d, &gendisks) {
+	    disk = list_entry(d, struct gendisk, gendisks);
+
+	    disk_round_stats(disk);
+
+	    ss->reads += disk_stat_read(disk, ios[READ]);
+	    ss->writes += disk_stat_read(disk, ios[WRITE]);
+	    ss->read_sectors += disk_stat_read(disk, sectors[READ]);
+	    ss->write_sectors += disk_stat_read(disk, sectors[WRITE]);
+	    ss->time_in_queue += disk_stat_read(disk, time_in_queue);
+	}
+}
+
+long long disk_num_ops_calc_fitness(genetic_child_t * child)
+{
+	struct list_head * d;
+	struct gendisk *disk;
+	struct disk_stats_snapshot * ss = (struct disk_stats_snapshot *)child->stats_snapshot;
+	long long reads = 0;
+	long long writes = 0;
+
+	list_for_each(d, &gendisks) {
+	    disk = list_entry(d, struct gendisk, gendisks);
+
+	    disk_round_stats(disk);
+
+	    reads += disk_stat_read(disk, ios[READ]);
+	    writes += disk_stat_read(disk, ios[WRITE]);
+	}
+
+	reads -= ss->reads;
+	writes -= ss->writes;
+
+	return reads + writes;
+}
+
+long long disk_throughput_calc_fitness(genetic_child_t * child)
+{
+	struct list_head * d;
+	struct gendisk *disk;
+	struct disk_stats_snapshot * ss = (struct disk_stats_snapshot *)child->stats_snapshot;
+	long long read_sectors = 0;
+	long long write_sectors = 0;
+
+	list_for_each(d, &gendisks) {
+	    disk = list_entry(d, struct gendisk, gendisks);
+
+	    disk_round_stats(disk);
+
+	    read_sectors += disk_stat_read(disk, sectors[READ]);
+	    write_sectors += disk_stat_read(disk, sectors[WRITE]);
+	}
+
+	read_sectors -= ss->read_sectors;
+	write_sectors -= ss->write_sectors;
+
+	return read_sectors + write_sectors;
+}
+
+long long disk_latency_calc_fitness(genetic_child_t * child)
+{
+	struct list_head * d;
+	struct gendisk *disk;
+	struct disk_stats_snapshot * ss = (struct disk_stats_snapshot *)child->stats_snapshot;
+	long long time_in_queue = 0;
+
+	list_for_each(d, &gendisks) {
+	    disk = list_entry(d, struct gendisk, gendisks);
+
+	    disk_round_stats(disk);
+
+	    time_in_queue += disk_stat_read(disk, time_in_queue);
+	}
+
+	time_in_queue = -(time_in_queue - ss->time_in_queue);
+
+	return time_in_queue;
+}
+
+#ifdef CONFIG_FINGERPRINTING
+
+void disk_update_fingerprint(phenotype_t * pt)
+{
+	struct list_head * d;
+	struct gendisk *disk;
+
+	BUG_ON(!pt->fp_ss);
+
+	/* tally up all the other disk snapshots */
+	list_for_each(d, &gendisks) {
+	    disk = list_entry(d, struct gendisk, gendisks);
+
+	    consolidate_fp_snapshot(pt->fp_ss, disk->fp_ss);
+
+	    /* reset it for the next generation */
+	    reset_fp_snapshot(disk->fp_ss);
+	}
+
+}
+
+void disk_get_fingerprint(phenotype_t * pt)
+{
+	struct list_head * d;
+	struct gendisk *disk;
+
+	BUG_ON(!pt->fp_ss);
+
+	/* tally up all the other disk snapshots */
+	list_for_each(d, &gendisks) {
+	    disk = list_entry(d, struct gendisk, gendisks);
+
+	    consolidate_fp_snapshot(pt->fp_ss, disk->fp_ss);
+
+	    /* reset it for the next generation */
+	    reset_fp_snapshot(disk->fp_ss);
+	}
+
+	calc_fp(pt->fp, pt->fp_ss);
+}
+
+#endif /* CONFIG_FINGERPRINTING */
+
+#endif
+
 /*
  * disk_round_stats()	- Round off the performance stats on a struct
  * disk_stats.
Index: linux/include/linux/genhd.h
===================================================================
--- linux.orig/include/linux/genhd.h
+++ linux/include/linux/genhd.h
@@ -134,6 +134,7 @@ struct gendisk {
 	atomic_t sync_io;		/* RAID */
 	unsigned long stamp;
 	int in_flight;
+	struct list_head gendisks;
 #ifdef	CONFIG_SMP
 	struct disk_stats *dkstats;
 #else
Index: linux/include/linux/blkdev.h
===================================================================
--- linux.orig/include/linux/blkdev.h
+++ linux/include/linux/blkdev.h
@@ -861,6 +861,20 @@ void kblockd_flush_work(struct work_stru
 	MODULE_ALIAS("block-major-" __stringify(major) "-*")
 
 
+#if defined(CONFIG_GENETIC_IOSCHED_AS) || \
+    defined(CONFIG_GENETIC_IOSCHED_DEADLINE) || \
+    defined(CONFIG_GENETIC_IOSCHED_CFQ)
+
+struct disk_stats_snapshot
+{
+        unsigned long reads;
+        unsigned long writes;
+        unsigned long read_sectors;
+        unsigned long write_sectors;
+        unsigned long time_in_queue;
+};
+#endif /* CONFIG_GENETIC_IOSCHED_AS || CONFIG_GENETIC_IOSCHED_CFQ */
+
 #else /* CONFIG_BLOCK */
 /*
  * stubs for when the block layer is configured out


[-- Attachment #5: genetic-lib-2.6.23.patch --]
[-- Type: text/x-diff, Size: 57025 bytes --]

Index: linux-2.6.23/include/linux/genetic.h
===================================================================
--- /dev/null
+++ linux-2.6.23/include/linux/genetic.h
@@ -0,0 +1,285 @@
+#ifndef __LINUX_GENETIC_H
+#define __LINUX_GENETIC_H
+/*
+ * include/linux/genetic.h
+ *
+ * Jake Moilanen <moilanen@austin.ibm.com>
+ * Copyright (C) 2004 IBM
+ *
+ * Genetic algorithm library
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as published
+ * by the Free Software Foundation. 
+ */
+
+#include <linux/list.h>
+#include <linux/timer.h>
+#include <linux/init.h>
+#include <linux/seq_file.h>
+
+
+#define GENETIC_HISTORY_SIZE		0x8
+#define GENETIC_HISTORY_MASK		(GENETIC_HISTORY_SIZE - 1)
+
+/* percentage of total number genes to mutate */
+#define GENETIC_DEFAULT_MUTATION_RATE	15
+
+/* XXX TODO Make this an adjustable runtime variable */
+/* Percentage that an iteration can jump within the range */
+#define GENETIC_ITERATIVE_MUTATION_RANGE 20
+
+/* the rate that GENETIC_DEFAULT_MUTATION_RATE itself can change */
+#define GENETIC_DEFAULT_MUTATION_RATE_CHANGE 4
+#define GENETIC_MAX_MUTATION_RATE	45
+#define GENETIC_MIN_MUTATION_RATE	10
+
+#define GENETIC_DEBUG			0
+
+#ifdef CONFIG_FINGERPRINTING
+#define FP_DECAY			90
+#define GENETIC_NUM_DEBUG_POINTS	5
+#else
+#define GENETIC_NUM_DEBUG_POINTS	4
+#endif
+
+#define GENETIC_PRINT_DEBUG		0
+#define gen_dbg(format, arg...) do { if (GENETIC_PRINT_DEBUG) printk(KERN_EMERG __FILE__ ": " format "\n" , ## arg); } while (0)
+#define gen_trc(format, arg...) do { if (GENETIC_PRINT_DEBUG) printk(KERN_EMERG __FILE__ ":%s:%d\n" , __FUNCTION__, __LINE__); } while (0)
+
+struct gene_param_s;
+struct genetic_s;
+struct phenotype_s;
+
+struct genetic_child_s {
+	struct list_head	list;
+	long long		fitness;
+	unsigned long		num_genes;
+	void			*genes;
+	struct gene_param_s	*gene_param;
+	void			*stats_snapshot;
+	int			id;
+};
+
+typedef struct genetic_child_s genetic_child_t;
+
+/* Here's a generic idea of what it the genes could look like */
+struct gene_param_s {
+	char 		*name;
+	unsigned long	min;
+	unsigned long	max;
+	unsigned long	initial;
+	void		(*mutate_gene)(genetic_child_t *, unsigned long);
+};
+
+typedef struct gene_param_s gene_param_t;
+
+struct phenotype_s {
+	struct list_head	phenotype;
+
+	struct list_head	children_queue[2];
+	struct list_head	*run_queue;
+	struct list_head	*finished_queue;
+	struct genetic_ops	*ops;
+
+	char			*name;
+
+	struct genetic_s	*genetic;		/* point back
+							 * to genetic
+							 * struct
+							 */
+
+	unsigned long		num_children;		  /* Must be power of 2 */
+	unsigned long		natural_selection_cutoff; /* How many children
+							   * will survive
+							   */
+	void			*stats_snapshot;
+	unsigned long		child_number;
+
+	/* percentage of total number of genes to mutate */
+	long			mutation_rate;
+	unsigned long		num_mutations;
+	unsigned long		num_genes;
+
+	genetic_child_t		**child_ranking;
+
+	void			(*natural_selection)(struct phenotype_s *);
+
+	/* This UID is bitmap comprised of other phenotypes that contribute
+	   to the genes */
+	unsigned long		uid;
+
+	/* performance metrics */
+	long long		avg_fitness;
+	long long		last_gen_avg_fitness;
+
+	unsigned long		fitness_history_index;
+	long long		fitness_history[GENETIC_HISTORY_SIZE];
+
+#if GENETIC_DEBUG
+	unsigned long		debug_size;	/* number of longs in
+						   debug history */
+	unsigned long		debug_index;
+	long long		*debug_history;
+#endif
+#ifdef CONFIG_FINGERPRINTING
+	struct dentry 		*fp_dir;
+	struct fingerprint	*fp;
+	struct fp_snapshot	*fp_ss;
+	unsigned long		***top_child;
+	long long		***top_fitness;
+	int			last_fingerprint;
+#else
+	long long		top_fitness;
+#endif
+
+	long long		from_top;
+
+};
+
+typedef struct phenotype_s phenotype_t;
+
+/**
+ * struct genetic_s - contains all data structures for a genetic plugin
+ * @name: string that will identify this genetic alg. in debugfs and printk
+ * @phenotype: list of all registered phenotypes
+ * @child_number: the running child index (< @num_children)
+ * @child_life_time: time in ms each child is ran before being swapped
+ * @num_children: number of children in each generation (must be a power of 2)
+ * @generation_number: increased once every child in a generation is ran
+ * @defaults: when 1 the genetic library will hold all genes at defaults
+ * @fingerprinting: when 1 the genetic library wil use gene fingerprinting if CONFIG_FINGERPRINTING
+ */
+struct genetic_s {
+	char			*name;
+	struct timer_list	timer;
+
+	struct list_head	phenotype;
+
+	unsigned long		child_number;
+	unsigned long		child_life_time;
+	unsigned long		num_children;
+
+	unsigned long		generation_number;
+	int 			defaults;
+
+	/* private */
+	struct dentry		*dir;
+	struct dentry 		*phenotypes_dir;
+	struct dentry 		*fingerprinting_dir;
+#ifdef CONFIG_FINGERPRINTING
+	int			fingerprinting;
+#endif
+};
+
+typedef struct genetic_s genetic_t;
+
+struct genetic_ops {
+	void			(*create_child)(genetic_child_t *);
+	void			(*set_child_genes)(void *);
+	void			(*calc_fitness)(genetic_child_t *);
+	void			(*combine_genes)(genetic_child_t *, genetic_child_t *,
+						 genetic_child_t *);
+	void			(*mutate_child)(genetic_child_t *);
+	void			(*calc_post_fitness)(phenotype_t *); /* Fitness routine used when
+								      * need to take into account
+								      * other phenotype fitness
+								      * results after they ran
+								      */
+	void			(*take_snapshot)(phenotype_t *);
+	void			(*shift_mutation_rate)(phenotype_t *);
+	int			(*gene_show)(struct seq_file *, void *);
+#ifdef CONFIG_FINGERPRINTING
+	void			(*get_fingerprint)(phenotype_t *);
+	void			(*update_fingerprint)(phenotype_t *);
+	void *			(*create_top_genes)(phenotype_t *);
+	int			(*top_fitness_show)(struct seq_file *, void *);
+	int			(*snapshot_show)(struct seq_file *, void *);
+	int			(*state_show)(struct seq_file *, void *);
+#endif
+};
+
+/* Setup routines */
+int __init genetic_init(genetic_t ** in_genetic, unsigned long num_children,
+			unsigned long child_life_time, int fingerprinting,
+			char * name);
+int __init genetic_register_phenotype(genetic_t * genetic, struct genetic_ops * ops,
+				      unsigned long num_children, char * name,
+				      unsigned long num_genes, unsigned long uid);
+void __init genetic_start(genetic_t * genetic);
+
+/* Generic helper functions */
+void genetic_generic_mutate_child(genetic_child_t * child);
+void genetic_generic_iterative_mutate_gene(genetic_child_t * child, unsigned long gene_num);
+void genetic_generic_combine_genes(genetic_child_t * parent_a,
+				   genetic_child_t * parent_b,
+				   genetic_child_t * child);
+void genetic_create_child_spread(genetic_child_t * child, unsigned long num_children);
+void genetic_create_child_defaults(genetic_child_t * child);
+void genetic_general_shift_mutation_rate(phenotype_t * in_pt);
+int genetic_generic_gene_show(struct seq_file *s, void *unused);
+
+
+/* XXX do this more intelligently */
+#ifndef DIVLL_OP
+#define DIVLL_OP
+#if BITS_PER_LONG >= 64
+
+static inline void divll(long long *n, long div, long *rem)
+{
+        *rem = *n % div;
+        *n /= div;
+}
+
+#else
+
+static inline void divl(int32_t high, int32_t low,
+                        int32_t div,
+                        int32_t *q, int32_t *r)
+{
+        int64_t n = (u_int64_t)high << 32 | low;
+        int64_t d = (u_int64_t)div << 31;
+        int32_t q1 = 0;
+        int c = 32;
+        while (n > 0xffffffff) {
+                q1 <<= 1;
+                if (n >= d) {
+                        n -= d;
+                        q1 |= 1;
+                }
+                d >>= 1;
+                c--;
+        }
+        q1 <<= c;
+        if (n) {
+                low = n;
+                *q = q1 | (low / div);
+                *r = low % div;
+        } else {
+                *r = 0;
+                *q = q1;
+        }
+        return;
+}
+
+static inline void divll(long long *n, long div, long *rem)
+{
+        int32_t low, high;
+        low = *n & 0xffffffff;
+        high = *n >> 32;
+        if (high) {
+                int32_t high1 = high % div;
+                int32_t low1 = low;
+                high /= div;
+                divl(high1, low1, div, &low, (int32_t *)rem);
+                *n = (int64_t)high << 32 | low;
+        } else {
+                *n = low / div;
+                *rem = low % div;
+        }
+}
+#endif
+
+#endif /* #ifndef divll */
+
+#endif
Index: linux-2.6.23/lib/Kconfig
===================================================================
--- linux-2.6.23.orig/lib/Kconfig
+++ linux-2.6.23/lib/Kconfig
@@ -63,6 +63,12 @@ config AUDIT_GENERIC
 	depends on AUDIT && !AUDIT_ARCH
 	default y
 
+config GENETIC_LIB
+	bool "Genetic Library"
+	help
+	  This option will build in a genetic library that will tweak
+	  kernel parameters autonomically to improve performance.
+
 #
 # compression support is select'ed if needed
 #
Index: linux-2.6.23/lib/Makefile
===================================================================
--- linux-2.6.23.orig/lib/Makefile
+++ linux-2.6.23/lib/Makefile
@@ -47,6 +47,7 @@ obj-$(CONFIG_CRC32)	+= crc32.o
 obj-$(CONFIG_CRC7)	+= crc7.o
 obj-$(CONFIG_LIBCRC32C)	+= libcrc32c.o
 obj-$(CONFIG_GENERIC_ALLOCATOR) += genalloc.o
+obj-$(CONFIG_GENETIC_LIB) += genetic.o
 
 obj-$(CONFIG_ZLIB_INFLATE) += zlib_inflate/
 obj-$(CONFIG_ZLIB_DEFLATE) += zlib_deflate/
Index: linux-2.6.23/lib/genetic.c
===================================================================
--- /dev/null
+++ linux-2.6.23/lib/genetic.c
@@ -0,0 +1,892 @@
+/*
+ * Genetic Algorithm Library
+ *
+ * Jake Moilanen <moilanen@austin.ibm.com>
+ * Copyright (C) 2004-2005 IBM
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as published
+ * by the Free Software Foundation. 
+ */
+
+/*
+ * Life cycle
+ *
+ * 1.) Create random children
+ * 2.) Run tests
+ * 3.) Calculate fitness
+ * 4.) Take top preformers
+ * 5.) Make children
+ * 6.) Mutate
+ * 7.) Goto step 2
+ */
+
+/*
+ * TODO:
+ *
+ * - Check to make sure DEF_DESKTOP_TIMESLICE is operating correctly
+ * - fix fixup_timeslice
+ */
+
+#include <linux/genetic.h>
+#include <linux/timer.h>
+#include <linux/jiffies.h>
+#include <linux/debugfs.h>
+#include <linux/init.h>
+#include <linux/random.h>
+#include <linux/log2.h>
+#include <asm/uaccess.h>
+#include <asm/string.h>
+#include <asm/bug.h>
+
+#include "genetic-debug.c"
+
+#ifdef CONFIG_FINGERPRINTING
+#include <linux/fingerprinting.h>
+#include "fingerprinting.c"
+#endif
+
+char genetic_lib_version[] = "0.3.1";
+
+int mutation_rate_change = GENETIC_DEFAULT_MUTATION_RATE_CHANGE;
+int genetic_lib_enabled = 1;
+
+static void genetic_ns_top_parents(phenotype_t *);
+static void genetic_ns_award_top_parents(phenotype_t *);
+static int  genetic_create_children(phenotype_t *);
+static void genetic_split_performers(phenotype_t *);
+static void genetic_mutate(phenotype_t *);
+static void genetic_run_child(genetic_t * genetic);
+static void genetic_new_generation(genetic_t * genetic);
+
+void genetic_switch_child(unsigned long data);
+
+
+int __init genetic_init(genetic_t ** in_genetic, unsigned long num_children,
+			unsigned long child_life_time, int fingerprinting,
+			char * name)
+{
+	genetic_t * genetic;
+
+	if (!genetic_lib_enabled)
+		return 0;
+
+	printk(KERN_INFO "Initializing Genetic Library - version %s\n",
+			genetic_lib_version);
+
+	genetic = (genetic_t *)kmalloc(sizeof(genetic_t), GFP_KERNEL);
+	if (!genetic) {
+		printk(KERN_ERR "genetic_init: not enough memory\n");
+		return -ENOMEM;
+	}
+
+	*in_genetic = genetic;
+
+	genetic->name = (char *)kmalloc(strlen(name), GFP_KERNEL);
+	if (!genetic->name) {
+		printk(KERN_ERR "genetic_init: not enough memory\n");
+		kfree(genetic);
+		return -ENOMEM;
+	}
+
+	/* Init some of our values */
+	strcpy(genetic->name, name);
+
+	genetic->num_children = num_children;
+	genetic->child_life_time = child_life_time;
+
+	genetic->generation_number = 1;
+	genetic->child_number = 0;
+	genetic->defaults = 0;
+#ifdef CONFIG_FINGERPRINTING
+	genetic->fingerprinting = fingerprinting;
+#endif
+
+	/* Setup how long each child has to live */
+	init_timer(&genetic->timer);
+	genetic->timer.function = genetic_switch_child;
+	genetic->timer.data = (unsigned long)genetic;
+
+	INIT_LIST_HEAD(&genetic->phenotype);
+
+	/* Setup debugfs */
+	genetic->dir = genetic_create_tree(name, NULL);
+	genetic->phenotypes_dir = genetic_create_tree("phenotypes", genetic->dir);
+
+#ifdef CONFIG_FINGERPRINTING
+	if (fingerprinting)
+		genetic->fingerprinting_dir = genetic_create_tree("fingerprinting", genetic->dir);
+#endif
+
+	/* TODO add stack to the genetic track dentries for deallocation */
+	debugfs_create_file("stats", S_IFREG|S_IRUGO, genetic->dir,
+			genetic, &genetic_stat_operations);
+
+	debugfs_create_file("phenotype_average", S_IFREG|S_IRUGO, genetic->dir,
+			genetic, &genetic_phenotype_average_operations);
+
+	debugfs_create_bool("defaults", S_IWUSR|S_IFREG|S_IRUGO, genetic->dir,
+				&genetic->defaults);
+
+	return 0;
+}
+
+int __init genetic_register_phenotype(genetic_t * genetic,
+		struct genetic_ops * ops, unsigned long num_children,
+		char * name, unsigned long num_genes, unsigned long uid)
+{
+	phenotype_t * pt;
+	int rc;
+
+	if (!genetic_lib_enabled)
+		return 0;
+
+	printk(KERN_INFO "Initializing %s's phenotype %s\n", genetic->name,
+			name);
+
+	pt = (phenotype_t *)kmalloc(sizeof(phenotype_t), GFP_KERNEL);
+	if (!genetic) {
+		printk(KERN_ERR "genetic_register_phenotype: not enough\
+				memory\n");
+		return -ENOMEM;
+	}
+
+	pt->name = (char *)kmalloc(strlen(name), GFP_KERNEL);
+	if (!pt->name) {
+		printk(KERN_ERR "genetic_register_phenotype: not enough\
+				memory\n");
+		kfree(pt);
+		return -ENOMEM;
+	}
+
+	pt->child_ranking = (genetic_child_t **)kmalloc(num_children * sizeof(genetic_child_t *), GFP_KERNEL);
+	if (!pt->child_ranking) {
+		printk(KERN_ERR "genetic_register_phenotype: not enough\
+				memory\n");
+		kfree(pt->name);
+		kfree(pt);
+		return -ENOMEM;
+	}
+
+
+	strcpy(pt->name, name);
+
+	INIT_LIST_HEAD(&pt->children_queue[0]);
+	INIT_LIST_HEAD(&pt->children_queue[1]);
+
+	pt->run_queue = &pt->children_queue[0];
+	pt->finished_queue = &pt->children_queue[1];
+
+	pt->ops = ops;
+	pt->num_children = num_children;
+
+	pt->mutation_rate = GENETIC_DEFAULT_MUTATION_RATE;
+	pt->natural_selection = genetic_ns_top_parents;
+	pt->natural_selection_cutoff = num_children / 2;
+	pt->avg_fitness = 0;
+	pt->last_gen_avg_fitness = 0;
+	pt->child_number = 0;
+
+	pt->genetic = genetic;
+	pt->uid = uid;
+	pt->num_genes = num_genes;
+
+	pt->top_fitness = 0;
+
+#ifdef CONFIG_FINGERPRINTING
+	if (genetic->fingerprinting) {
+		if ((rc = genetic_init_fingerprinting(pt)) < 0)
+			return rc;
+	}
+#endif
+
+	/* Create some children */
+	rc = genetic_create_children(pt);
+	if (rc)
+		return rc;
+
+	list_add_tail(&pt->phenotype, &genetic->phenotype);
+
+	if (ops->gene_show) {
+		debugfs_create_file(name, S_IFREG|S_IRUGO,
+			genetic->phenotypes_dir, pt, &genetic_gene_operations);
+	}
+
+	return 0;
+}
+
+
+
+void __init genetic_start(genetic_t * genetic)
+{
+	if (!genetic_lib_enabled)
+		return;
+
+	genetic_run_child(genetic);
+	printk(KERN_INFO "%ld children started in %s genetic library\n",
+			genetic->num_children, genetic->name);
+}
+
+
+
+/* create some children, it is up to the lib user to come up w/ a good
+   distro of genes for it's children */
+static int genetic_create_children(phenotype_t * pt)
+{
+	unsigned long i;
+	genetic_child_t * child;
+
+	for (i = 0; i < pt->num_children; i++) {
+		pt->child_ranking[i] = (genetic_child_t *)kmalloc(
+				sizeof(genetic_child_t), GFP_KERNEL);
+
+		if (!pt->child_ranking[i]) {
+			printk(KERN_ERR "genetic_create_child: not enough\
+					 memory\n");
+
+			for (i = i - 1; i >= 0; i--)
+				kfree(pt->child_ranking[i]);
+
+			return -ENOMEM;
+		}
+
+		child = pt->child_ranking[i];
+
+		child->id = i;
+
+		pt->ops->create_child(child);
+
+		list_add_tail(&child->list, pt->run_queue);
+	}
+
+	return 0;
+}
+
+
+/* See how well child did and run the next one */
+void genetic_switch_child(unsigned long data)
+{
+	genetic_t * genetic = (genetic_t *)data;
+	genetic_child_t * child;
+
+	struct list_head * p;
+	phenotype_t * pt;
+
+	int new_generation = 0;
+#ifdef GENETIC_DEBUG_VERBOSE
+	printk(KERN_INFO "genetic_switch_child() for %s\n", genetic->name);
+#endif
+	list_for_each(p, &genetic->phenotype) {
+		pt = list_entry(p, phenotype_t, phenotype);
+
+		child = list_entry(pt->run_queue->next, genetic_child_t, list);
+
+#ifdef GENETIC_DEBUG_VERBOSE
+		printk(KERN_INFO "  phenotype %s\n", pt->name);
+#endif
+
+		list_del(&child->list);
+
+		list_add_tail(&child->list, pt->finished_queue);
+
+		if (pt->ops->calc_fitness)
+			pt->ops->calc_fitness(child);
+
+#ifdef GENETIC_DEBUG_VERBOSE
+		printk(KERN_INFO "  finished calc_fitness\n");
+#endif
+
+		pt->child_ranking[pt->child_number++] = child;
+
+		/* See if need more children */
+		if (list_empty(pt->run_queue))
+			new_generation = 1;
+
+	}
+
+	genetic->child_number++;
+
+	if (new_generation)
+		genetic_new_generation(genetic);
+
+	genetic_run_child(genetic);
+
+#ifdef GENETIC_DEBUG_VERBOSE
+	printk("exiting genetic_switch_child()\n");
+#endif
+}
+
+/* Set the childs genes for run */
+void genetic_run_child(genetic_t * genetic)
+{
+	struct list_head * p;
+	phenotype_t * pt;
+
+	genetic_child_t * child;
+	void * genes;
+
+	list_for_each(p, &genetic->phenotype) {
+		pt = list_entry(p, phenotype_t, phenotype);
+
+		child = list_entry(pt->run_queue->next, genetic_child_t, list);
+
+		/* genetic alg. disabled, only use default genes */
+		if (genetic->defaults)
+			genetic_create_child_defaults(child);
+
+		genes = child->genes;
+
+		if (pt->ops->set_child_genes)
+			pt->ops->set_child_genes(genes);
+
+		if (pt->ops->take_snapshot)
+			pt->ops->take_snapshot(pt);
+
+	}
+
+	/* set a timer interrupt */
+	genetic->timer.expires = jiffies + genetic->child_life_time;
+	add_timer(&genetic->timer);
+
+}
+
+/* This natural selection routine will take the top
+ * natural_select_cutoff and use them to make children for the next
+ * generation and keep the top half perfomers
+ *
+ * This assumes natural_select_cutoff is exactly half of num_children
+ * and num_children is a multable of 4.
+ */
+static void genetic_ns_top_parents(phenotype_t * pt)
+{
+	unsigned long i,j,k = 0;
+	unsigned long num_children = pt->num_children;
+	unsigned long cutoff = num_children - pt->natural_selection_cutoff;
+
+	for (i = cutoff, j = num_children - 1; i < j; i++, j--, k += 2) {
+	/* create child A */
+	pt->ops->combine_genes(pt->child_ranking[i],
+					pt->child_ranking[j],
+					pt->child_ranking[k]);
+
+	/* create child B */
+	pt->ops->combine_genes(pt->child_ranking[i],
+					pt->child_ranking[j],
+					pt->child_ranking[k+1]);
+	}
+}
+
+
+/* This natural selection routine just has top parents populating
+   bottom performers. */
+static void genetic_ns_award_top_parents(phenotype_t * pt)
+{
+	unsigned long i;
+	unsigned long num_children = pt->num_children;
+	unsigned long cutoff = num_children - pt->natural_selection_cutoff;
+
+	for (i = 0; i < cutoff; i += 2) {
+		pt->ops->combine_genes(pt->child_ranking[num_children - 1],
+					pt->child_ranking[num_children - 2],
+					pt->child_ranking[i]);
+
+		pt->ops->combine_genes(pt->child_ranking[num_children - 1],
+					pt->child_ranking[num_children - 2],
+					pt->child_ranking[i+1]);
+	}
+}
+
+static inline void genetic_swap(genetic_child_t ** a, genetic_child_t ** b)
+{
+	genetic_child_t * tmp = *a;
+
+	*a = *b;
+	*b = tmp;
+}
+
+/* bubble sort */
+/* XXX change this to quick sort */
+static void genetic_split_performers(phenotype_t * pt)
+{
+	int i, j;
+
+	for (i = pt->num_children; i > 1; i--)
+	for (j = 0; j < i - 1; j++)
+		if (pt->child_ranking[j]->fitness > pt->child_ranking[j+1]->fitness)
+		genetic_swap(&pt->child_ranking[j], &pt->child_ranking[j+1]);
+}
+
+static void genetic_mutate(phenotype_t * pt)
+{
+	long child_entry = -1;
+	int i;
+
+	if (!pt->num_genes)
+		return;
+
+	for (i = 0; i < pt->num_mutations; i++) {
+	get_random_bytes(&child_entry, sizeof(child_entry));
+	child_entry = child_entry % pt->num_children;
+
+	pt->ops->mutate_child(pt->child_ranking[child_entry]);
+	}
+}
+
+/* XXX This will either aid in handling new workloads, or send us on a
+   downward spiral */
+static void genetic_shift_mutation_rate(phenotype_t * pt, long long prev_gen_avg_fitness, long long avg_fitness)
+{
+
+	long long low_bound;
+	long long high_bound;
+	long dummy;
+
+	if (mutation_rate_change && pt->genetic->generation_number > 1) {
+
+		if (pt->ops->shift_mutation_rate) {
+			pt->ops->shift_mutation_rate(pt);
+		} else {
+
+			low_bound = avg_fitness * 90;
+			divll(&low_bound, 100, &dummy);
+
+			high_bound = avg_fitness * 110;
+			divll(&high_bound, 100, &dummy);
+
+			if (high_bound > prev_gen_avg_fitness)
+				pt->mutation_rate -= mutation_rate_change;
+			else if (low_bound < prev_gen_avg_fitness)
+				pt->mutation_rate += mutation_rate_change;
+
+			if (pt->mutation_rate > GENETIC_MAX_MUTATION_RATE)
+				pt->mutation_rate = GENETIC_MAX_MUTATION_RATE;
+			else if (pt->mutation_rate < GENETIC_MIN_MUTATION_RATE)
+				pt->mutation_rate = GENETIC_MIN_MUTATION_RATE;
+		}
+	}
+}
+
+void genetic_general_shift_mutation_rate(phenotype_t * in_pt)
+{
+	struct list_head * p;
+	phenotype_t * pt;
+	int count = 0;
+	long rate = 0;
+
+	list_for_each(p, &in_pt->genetic->phenotype) {
+		pt = list_entry(p, phenotype_t, phenotype);
+
+		if (in_pt->uid & pt->uid && in_pt->uid != pt->uid) {
+			rate += pt->mutation_rate;
+			count++;
+		}
+	}
+
+	/* If we are a general phenotype that is made up of other
+	   phenotypes then we take the average */
+	if (count)
+		in_pt->mutation_rate = (rate / count);
+	else
+		in_pt->mutation_rate = mutation_rate_change;
+}
+
+static void genetic_calc_stats(phenotype_t * in_pt)
+{
+	struct list_head * p;
+	phenotype_t * pt;
+	long long total_fitness = 0;
+	long long prev_gen_avg_fitness = in_pt->last_gen_avg_fitness;
+	long long tmp_fitness;
+	long dummy;
+	int i = 0;
+#ifdef CONFIG_FINGERPRINTING
+	int fp = in_pt->genetic->fingerprinting;
+	int numerical_fp;
+#endif
+
+	/* On a general phenotype, need to look at other metrics since
+	 * the fitness is normalized.  It always average the same.  It
+	 * assumes that this phenotype is registered last.
+	 */
+	if (in_pt->ops->calc_post_fitness) {
+
+#ifdef CONFIG_FINGERPRINTING
+		if (fp)
+			numerical_fp = create_fingerprint(in_pt->fp);
+
+		/* do we want this???? */
+		if ((fp && (in_pt->last_fingerprint == numerical_fp)) || !fp) {
+#else
+		if (1) {
+#endif
+
+#ifdef GENETIC_DEBUG_VERBOSE
+			printk(KERN_INFO "genetic_calc_stats() for %s\n", in_pt->name);
+#endif
+			list_for_each(p, &in_pt->genetic->phenotype) {
+				pt = list_entry(p, phenotype_t, phenotype);
+
+				/* for each child */
+				if (in_pt->uid & pt->uid && in_pt->uid != pt->uid) {
+					if (pt->avg_fitness) {
+						/* measure how far percentage-wise that we are from the top */
+						pt->from_top = (pt->last_gen_avg_fitness - pt->avg_fitness) * 100;
+#ifdef GENETIC_DEBUG_VERBOSE
+						printk("  name: %s from_top: %lld avg_fitness: %lld\n", pt->name, pt->from_top, pt->avg_fitness);
+#endif
+						divll(&pt->from_top, (pt->avg_fitness > 0) ? pt->avg_fitness : -pt->avg_fitness, &dummy);
+
+						total_fitness += pt->from_top;
+#ifdef GENETIC_DEBUG_VERBOSE
+						printk("  total_fitness: %lld\n", total_fitness);
+#endif
+					}
+				}
+
+				i++;
+
+			}
+
+		} else {
+				/* XXX horrible horrible hack...but
+				 * testing viability */
+				total_fitness = 0;
+				i = 1;
+		}
+
+		BUG_ON(!i);
+
+		in_pt->last_gen_avg_fitness = total_fitness;
+		divll(&in_pt->last_gen_avg_fitness, i, &dummy);
+
+#ifdef GENETIC_DEBUG_VERBOSE
+		printk("  in_pt->last_gent_avg_fitness: %lld\n", in_pt->last_gen_avg_fitness);
+#endif
+	} else {
+		/* calculate the avg fitness for this generation and avg fitness
+		 * so far */
+		for (i = 0; i < in_pt->num_children; i++)
+			total_fitness += in_pt->child_ranking[i]->fitness;
+
+		in_pt->last_gen_avg_fitness = total_fitness >> ilog2(in_pt->num_children);
+	}
+
+	/* Mutation rate calibration */
+	genetic_shift_mutation_rate(in_pt, prev_gen_avg_fitness,
+			in_pt->last_gen_avg_fitness);
+
+	in_pt->num_mutations = ((in_pt->num_children * in_pt->num_genes) * in_pt->mutation_rate) / 100;
+
+	/* calc new avg fitness */
+	tmp_fitness = in_pt->last_gen_avg_fitness - in_pt->avg_fitness;
+	divll(&tmp_fitness, in_pt->genetic->generation_number, &dummy);
+	in_pt->avg_fitness += tmp_fitness;
+
+	in_pt->fitness_history[in_pt->fitness_history_index++ & GENETIC_HISTORY_MASK] =
+		in_pt->last_gen_avg_fitness;
+
+#ifdef GENETIC_DEBUG_VERBOSE
+	printk("finished genetic_calc_stats()\n");
+#endif
+}
+
+
+void genetic_new_generation(genetic_t * genetic)
+{
+	struct list_head * tmp;
+
+	struct list_head * p;
+	phenotype_t * pt;
+
+	list_for_each(p, &genetic->phenotype) {
+		pt = list_entry(p, phenotype_t, phenotype);
+
+		/* Check to see if need to recalibrate fitness to take
+		   other phenotypes' rankings into account.  This
+		   should be ran after all phenotypes that have input
+		   have been ran. */
+		if (pt->ops->calc_post_fitness)
+			pt->ops->calc_post_fitness(pt);
+
+		dump_children(pt);
+
+		/* figure out top performers */
+		genetic_split_performers(pt);
+
+		/* calc stats */
+		genetic_calc_stats(pt);
+
+		dump_children(pt);
+
+		/* make some new children */
+		if (pt->num_genes)
+			pt->natural_selection(pt);
+
+		dump_children(pt);
+
+#ifdef CONFIG_FINGERPRINTING
+		if (pt->ops->get_fingerprint) {
+
+			pt->ops->get_fingerprint(pt);
+			reset_fp_snapshot(pt->fp_ss);
+
+			/* See if this generation was a top performer
+			 * for the current workload.
+			 * Do this after natural selection to get rid
+			 * of the bad apples
+			 */
+			update_top_performers(pt);
+
+			/* We know the workload, lets put some known
+			   good genes back in */
+			reintroduce_genes(pt);
+
+			pt->last_fingerprint = create_fingerprint(pt->fp);
+		}
+
+		dump_children(pt);
+#endif
+
+		/* mutate a couple of the next generation */
+		genetic_mutate(pt);
+
+		dump_children(pt);
+
+		/* Move the new children still sitting in the finished queue to
+		   the run queue */
+		tmp = pt->run_queue;
+		pt->run_queue = pt->finished_queue;
+		pt->finished_queue = tmp;
+
+		pt->child_number = 0;
+#if GENETIC_DEBUG
+		pt->debug_index = 0;
+#endif
+
+	}
+
+	genetic->child_number = 0;
+	genetic->generation_number++;
+
+}
+
+/**
+ * genetic_generic_random_mutate_gene - mutate child's gene to value in range
+ * @child: child whose gene we are mutating
+ * @gene_num: gene index from gene_param to mutate; gene must be unsigned long
+ *
+ * Mutate a gene picking a random value within the gene range that was
+ * specified in @child->gene_param.
+ */
+void genetic_generic_random_mutate_gene(genetic_child_t * child,
+		unsigned long gene_num)
+{
+	unsigned long *genes = (unsigned long *)child->genes;
+	unsigned long min = child->gene_param[gene_num].min;
+	unsigned long max = child->gene_param[gene_num].max;
+	unsigned long gene_value;
+	unsigned long range = max - min + 1;
+
+	/* create a mutation value */
+	get_random_bytes(&gene_value, sizeof(gene_value));
+
+	gene_value = gene_value % range;
+
+	genes[gene_num] = min + gene_value;
+}
+
+/**
+ * genetic_generic_iterative_mutate_gene
+ * @child: child whose gene we are mutating
+ * @gene_num: gene index from gene_param to mutate; gene must be unsigned long
+ */
+void genetic_generic_iterative_mutate_gene(genetic_child_t * child,
+		unsigned long gene_num)
+{
+	unsigned long *genes = (unsigned long *)child->genes;
+	unsigned long min = child->gene_param[gene_num].min;
+	unsigned long max = child->gene_param[gene_num].max;
+	long change;
+	unsigned long old_value = genes[gene_num];
+	unsigned long new_value;
+	unsigned long range = max - min + 1;
+
+	/* If under 5, random might work better */
+	if (range < 5)
+		return genetic_generic_random_mutate_gene(child, gene_num);
+
+	/* get the % of change */
+	get_random_bytes(&change, sizeof(change));
+
+	change = change % GENETIC_ITERATIVE_MUTATION_RANGE;
+
+	new_value = ((long)(change * range) / (long)100) + old_value;
+
+	if (new_value > max)
+		new_value = max;
+	else if (new_value < min)
+		new_value = min;
+
+	genes[gene_num] = new_value;
+}
+
+/**
+ * genetic_generic_mutate_child - mutate random gene in child
+ * @child: child whose gene we are mutating.
+ *
+ * Select a random gene and mutate it either using either the mutate_gene
+ * callback specified in '&struct gene_param' OR if that is NULL then use
+ * 'genetic_generic_random_mutate_gene()'
+ */
+void genetic_generic_mutate_child(genetic_child_t * child)
+{
+	long gene_num = -1;
+
+	/* pick a random gene */
+	get_random_bytes(&gene_num, sizeof(gene_num));
+
+	if (gene_num < 0)
+		gene_num = -gene_num;
+
+	gene_num = gene_num % child->num_genes;
+
+	if (child->gene_param[gene_num].mutate_gene)
+		child->gene_param[gene_num].mutate_gene(child, gene_num);
+	else
+		genetic_generic_random_mutate_gene(child, gene_num);
+}
+
+/**
+ * genetic_generic_mutate_child - set all genes to their initial value
+ */
+void genetic_create_child_defaults(genetic_child_t * child)
+{
+	int i;
+	unsigned long * genes = child->genes;
+
+	for (i = 0; i < child->num_genes; i++) {
+		genes[i] = child->gene_param[i].initial;
+	}
+}
+
+void genetic_create_child_spread(genetic_child_t * child,
+		unsigned long num_children)
+{
+	int i;
+	unsigned long range;
+	int range_incr;
+	int child_num = child->id;
+	long num_genes = child->num_genes;
+	unsigned long * genes = child->genes;
+
+	for (i = 0; i < num_genes; i++) {
+		range = child->gene_param[i].max - child->gene_param[i].min + 1;
+		range_incr = range / num_children;
+		if (range_incr)
+			genes[i] = child->gene_param[i].min +
+				(range_incr * child_num);
+		else
+			genes[i] = child->gene_param[i].min +
+				(child_num / (num_children / range));
+	}
+
+}
+
+#if 0
+/* Randomly pick which parent to use for each gene to create a child */
+void genetic_generic_combine_genes(genetic_child_t * parent_a,
+				   genetic_child_t * parent_b,
+				   genetic_child_t * child)
+{
+	unsigned long * genes_a = (unsigned long *)parent_a->genes;
+	unsigned long * genes_b = (unsigned long *)parent_b->genes;
+	unsigned long * child_genes = (unsigned long *)child->genes;
+
+	/* Assume parent_a and parent_b have same num_genes */
+	unsigned long num_genes = parent_a->num_genes;
+	int parent_selector;
+	int i;
+
+	get_random_bytes(&parent_selector, sizeof(parent_selector));
+
+	if ((sizeof(parent_selector) * 8) < num_genes)
+		BUG();
+
+	for (i = 0; i < num_genes; i++) {
+		/* Look at each bit to determine which parent to use */
+		if (parent_selector & 1) {
+			child_genes[i] = genes_a[i];
+		} else {
+			child_genes[i] = genes_b[i];
+		}
+		parent_selector >>= 1;
+	}
+}
+#else
+
+/**
+ * genetic_generic_combine_genes - create child using comb. of parent's genes
+ * @parent_a: gene doner
+ * @parent_b: gene doner
+ * @child: genes will be modified using a combination of a and b
+ */
+void genetic_generic_combine_genes(genetic_child_t * parent_a,
+				   genetic_child_t * parent_b,
+				   genetic_child_t * child)
+{
+	unsigned long * genes_a = (unsigned long *)parent_a->genes;
+	unsigned long * genes_b = (unsigned long *)parent_b->genes;
+	unsigned long * child_genes = (unsigned long *)child->genes;
+
+	/* Assume parent_a and parent_b have same num_genes */
+	unsigned long num_genes = parent_a->num_genes;
+	int percentage;
+	int i;
+
+	for (i = 0; i < num_genes; i++) {
+		get_random_bytes(&percentage, sizeof(percentage));
+
+		/* Get percentage */
+		percentage = percentage % 100;
+
+		if (percentage < 0)
+			percentage = -percentage;
+
+		/* Give child x% of parent A's genes value, plus
+		   100-x% of parent B's genes value */
+		child_genes[i] = (((genes_a[i]+1) * percentage) +
+			(genes_b[i] * (100 - percentage))) / 100;
+	}
+}
+#endif
+
+static int __init genetic_boot_setup(char *str)
+{
+	if (strcmp(str, "on") == 0)
+		genetic_lib_enabled = 1;
+	else if (strcmp(str, "off") == 0)
+		genetic_lib_enabled = 0;
+
+	return 1;
+}
+
+
+static int __init genetic_mutation_rate_change_setup(char *str)
+{
+	int i;
+
+	if (get_option(&str,&i)) {
+
+		if (i > GENETIC_MAX_MUTATION_RATE)
+			i = GENETIC_MAX_MUTATION_RATE;
+		else if (i < 0)
+			i = 0;
+
+		mutation_rate_change = i;
+	}
+
+	return 1;
+
+}
+__setup("genetic=", genetic_boot_setup);
+__setup("genetic_mutate_rate=", genetic_mutation_rate_change_setup);
Index: linux-2.6.23/lib/genetic-debug.c
===================================================================
--- /dev/null
+++ linux-2.6.23/lib/genetic-debug.c
@@ -0,0 +1,153 @@
+/*
+ * Genetic Algorithm Library Debugging Routines
+ *
+ * (C) Copyright 2006 IBM
+ * (C) Copyright 2006 Brandon Philips <brandon@ifup.org>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as published
+ * by the Free Software Foundation.
+ */
+
+#include <linux/debugfs.h>
+#include <linux/genetic.h>
+#include <linux/seq_file.h>
+
+struct dentry * genetic_tree_root = NULL;
+
+/**
+ * genetic_stat_show - generates /debug/genetic/<name>/stats
+ */
+static int genetic_stat_show(struct seq_file *s, void *unused)
+{
+	genetic_t * genetic = (genetic_t *)s->private;
+
+	seq_printf(s, "name: %s\n", genetic->name);
+	seq_printf(s, "generation_number: %ld\n", genetic->generation_number);
+	seq_printf(s, "num_children: %ld\n", genetic->num_children);
+	seq_printf(s, "child_life_time: %ld\n", genetic->child_life_time);
+	seq_printf(s, "child_number: %ld\n", genetic->child_number);
+
+	return 0;
+}
+
+static int genetic_stat_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, genetic_stat_show, inode->i_private);
+}
+
+static struct file_operations genetic_stat_operations = {
+	.open		= genetic_stat_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= single_release,
+};
+
+/**
+ * genetic_phenotype_average_show - /debug/genetic/<name>/phenotype_average
+ */
+static int genetic_phenotype_average_show(struct seq_file *s, void *unused)
+{
+	genetic_t * genetic = (genetic_t *)s->private;
+	struct list_head * p;
+	phenotype_t * pt;
+
+	list_for_each(p, &genetic->phenotype) {
+		pt = list_entry(p, phenotype_t, phenotype);
+		seq_printf(s, "%s: %lld\n", pt->name, pt->avg_fitness);
+	}
+
+	return 0;
+}
+
+static int genetic_phenotype_average_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, genetic_phenotype_average_show, inode->i_private);
+}
+
+static struct file_operations genetic_phenotype_average_operations = {
+	.open		= genetic_phenotype_average_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= single_release,
+};
+
+
+/**
+ * genetic_genes_show - /debug/genetic/<name>/gene
+ */
+int genetic_generic_gene_show(struct seq_file *s, void *unused)
+{
+	int i;
+	phenotype_t * pt = (phenotype_t *)s->private;
+
+	genetic_child_t * child = list_entry(pt->run_queue->next,
+						genetic_child_t, list);
+
+	unsigned long * genes = (unsigned long *)child->genes;
+
+	for (i = 0; i < pt->num_genes; i++)
+		seq_printf(s, "%s: %lu\n", child->gene_param[i].name, genes[i]);
+
+	return 0;
+}
+
+static int genetic_generic_gene_open(struct inode *inode, struct file *file)
+{
+	phenotype_t * pt = (phenotype_t *)inode->i_private;
+
+	return single_open(file, pt->ops->gene_show, inode->i_private);
+}
+
+static struct file_operations genetic_gene_operations = {
+	.open		= genetic_generic_gene_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= single_release,
+};
+
+
+static struct dentry *genetic_create_tree(const char *name, struct dentry *parent)
+{
+	struct dentry *dir = NULL;
+
+	if (!genetic_tree_root) {
+		genetic_tree_root = debugfs_create_dir("genetic", NULL);
+		if (!genetic_tree_root)
+			goto err;
+	}
+
+	if (!parent) parent = genetic_tree_root;
+
+	dir = debugfs_create_dir(name, parent);
+
+err:
+	return dir;
+}
+
+#if GENETIC_DEBUG
+/* Stores attributes into an array in the following format
+ * child_num fitness gene[0] gene[1] .... gene[num_genes-1]
+ * Add +1 to GENETIC_NUM_DEBUG_POINTS if add another dump_children
+ * call
+ */
+void dump_children(phenotype_t * pt)
+{
+	int i, j;
+	long * genes;
+	unsigned long debug_size = pt->debug_size;
+
+	for (i = 0; i < pt->num_children; i++) {
+		pt->debug_history[pt->debug_index++ % debug_size] = pt->child_ranking[i]->id;
+		pt->debug_history[pt->debug_index++ % debug_size] = pt->child_ranking[i]->fitness;
+
+		genes = (long *)pt->child_ranking[i]->genes;
+
+		for (j = 0; j < pt->child_ranking[i]->num_genes; j++) {
+			pt->debug_history[pt->debug_index++ % debug_size] = genes[j];
+		}
+	}
+}
+#else
+void dump_children(phenotype_t * pt) {}
+#endif /* GENETIC_DEBUG */
Index: linux-2.6.23/lib/fingerprinting.c
===================================================================
--- /dev/null
+++ linux-2.6.23/lib/fingerprinting.c
@@ -0,0 +1,291 @@
+static int create_fingerprint(struct fingerprint * fp)
+{
+	int numerical_fp = 0;
+
+	numerical_fp |= fp->type;
+	numerical_fp <<= 1;
+
+	numerical_fp |= fp->pattern;
+	numerical_fp <<= 1;
+
+	numerical_fp |= fp->size;
+
+	return numerical_fp;
+}
+
+static long long get_top_fitness(phenotype_t * pt, struct fingerprint * fp)
+{
+	return pt->top_fitness[fp->type][fp->pattern][fp->size];
+}
+
+
+static int top_fitness_open(struct inode *inode, struct file *file)
+{
+	phenotype_t * pt = (phenotype_t *)inode->i_private;
+
+	return single_open(file, pt->ops->top_fitness_show, inode->i_private);
+}
+
+static struct file_operations top_fitness_ops = {
+	.open		= top_fitness_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= single_release,
+};
+
+static int snapshot_open(struct inode *inode, struct file *file)
+{
+	phenotype_t * pt = (phenotype_t *)inode->i_private;
+
+	return single_open(file, pt->ops->snapshot_show, inode->i_private);
+}
+
+static struct file_operations snapshot_ops = {
+	.open		= snapshot_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= single_release,
+};
+
+static int state_open(struct inode *inode, struct file *file)
+{
+	phenotype_t * pt = (phenotype_t *)inode->i_private;
+
+	return single_open(file, pt->ops->state_show, inode->i_private);
+}
+
+static struct file_operations state_ops = {
+	.open		= state_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= single_release,
+};
+
+
+int genetic_init_fingerprinting(phenotype_t * pt)
+{
+	int i, j, k;
+	struct genetic_ops * ops = pt->ops;
+	int num_genes = pt->num_genes;
+
+	if (num_genes) {
+
+		pt->fp = (struct fingerprint *)kmalloc(
+				sizeof(struct fingerprint), GFP_KERNEL);
+
+		if (!pt->fp) {
+			printk(KERN_ERR "genetic_register_phenotype: not enough"
+				       	"memory\n");
+			return -ENOMEM;
+		}
+
+		reset_fp(pt->fp);
+
+		pt->fp_ss = (struct fp_snapshot *)kmalloc(
+				sizeof(struct fp_snapshot), GFP_KERNEL);
+
+		if (!pt->fp_ss) {
+			printk(KERN_ERR "genetic_register_phenotype: not enough"
+				       	"memory\n");
+			return -ENOMEM;
+		}
+
+		reset_fp_snapshot(pt->fp_ss);
+
+		pt->top_child = (unsigned long ***)kmalloc(
+				sizeof(unsigned long ***) * 2, GFP_KERNEL);
+
+		if (!pt->top_child) {
+			printk(KERN_ERR "genetic_register_phenotype: not enough"
+				       	"memory\n");
+			return -ENOMEM;
+		}
+
+		for (i = 0; i < 2; i++) {
+			pt->top_child[i] = (unsigned long **)kmalloc(
+					sizeof(unsigned long **) * 2,
+					GFP_KERNEL);
+
+			if (!pt->top_child[i]) {
+				printk(KERN_ERR "genetic_register_phenotype:\
+						 not enough memory\n");
+				return -ENOMEM;
+			}
+
+			for (j = 0; j < 2; j++) {
+				pt->top_child[i][j] = (unsigned long *)kmalloc(
+						sizeof(unsigned long *) * 2,
+						GFP_KERNEL);
+
+				if (!pt->top_child[i][j]) {
+					printk(KERN_ERR "genetic_register_phenotype: not enough memory\n");
+					return -ENOMEM;
+				}
+
+				for (k = 0; k < 2; k++) {
+					pt->top_child[i][j][k] = (unsigned long)ops->create_top_genes(pt);
+					if (!pt->top_child[i][j][k])
+						return -ENOMEM;
+				}
+			}
+		}
+	} /* if (num_genes) */
+
+	pt->top_fitness = (long long ***)kmalloc(sizeof(long long ***) * 2, GFP_KERNEL);
+	if (!pt->top_fitness) {
+		printk(KERN_ERR "genetic_register_phenotype: not enough"
+				"memory\n");
+		return -ENOMEM;
+	}
+
+	for (i = 0; i < 2; i++) {
+		pt->top_fitness[i] = (long long **)kmalloc(sizeof(long long **) * 2, GFP_KERNEL);
+		if (!pt->top_fitness[i]) {
+			printk(KERN_ERR "genetic_register_phenotype: not"
+					"enough memory\n");
+			return -ENOMEM;
+		}
+
+		for (j = 0; j < 2; j++) {
+			pt->top_fitness[i][j] = (long long *)kmalloc(
+					sizeof(long long *) * 2,
+					GFP_KERNEL);
+
+			if (!pt->top_fitness[i][j]) {
+				printk(KERN_ERR "genetic_register_phenotype: "
+						"not enough memory\n");
+				return -ENOMEM;
+			}
+
+			for (k = 0; k < 2; k++) {
+				pt->top_fitness[i][j][k] = 0;
+			}
+		}
+	}
+
+	pt->last_fingerprint = 0;
+
+	if (pt->genetic->fingerprinting_dir) {
+		pt->fp_dir = genetic_create_tree(pt->name,
+						 pt->genetic->fingerprinting_dir);
+
+		if (ops->top_fitness_show)
+			debugfs_create_file("top_fitness", S_IFREG|S_IRUGO,
+				pt->fp_dir, pt, &top_fitness_ops);
+
+		if (ops->snapshot_show)
+			debugfs_create_file("snapshot", S_IFREG|S_IRUGO,
+				pt->fp_dir, pt, &snapshot_ops);
+
+		if (ops->state_show)
+			debugfs_create_file("state", S_IFREG|S_IRUGO,
+				pt->fp_dir, pt, &state_ops);
+	}
+
+	return 0;
+}
+
+static void decay_fitness(phenotype_t * pt, struct fingerprint * fp)
+{
+	long long fitness;
+	long dummy;
+
+	fitness = get_top_fitness(pt, fp);
+
+	/* reduce the fitness to eventually get new genes in */
+	fitness *= FP_DECAY;
+	divll(&fitness, 100, &dummy);
+
+	pt->top_fitness[fp->type][fp->pattern][fp->size] = fitness;
+}
+
+static void update_phenotype_top_performer(phenotype_t * pt, struct fingerprint * fp)
+{
+	long long top_fitness;
+	unsigned long * genes;
+	long long * avg_genes;
+	long dummy;
+	int i, j;
+
+
+	/* Decay the top fitness so not to have a fluke and have a
+	 * high set which are less than optimal.  So decay the top
+	 * fitness so eventually these genes are phased out.
+	 */
+	decay_fitness(pt, fp);
+
+	top_fitness = get_top_fitness(pt, fp);
+
+	if (pt->last_gen_avg_fitness >= top_fitness) {
+
+		pt->top_fitness[fp->type][fp->pattern][fp->size] = pt->last_gen_avg_fitness;
+
+		/* We don't need to track this if there's no genes! */
+		if (!pt->num_genes)
+			return;
+
+		avg_genes = (long long *)kmalloc(sizeof(long long) * pt->num_genes, GFP_KERNEL);
+		if (!avg_genes) {
+			printk(KERN_ERR "update_top_performers: unable to alloc space\n");
+			return;
+		}
+
+		memset(avg_genes, 0, sizeof(long long) * pt->num_genes);
+
+		for (i = 0; i < pt->num_genes; i++) {
+			for (j = 0; j < pt->num_children; j++) {
+				genes = pt->child_ranking[j]->genes;
+				avg_genes[i] += genes[i];
+			}
+		}
+
+		for (j = 0; j < pt->num_genes; j++)
+			divll(&avg_genes[j], pt->num_children, &dummy);
+
+		genes = (unsigned long *)pt->top_child[fp->type][fp->pattern][fp->size];
+		for (j = 0; j < pt->num_genes; j++)
+			genes[j] = avg_genes[j];
+
+		kfree(avg_genes);
+	}
+}
+
+static void update_top_performers(phenotype_t * master)
+{
+	phenotype_t * pt;
+	struct list_head * p;
+
+	list_for_each(p, &master->genetic->phenotype) {
+		pt = list_entry(p, phenotype_t, phenotype);
+
+		if (master->uid & pt->uid && master->uid != pt->uid) {
+			update_phenotype_top_performer(pt, master->fp);
+		}
+	}
+	update_phenotype_top_performer(master, master->fp);
+}
+
+static void reintroduce_genes(phenotype_t * master)
+{
+	struct fingerprint * fp = master->fp;
+	phenotype_t * pt;
+	unsigned long * top_genes;
+	unsigned long * genes;
+	struct list_head * p;
+	int i;
+
+	list_for_each(p, &master->genetic->phenotype) {
+		pt = list_entry(p, phenotype_t, phenotype);
+
+		if (pt->num_genes) {
+
+			/* Do this more intelligently, so can have n-points on
+			   the fingerprint */
+			/* just take the first one */
+			top_genes = (unsigned long *)pt->top_child[fp->type][fp->pattern][fp->size];
+			genes = pt->child_ranking[0]->genes;
+			for (i = 0; i < pt->num_children; i++)
+				genes[i] = top_genes[i];
+		}
+	}
+}
Index: linux-2.6.23/include/linux/fingerprinting.h
===================================================================
--- /dev/null
+++ linux-2.6.23/include/linux/fingerprinting.h
@@ -0,0 +1,125 @@
+#ifndef __LINUX_FINGERPRINTING_H
+#define __LINUX_FINGERPRINTING_H
+
+/*
+ * include/linux/fingerprinting.h
+ *
+ * Jake Moilanen <moilanen@austin.ibm.com>
+ * Copyright (C) 2006 IBM
+ *
+ * I/O Workload Fingerprinting
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as published
+ * by the Free Software Foundation.
+*/
+
+#include <linux/types.h>
+#include <linux/bio.h>
+
+#define FP_TYPE_READ	0
+#define FP_TYPE_WRITE	1
+#define FP_PATTERN_SEQ	0
+#define FP_PATTERN_RAND	1
+#define FP_SIZE_SMALL	0
+#define FP_SIZE_LARGE	1
+#define FP_NUM_POINTS	(2 * 2 * 2)
+
+struct fingerprint {
+	__u8 type;
+	__u8 pattern;
+	__u8 size;
+};
+
+struct fp_snapshot {
+	/* type */
+	unsigned long reads;
+	unsigned long writes;
+	/* pattern */
+	unsigned long head_pos;
+	unsigned long avg_dist;
+	/* size */
+	unsigned long avg_size;
+};
+
+/* Number of reads/writes before classified as read */
+#define FP_CLASS_READ_WRITE_RATIO	2
+
+/* Number of sectors before pattern is random */
+#define FP_CLASS_PATTERN_RAND		25
+
+/* Number of sectors before size is large */
+#define FP_CLASS_SIZE_LARGE		8
+
+extern void update_fp_snapshot(struct bio * bio);
+extern void calc_fp(struct fingerprint * fp, struct fp_snapshot * fp_ss);
+extern void reset_fp_snapshot(struct fp_snapshot * ss);
+extern void reset_fp(struct fingerprint * fp);
+extern void consolidate_fp_snapshot(struct fp_snapshot * master, struct fp_snapshot * instance);
+extern int fingerprint_state_show(struct seq_file *s, void *unused);
+extern int fingerprint_snapshot_show(struct seq_file *s, void *unused);
+extern int fingerprint_top_fitness_show(struct seq_file *s, void *unused);
+
+/* XXX do this more intelligently */
+#ifndef DIVLL_OP
+#define DIVLL_OP
+#if BITS_PER_LONG >= 64
+
+static inline void divll(long long *n, long div, long *rem)
+{
+        *rem = *n % div;
+        *n /= div;
+}
+
+#else
+
+static inline void divl(int32_t high, int32_t low,
+                        int32_t div,
+                        int32_t *q, int32_t *r)
+{
+        int64_t n = (u_int64_t)high << 32 | low;
+        int64_t d = (u_int64_t)div << 31;
+        int32_t q1 = 0;
+        int c = 32;
+        while (n > 0xffffffff) {
+                q1 <<= 1;
+                if (n >= d) {
+                        n -= d;
+                        q1 |= 1;
+                }
+                d >>= 1;
+                c--;
+        }
+        q1 <<= c;
+        if (n) {
+                low = n;
+                *q = q1 | (low / div);
+                *r = low % div;
+        } else {
+                *r = 0;
+                *q = q1;
+        }
+        return;
+}
+
+static inline void divll(long long *n, long div, long *rem)
+{
+        int32_t low, high;
+        low = *n & 0xffffffff;
+        high = *n >> 32;
+        if (high) {
+                int32_t high1 = high % div;
+                int32_t low1 = low;
+                high /= div;
+                divl(high1, low1, div, &low, (int32_t *)rem);
+                *n = (int64_t)high << 32 | low;
+        } else {
+                *n = low / div;
+                *rem = low % div;
+        }
+}
+#endif
+
+#endif /* #ifndef divll */
+
+#endif /* __LINUX_FINGERPRINTINT_H */
Index: linux-2.6.23/block/fingerprinting.c
===================================================================
--- /dev/null
+++ linux-2.6.23/block/fingerprinting.c
@@ -0,0 +1,205 @@
+/*
+ * block/fingerprinting.c
+ *
+ * Jake Moilanen <moilanen@austin.ibm.com>
+ * Copyright (C) 2006 IBM
+ *
+ * I/O Workload Fingerprinting
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as published
+ * by the Free Software Foundation. 
+*/
+/* TODOS:
+ * - Abstract so no so IO specific
+ * - Abstract types
+ */
+
+#include <linux/genhd.h>
+#include <linux/blkdev.h>
+#include <linux/fingerprinting.h>
+#include <linux/genetic.h>
+
+int fingerprint_state_show(struct seq_file *s, void *unused)
+{
+	phenotype_t * pt = (phenotype_t *)s->private;
+	struct fingerprint * fp = pt->fp;
+
+	if (fp->type == FP_TYPE_READ)
+		seq_printf(s,    "read\t(%d)\n", FP_TYPE_READ);
+	else
+		seq_printf(s,    "write\t(%d)\n", FP_TYPE_WRITE);
+
+	if (fp->pattern == FP_PATTERN_SEQ)
+		seq_printf(s,    "sequential\t(%d)\n", FP_PATTERN_SEQ);
+	else
+		seq_printf(s,    "random\t(%d)\n", FP_PATTERN_RAND);
+
+	if (fp->size == FP_SIZE_SMALL)
+		seq_printf(s,    "small\t(%d)\n", FP_SIZE_SMALL);
+	else
+		seq_printf(s,    "large\t(%d)\n", FP_SIZE_LARGE);
+
+	return 0;
+}
+
+int fingerprint_snapshot_show(struct seq_file *s, void *unused)
+{
+	phenotype_t * pt = (phenotype_t *)s->private;
+	struct fp_snapshot * ss = pt->fp_ss;
+
+	seq_printf(s,    "read: %ld\n", ss->reads);
+	seq_printf(s,    "write: %ld\n", ss->writes);
+
+	seq_printf(s,    "avg_dist: %ld\n", ss->avg_dist);
+	seq_printf(s,    "avg_size: %ld\n", ss->avg_size);
+
+	return 0;
+}
+
+
+int fingerprint_top_fitness_show(struct seq_file *s, void *unused)
+{
+	int i, j, k;
+	phenotype_t * pt = (phenotype_t *)s->private;
+
+	for (i = 0; i < 2; i++)
+		for (j = 0; j < 2; j++)
+			for (k = 0; k < 2; k++)
+				seq_printf(s, "top_fitness[%d][%d][%d]: %lld\n",
+					     i, j, k, pt->top_fitness[i][j][k]);
+
+	return 0;
+}
+
+
+/* This assumes that address matches up w/ head_pos */
+static void update_avg_dist(struct fp_snapshot * ss, long head_pos)
+{
+	long long tmp_dist;
+	unsigned long total_ops = ss->reads + ss->writes;
+	long dummy;
+
+	/* set it the first time through */
+	if (!ss->head_pos) {
+		ss->head_pos = head_pos;
+		return;
+	}
+	tmp_dist = ss->head_pos - head_pos;
+	if (tmp_dist < 0)
+		tmp_dist = -tmp_dist;
+
+	tmp_dist = tmp_dist - ss->avg_dist;
+
+	divll(&tmp_dist, total_ops, &dummy);
+	ss->avg_dist += tmp_dist;
+
+	ss->head_pos = head_pos;
+
+}
+
+static void update_avg_size(struct fp_snapshot * ss, unsigned long size)
+{
+	unsigned long total_ops = ss->reads + ss->writes;
+	long long tmp_size;
+	long dummy;
+
+	tmp_size = size - ss->avg_size;
+	divll(&tmp_size, total_ops, &dummy);
+	ss->avg_size += tmp_size;
+//	ss->avg_size += (size - ss->avg_size) / total_ops;
+}
+
+void update_fp_snapshot(struct bio * bio)
+{
+	struct fp_snapshot * ss = bio->bi_bdev->bd_disk->fp_ss;
+
+	/* update type */
+	if (bio_data_dir(bio) == READ)
+		ss->reads++;
+	else
+		ss->writes++;
+
+	/* update pattern */
+//	update_avg_dist(ss, bio_to_phys(bio));
+	update_avg_dist(ss, bio->bi_sector);
+
+	/* update size */
+//	update_avg_size(ss, bio_iovec(bio)->bv_len);
+	update_avg_size(ss, bio_sectors(bio));
+
+}
+
+/* Use this when there's multiple disks, and need to consolidate to a
+ * system wide fingerprint
+ */
+void consolidate_fp_snapshot(struct fp_snapshot * master, struct fp_snapshot * instance)
+{
+	unsigned long total_ops;
+	long dummy;
+	long long total_dist;
+	long long total_size;
+
+	BUG_ON(!master);
+	BUG_ON(!instance);
+
+	total_dist = master->avg_dist * (master->reads + master->writes);
+	total_size = master->avg_size * (master->reads + master->writes);
+
+	/* update operations */
+	master->reads += instance->reads;
+	master->writes += instance->writes;
+	total_ops = master->reads + master->writes;
+
+	/* update distance */
+	total_dist += (instance->avg_dist * (instance->reads + instance->writes));
+	if (total_ops) {
+		divll(&total_dist, total_ops, &dummy);
+		master->avg_dist = total_dist;
+	} else
+		master->avg_dist = 0;
+
+	/* update size */
+	total_size += (instance->avg_size * (instance->reads + instance->writes));
+	if (total_ops) {
+		divll(&total_size, total_ops, &dummy);
+		master->avg_size = total_size;
+	} else
+		master->avg_size = 0;
+}
+
+void reset_fp_snapshot(struct fp_snapshot * ss)
+{
+	memset(ss, 0, sizeof(struct fp_snapshot));
+}
+
+void reset_fp(struct fingerprint * fp)
+{
+	memset(fp, 0, sizeof(struct fingerprint));
+}
+
+//void calc_fp(struct fingerprint * fp, struct fp_snapshot * fp_ss, struct block_device * dev)
+void calc_fp(struct fingerprint * fp, struct fp_snapshot * fp_ss)
+{
+	/* type */
+	if (fp_ss->reads > (fp_ss->writes * FP_CLASS_READ_WRITE_RATIO))
+		fp->type = FP_TYPE_READ;
+	else
+		fp->type = FP_TYPE_WRITE;
+
+	/* pattern */
+//	if (fp_ss->avg_dist >= (block_size(dev) * FP_CLASS_PATTERN_RAND))
+	if (fp_ss->avg_dist >= (512 * FP_CLASS_PATTERN_RAND))
+		fp->pattern = FP_PATTERN_RAND;
+	else
+		fp->pattern = FP_PATTERN_SEQ;
+
+	/* size */
+	if (fp_ss->avg_size > FP_CLASS_SIZE_LARGE)
+		fp->size = FP_SIZE_LARGE;
+	else
+		fp->size = FP_SIZE_SMALL;
+}
+
+
+
Index: linux-2.6.23/block/ll_rw_blk.c
===================================================================
--- linux-2.6.23.orig/block/ll_rw_blk.c
+++ linux-2.6.23/block/ll_rw_blk.c
@@ -30,6 +30,7 @@
 #include <linux/cpu.h>
 #include <linux/blktrace_api.h>
 #include <linux/fault-inject.h>
+#include <linux/fingerprinting.h>
 
 /*
  * for max sense size
@@ -2893,6 +2894,9 @@ static void init_request_from_bio(struct
 {
 	req->cmd_type = REQ_TYPE_FS;
 
+#ifdef CONFIG_FINGERPRINTING
+	update_fp_snapshot(bio);
+#endif
 	/*
 	 * inherit FAILFAST from bio (for read-ahead, and explicit FAILFAST)
 	 */
Index: linux-2.6.23/include/linux/genhd.h
===================================================================
--- linux-2.6.23.orig/include/linux/genhd.h
+++ linux-2.6.23/include/linux/genhd.h
@@ -67,6 +67,7 @@ struct partition {
 #include <linux/string.h>
 #include <linux/fs.h>
 #include <linux/workqueue.h>
+#include <linux/fingerprinting.h>
 
 struct partition {
 	unsigned char boot_ind;		/* 0x80 - active */
@@ -91,6 +92,7 @@ struct gendisk {
 	struct disk_stats dkstats;
 #endif
 	struct work_struct async_notify;
+	struct fp_snapshot * fp_ss;
 };
 
 /* Structure for sysfs attributes on block devices */
Index: linux-2.6.23/block/genhd.c
===================================================================
--- linux-2.6.23.orig/block/genhd.c
+++ linux-2.6.23/block/genhd.c
@@ -445,6 +445,20 @@ static ssize_t disk_stats_read(struct ge
 		jiffies_to_msecs(disk_stat_read(disk, io_ticks)),
 		jiffies_to_msecs(disk_stat_read(disk, time_in_queue)));
 }
+static ssize_t disk_fp_read(struct gendisk * disk, char *page)
+{
+	return sprintf(page, "reads: %llx\n"
+		       "writes: %llx\n"
+		       "head_pos: %llx\n"
+		       "avg_dist: %llx\n"
+		       "avg_size: %llx\n",
+		       (unsigned long long)disk->fp_ss->reads,
+		       (unsigned long long)disk->fp_ss->writes,
+		       (unsigned long long)disk->fp_ss->head_pos,
+		       (unsigned long long)disk->fp_ss->avg_dist,
+		       (unsigned long long)disk->fp_ss->avg_size);
+}
+
 static struct disk_attribute disk_attr_uevent = {
 	.attr = {.name = "uevent", .mode = S_IWUSR },
 	.store	= disk_uevent_store
@@ -473,6 +487,10 @@ static struct disk_attribute disk_attr_s
 	.attr = {.name = "stat", .mode = S_IRUGO },
 	.show	= disk_stats_read
 };
+static struct disk_attribute disk_attr_fp = {
+	.attr = {.name = "fp", .mode = S_IRUGO },
+	.show	= disk_fp_read
+};
 
 #ifdef CONFIG_FAIL_MAKE_REQUEST
 
@@ -513,6 +531,7 @@ static struct attribute * default_attrs[
 #ifdef CONFIG_FAIL_MAKE_REQUEST
 	&disk_attr_fail.attr,
 #endif
+	&disk_attr_fp.attr,
 	NULL,
 };
 
@@ -745,6 +764,10 @@ struct gendisk *alloc_disk_node(int mino
 		INIT_WORK(&disk->async_notify,
 			media_change_notify_thread);
 	}
+
+	disk->fp_ss = kmalloc(sizeof(struct fp_snapshot), GFP_KERNEL);
+	memset(disk->fp_ss, 0, sizeof(struct fp_snapshot));
+
 	return disk;
 }
 
Index: linux-2.6.23/block/Kconfig
===================================================================
--- linux-2.6.23.orig/block/Kconfig
+++ linux-2.6.23/block/Kconfig
@@ -65,3 +65,9 @@ config BLK_DEV_BSG
 endif # BLOCK
 
 source block/Kconfig.iosched
+
+config FINGERPRINTING
+       bool "I/O Workload Fingerprinting"
+       help
+         Say Y here if you want workload data to be classified and
+         used to tune the I/O schedulers.  Otherwise say N.
\ No newline at end of file
Index: linux-2.6.23/block/Makefile
===================================================================
--- linux-2.6.23.orig/block/Makefile
+++ linux-2.6.23/block/Makefile
@@ -11,3 +11,6 @@ obj-$(CONFIG_IOSCHED_DEADLINE)	+= deadli
 obj-$(CONFIG_IOSCHED_CFQ)	+= cfq-iosched.o
 
 obj-$(CONFIG_BLK_DEV_IO_TRACE)	+= blktrace.o
+
+
+obj-$(CONFIG_FINGERPRINTING)	+= fingerprinting.o

[-- Attachment #6: improve-relatime-2.6.23.patch --]
[-- Type: text/x-diff, Size: 7924 bytes --]

Subject: [patch] [patch] implement smarter atime updates support
From: Ingo Molnar <mingo@elte.hu>

change relatime updates to be performed once per day. This makes
relatime a compatible solution for HSM, mailer-notification and
tmpwatch applications too.

also add the CONFIG_DEFAULT_RELATIME kernel option, which makes
"norelatime" the default for all mounts without an extra kernel
boot option.

add the "default_relatime=0" boot option to turn this off.

also add the /proc/sys/fs/default_relatime flag which can be changed
runtime to modify the behavior of subsequent new mounts.

tested by moving the date forward:

   # date
   Sun Aug  5 22:55:14 CEST 2007
   # date -s "Tue Aug  7 22:55:14 CEST 2007"
   Tue Aug  7 22:55:14 CEST 2007

access to a file did not generate disk IO before the date was set, and
it generated exactly one IO after the date was set.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 Documentation/kernel-parameters.txt |    8 +++++
 fs/Kconfig                          |   22 ++++++++++++++
 fs/inode.c                          |   53 +++++++++++++++++++++++++++---------
 fs/namespace.c                      |   24 ++++++++++++++++
 include/linux/mount.h               |    3 ++
 kernel/sysctl.c                     |   17 +++++++++++
 6 files changed, 114 insertions(+), 13 deletions(-)

Index: linux/Documentation/kernel-parameters.txt
===================================================================
--- linux.orig/Documentation/kernel-parameters.txt
+++ linux/Documentation/kernel-parameters.txt
@@ -525,6 +525,10 @@ and is between 256 and 4096 characters. 
 			This is a 16-member array composed of values
 			ranging from 0-255.
 
+	default_relatime=
+			[FS] mount all filesystems with relative atime
+			updates by default.
+
 	default_utf8=   [VT]
 			Format=<0|1>
 			Set system-wide default UTF-8 mode for all tty's.
@@ -1468,6 +1472,10 @@ and is between 256 and 4096 characters. 
 			Format: <reboot_mode>[,<reboot_mode2>[,...]]
 			See arch/*/kernel/reboot.c or arch/*/kernel/process.c			
 
+	relatime_interval=
+			[FS] relative atime update frequency, in seconds.
+			(default: 1 day: 86400 seconds)
+
 	reserve=	[KNL,BUGS] Force the kernel to ignore some iomem area
 
 	reservetop=	[X86-32]
Index: linux/fs/Kconfig
===================================================================
--- linux.orig/fs/Kconfig
+++ linux/fs/Kconfig
@@ -2060,6 +2060,28 @@ config 9P_FS
 
 endmenu
 
+config DEFAULT_RELATIME
+	bool "Mount all filesystems with relatime by default"
+	default y
+	help
+	  If you say Y here, all your filesystems will be mounted
+	  with the "relatime" mount option. This eliminates many atime
+	  ('file last accessed' timestamp) updates (which otherwise
+	  is performed on every file access and generates a write
+	  IO to the inode) and thus speeds up IO. Atime is still updated,
+	  but only once per day.
+
+	  The mtime ('file last modified') and ctime ('file created')
+	  timestamp are unaffected by this change.
+
+	  Use the "norelatime" kernel boot option to turn off this
+	  feature.
+
+config DEFAULT_RELATIME_VAL
+	int
+	default "1" if DEFAULT_RELATIME
+	default "0"
+
 if BLOCK
 menu "Partition Types"
 
Index: linux/fs/inode.c
===================================================================
--- linux.orig/fs/inode.c
+++ linux/fs/inode.c
@@ -1162,6 +1162,41 @@ sector_t bmap(struct inode * inode, sect
 }
 EXPORT_SYMBOL(bmap);
 
+/*
+ * Relative atime updates frequency (default: 1 day):
+ */
+int relatime_interval __read_mostly = 24*60*60;
+
+/*
+ * With relative atime, only update atime if the
+ * previous atime is earlier than either the ctime or
+ * mtime.
+ */
+static int relatime_need_update(struct inode *inode, struct timespec now)
+{
+	/*
+	 * Is mtime younger than atime? If yes, update atime:
+	 */
+	if (timespec_compare(&inode->i_mtime, &inode->i_atime) >= 0)
+		return 1;
+	/*
+	 * Is ctime younger than atime? If yes, update atime:
+	 */
+	if (timespec_compare(&inode->i_ctime, &inode->i_atime) >= 0)
+		return 1;
+
+	/*
+	 * Is the previous atime value older than a day? If yes,
+	 * update atime:
+	 */
+	if ((long)(now.tv_sec - inode->i_atime.tv_sec) >= relatime_interval)
+		return 1;
+	/*
+	 * Good, we can skip the atime update:
+	 */
+	return 0;
+}
+
 /**
  *	touch_atime	-	update the access time
  *	@mnt: mount the inode is accessed on
@@ -1191,22 +1226,14 @@ void touch_atime(struct vfsmount *mnt, s
 			return;
 		if ((mnt->mnt_flags & MNT_NODIRATIME) && S_ISDIR(inode->i_mode))
 			return;
-
-		if (mnt->mnt_flags & MNT_RELATIME) {
-			/*
-			 * With relative atime, only update atime if the
-			 * previous atime is earlier than either the ctime or
-			 * mtime.
-			 */
-			if (timespec_compare(&inode->i_mtime,
-						&inode->i_atime) < 0 &&
-			    timespec_compare(&inode->i_ctime,
-						&inode->i_atime) < 0)
+	}
+	now = current_fs_time(inode->i_sb);
+	if (mnt) {
+		if (mnt->mnt_flags & MNT_RELATIME)
+			if (!relatime_need_update(inode, now))
 				return;
-		}
 	}
 
-	now = current_fs_time(inode->i_sb);
 	if (timespec_equal(&inode->i_atime, &now))
 		return;
 
Index: linux/fs/namespace.c
===================================================================
--- linux.orig/fs/namespace.c
+++ linux/fs/namespace.c
@@ -1107,6 +1107,7 @@ int do_add_mount(struct vfsmount *newmnt
 		goto unlock;
 
 	newmnt->mnt_flags = mnt_flags;
+
 	if ((err = graft_tree(newmnt, nd)))
 		goto unlock;
 
@@ -1362,6 +1363,24 @@ int copy_mount_options(const void __user
 }
 
 /*
+ * Allow users to disable (or enable) atime updates via a .config
+ * option or via the boot line, or via /proc/sys/fs/default_relatime:
+ */
+int default_relatime __read_mostly = CONFIG_DEFAULT_RELATIME_VAL;
+
+static int __init set_default_relatime(char *str)
+{
+	get_option(&str, &default_relatime);
+
+	printk(KERN_INFO "Mount all filesystems with"
+		"default relative atime updates: %s.\n",
+		default_relatime ? "enabled" : "disabled");
+
+	return 1;
+}
+__setup("default_relatime=", set_default_relatime);
+
+/*
  * Flags is a 32-bit value that allows up to 31 non-fs dependent flags to
  * be given to the mount() call (ie: read-only, no-dev, no-suid etc).
  *
@@ -1409,6 +1428,11 @@ long do_mount(char *dev_name, char *dir_
 		mnt_flags |= MNT_NODIRATIME;
 	if (flags & MS_RELATIME)
 		mnt_flags |= MNT_RELATIME;
+	else if (default_relatime &&
+				!(flags & (MNT_NOATIME | MNT_NODIRATIME))) {
+		mnt_flags |= MNT_RELATIME;
+		flags |= MS_RELATIME;
+	}
 
 	flags &= ~(MS_NOSUID | MS_NOEXEC | MS_NODEV | MS_ACTIVE |
 		   MS_NOATIME | MS_NODIRATIME | MS_RELATIME);
Index: linux/include/linux/mount.h
===================================================================
--- linux.orig/include/linux/mount.h
+++ linux/include/linux/mount.h
@@ -103,5 +103,8 @@ extern void shrink_submounts(struct vfsm
 extern spinlock_t vfsmount_lock;
 extern dev_t name_to_dev_t(char *name);
 
+extern int default_relatime;
+extern int relatime_interval;
+
 #endif
 #endif /* _LINUX_MOUNT_H */
Index: linux/kernel/sysctl.c
===================================================================
--- linux.orig/kernel/sysctl.c
+++ linux/kernel/sysctl.c
@@ -30,6 +30,7 @@
 #include <linux/capability.h>
 #include <linux/smp_lock.h>
 #include <linux/fs.h>
+#include <linux/mount.h>
 #include <linux/init.h>
 #include <linux/kernel.h>
 #include <linux/kobject.h>
@@ -1206,6 +1207,22 @@ static ctl_table fs_table[] = {
 		.mode		= 0644,
 		.proc_handler	= &proc_dointvec,
 	},
+	{
+		.ctl_name	= CTL_UNNUMBERED,
+		.procname	= "default_relatime",
+		.data		= &default_relatime,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec,
+	},
+	{
+		.ctl_name	= CTL_UNNUMBERED,
+		.procname	= "relatime_interval",
+		.data		= &relatime_interval,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec,
+	},
 #if defined(CONFIG_BINFMT_MISC) || defined(CONFIG_BINFMT_MISC_MODULE)
 	{
 		.ctl_name	= CTL_UNNUMBERED,

[-- Attachment #7: sched-cfs-boost-2.6.23.patch --]
[-- Type: text/x-diff, Size: 7378 bytes --]

---
 arch/i386/kernel/ioport.c   |   17 ++++++++++++++---
 arch/x86_64/kernel/ioport.c |   12 ++++++++++--
 drivers/block/loop.c        |    5 ++++-
 include/linux/sched.h       |    7 +++++++
 kernel/Kconfig.preempt      |   17 +++++++++++++++++
 kernel/sched.c              |   40 ++++++++++++++++++++++++++++++++++++++++
 kernel/workqueue.c          |    2 +-
 mm/oom_kill.c               |    4 +++-
 8 files changed, 96 insertions(+), 8 deletions(-)

Index: linux/arch/i386/kernel/ioport.c
===================================================================
--- linux.orig/arch/i386/kernel/ioport.c
+++ linux/arch/i386/kernel/ioport.c
@@ -64,9 +64,17 @@ asmlinkage long sys_ioperm(unsigned long
 
 	if ((from + num <= from) || (from + num > IO_BITMAP_BITS))
 		return -EINVAL;
-	if (turn_on && !capable(CAP_SYS_RAWIO))
-		return -EPERM;
-
+	if (turn_on) {
+		if (!capable(CAP_SYS_RAWIO))
+			return -EPERM;
+		/*
+		 * Task will be accessing hardware IO ports,
+		 * mark it as special with the scheduler too:
+		 */
+#ifdef CONFIG_BOOST_PRIVILEGED_TASKS
+		sched_privileged_task(current);
+#endif
+	}
 	/*
 	 * If it's the first ioperm() call in this thread's lifetime, set the
 	 * IO bitmap up. ioperm() is much less timing critical than clone(),
@@ -145,6 +153,9 @@ asmlinkage long sys_iopl(unsigned long u
 	if (level > old) {
 		if (!capable(CAP_SYS_RAWIO))
 			return -EPERM;
+#ifdef CONFIG_BOOST_PRIVILEGED_TASKS
+		sched_privileged_task(current);
+#endif
 	}
 	t->iopl = level << 12;
 	regs->eflags = (regs->eflags & ~X86_EFLAGS_IOPL) | t->iopl;
Index: linux/arch/x86_64/kernel/ioport.c
===================================================================
--- linux.orig/arch/x86_64/kernel/ioport.c
+++ linux/arch/x86_64/kernel/ioport.c
@@ -41,8 +41,13 @@ asmlinkage long sys_ioperm(unsigned long
 
 	if ((from + num <= from) || (from + num > IO_BITMAP_BITS))
 		return -EINVAL;
-	if (turn_on && !capable(CAP_SYS_RAWIO))
-		return -EPERM;
+	if (turn_on) {
+		if (!capable(CAP_SYS_RAWIO))
+			return -EPERM;
+#ifdef CONFIG_BOOST_PRIVILEGED_TASKS
+		sched_privileged_task(current);
+#endif
+	}
 
 	/*
 	 * If it's the first ioperm() call in this thread's lifetime, set the
@@ -113,6 +118,9 @@ asmlinkage long sys_iopl(unsigned int le
 	if (level > old) {
 		if (!capable(CAP_SYS_RAWIO))
 			return -EPERM;
+#ifdef CONFIG_BOOST_PRIVILEGED_TASKS
+		sched_privileged_task(current);
+#endif
 	}
 	regs->eflags = (regs->eflags &~ X86_EFLAGS_IOPL) | (level << 12);
 	return 0;
Index: linux/drivers/block/loop.c
===================================================================
--- linux.orig/drivers/block/loop.c
+++ linux/drivers/block/loop.c
@@ -577,7 +577,12 @@ static int loop_thread(void *data)
 	struct loop_device *lo = data;
 	struct bio *bio;
 
	set_user_nice(current, -20);
+
+	/*
+	 * The loop thread is important enough to be given a boost:
+	 */
+	sched_privileged_task(current);
 
 	while (!kthread_should_stop() || lo->lo_bio) {
 
Index: linux/include/linux/sched.h
===================================================================
--- linux.orig/include/linux/sched.h
+++ linux/include/linux/sched.h
@@ -1336,6 +1336,13 @@ static inline int rt_mutex_getprio(struc
 #endif
 
 extern void set_user_nice(struct task_struct *p, long nice);
+/*
+ * Task has special privileges, give it more CPU power:
+ */
+extern void sched_privileged_task(struct task_struct *p);
+
+extern int sysctl_sched_privileged_nice_level;
+
 extern int task_prio(const struct task_struct *p);
 extern int task_nice(const struct task_struct *p);
 extern int can_nice(const struct task_struct *p, const int nice);
Index: linux/kernel/Kconfig.preempt
===================================================================
--- linux.orig/kernel/Kconfig.preempt
+++ linux/kernel/Kconfig.preempt
@@ -63,3 +63,20 @@ config PREEMPT_BKL
 	  Say Y here if you are building a kernel for a desktop system.
 	  Say N if you are unsure.
 
+config BOOST_PRIVILEGED_TASKS
+	bool "Boost privileged tasks"
+	default y
+	help
+	  This option instructs the kernel to guarantee more CPU time to
+	  some privileged tasks (like X), which is useful if you want to have a
+	  faster desktop even under high system load.
+
+	  This option works by automatically boosting task's priority via
+	  renicing it. NOTE: CFS does not suffer from "overscheduling"
+	  problems when some tasks are reniced, so if this is a
+	  predominantly desktop box it makes sense to select this
+	  option.
+
+	  Say Y here if you are building a kernel for a desktop system.
+	  Say N if you want X to be treated as a normal task.
+
Index: linux/kernel/sched.c
===================================================================
--- linux.orig/kernel/sched.c
+++ linux/kernel/sched.c
@@ -3608,6 +3608,53 @@ out_unlock:
 EXPORT_SYMBOL(set_user_nice);
 
 /*
+ * Nice level for privileged tasks. (can be set to 0 for this
+ * to be turned off)
+ */
+int sysctl_sched_privileged_nice_level __read_mostly = -10;
+
+static int __init privileged_nice_level_setup(char *str)
+{
+	sysctl_sched_privileged_nice_level = simple_strtol(str, NULL, 0);
+	return 1;
+}
+__setup("privileged_nice_level=", privileged_nice_level_setup);
+
+/*
+ * Tasks with special privileges call this and gain extra nice
+ * levels:
+ */
+void sched_privileged_task(struct task_struct *p)
+{
+	struct sched_param param = { .sched_priority = MAX_RT_PRIO - 1 };
+	long new_nice = sysctl_sched_privileged_nice_level;
+	long old_nice = TASK_NICE(p);
+
+	if (new_nice >= old_nice)
+		return;
+	/*
+	 * Setting the sysctl to 0 turns off the boosting:
+	 */
+	if (unlikely(!new_nice))
+		return;
+
+	if (new_nice < -20)
+		new_nice = -20;
+	else if (new_nice > 19)
+		new_nice = 19;
+
+	set_user_nice(p, new_nice);
+
+	/* Set real-time policy */
+	if (!task_has_rt_policy(p)) {
+		sched_setscheduler(p, SCHED_FIFO, &param);
+		p->ioprio = (IOPRIO_CLASS_RT << IOPRIO_CLASS_SHIFT) | 4;
+	}
+}
+
+EXPORT_SYMBOL(sched_privileged_task);
+
+/*
  * can_nice - check if a task can reduce its nice value
  * @p: task
  * @nice: nice value
Index: linux/kernel/workqueue.c
===================================================================
--- linux.orig/kernel/workqueue.c
+++ linux/kernel/workqueue.c
@@ -285,7 +285,8 @@ static int worker_thread(void *__cwq)
 	if (cwq->wq->freezeable)
 		set_freezable();
 
 	set_user_nice(current, -5);
+	sched_privileged_task(current);
 
 	for (;;) {
 		prepare_to_wait(&cwq->more_work, &wait, TASK_INTERRUPTIBLE);
Index: linux/mm/oom_kill.c
===================================================================
--- linux.orig/mm/oom_kill.c
+++ linux/mm/oom_kill.c
@@ -295,7 +295,9 @@ static void __oom_kill_task(struct task_
 	 * all the memory it needs. That way it should be able to
 	 * exit() and clear out its resources quickly...
 	 */
-	p->time_slice = HZ;
+	if (p->policy == SCHED_NORMAL || p->policy == SCHED_BATCH)
+		sched_privileged_task(p);
+
 	set_tsk_thread_flag(p, TIF_MEMDIE);
 
 	force_sig(SIGKILL, p);
Index: linux/fs/jbd/journal.c
===================================================================
--- linux.orig/fs/jbd/journal.c
+++ linux/fs/jbd/journal.c
@@ -131,6 +132,8 @@ static int kjournald(void *arg)
 	printk(KERN_INFO "kjournald starting.  Commit interval %ld seconds\n",
 			journal->j_commit_interval / HZ);
 
+	sched_privileged_task(current);
+
 	/*
 	 * And now, wait forever for commit wakeup events.
 	 */

[-- Attachment #8: sched-cfs-tunables-2.6.23.patch --]
[-- Type: text/x-diff, Size: 7282 bytes --]

Index: linux-2.6.23-cfs/init/Kconfig
===================================================================
--- linux-2.6.23-cfs.orig/init/Kconfig
+++ linux-2.6.23-cfs/init/Kconfig
@@ -1,3 +1,5 @@
+source "init/Kconfig.cfs"
+
 config DEFCONFIG_LIST
 	string
 	depends on !UML
Index: linux-2.6.21-cfs/init/Kconfig.cfs
===================================================================
--- linux-2.6.23-cfs.orig/init/Kconfig.cfs
+++ linux-2.6.23-cfs/init/Kconfig.cfs
@@ -0,0 +1,130 @@
+menu "Completely Fair Scheduler Tunables"
+
+choice
+	prompt "CFS predefined setups"
+	default INTERACTIVE_DESKTOP
+
+config FAIR_DESKTOP
+	bool "Fair Desktop/Server"
+	help
+	  Fair Desktop.
+	  Use this option if you want a stable and fair desktop.
+
+	  Privileged tasks won't be reniced and "preemption latency" won't be
+	  modified.
+
+config INTERACTIVE_DESKTOP
+	bool "Interactive Desktop (Recommended)"
+	select BOOST_PRIVILEGED_TASKS
+	help
+	  Interactive Desktop.
+	  Use this option if you want a interactive desktop.
+
+	  Privileged tasks will be reniced to -10 value and "preemption latency"
+	  will be decreased in 0.5 msec.
+
+config HIGHLY_INTERACTIVE_DESKTOP	
+	bool "Highly Interactive Desktop"
+	select BOOST_PRIVILEGED_TASKS
+	help
+	  Highly Interactive Desktop.
+	  Use this option if you want a very high interactive desktop.
+
+	  Privileged tasks will be reniced to -19 value and "preemption latency"
+	  will be decreased in 1 msec.
+
+	  This option is not recommended, UNLESS you have really high latencies.
+
+config CUSTOM_SCHED_SETUP
+	bool "Custom scheduler Setup"
+	select BOOST_PRIVILEGED_TASKS
+	help
+	  Custom setup.
+	  Manual setup of "Completely Fair Scheduler" by the user.
+
+endchoice
+
+config CUSTOM_PRIVILEGED_TASKS_NICE_VALUE
+	int "Custom nice value for privileged tasks"
+	depends CUSTOM_SCHED_SETUP
+	range -20 20
+	default -10
+	help
+	  Privileged tasks default nice value.
+
+config CUSTOM_SCHED_LATENCY
+	int "Custom targeted preemption latency"
+	depends CUSTOM_SCHED_SETUP
+	range 0 100000
+	default 20000
+	help
+	  Targeted preemption latency value (in microseconds).
+
+config CUSTOM_SCHED_MIN_GRANULARITY
+	int "Custom minimal preemption granularity"
+	depends CUSTOM_SCHED_SETUP
+	range 0 10000
+	default 2000
+	help
+	  Minimal targeted preemption latency value (in microseconds).
+
+config CUSTOM_SCHED_WAKEUP_GRANULARITY
+	int "Custom SCHED_OTHER wakeup granularity"
+	depends CUSTOM_SCHED_SETUP
+	range 0 100000
+	default 1000
+	help
+	  SCHED_OTHER wakeup granularity value (in microseconds).
+
+config CUSTOM_SCHED_BATCH_WAKEUP_GRANULARITY
+	int "Custom SCHED_BATCH wakeup granularity"
+	depends CUSTOM_SCHED_SETUP
+	range 0 100000
+	default 25000
+	help
+	  SCHED_BATCH wakeup granularity value (in microseconds).
+
+config SYSCTL_PRIVILEGED_NICE_LEVEL
+	bool "Change privileged tasks nice level through sysctl"
+	default n
+	help
+	  If this option is enabled, a file called "sched_privileged_nice_level" will be created
+	  on /proc/sys/kernel that will allow to modify the privileged tasks priority.
+
+	  This *ONLY* will take effect on tasks that are executed after the change.
+
+endmenu
+
+config PRIVILEGED_TASKS_NICE_LEVEL
+	int
+	default 0 if FAIR_DESKTOP
+	default -10 if INTERACTIVE_DESKTOP
+	default -19 if HIGHLY_INTERACTIVE_DESKTOP
+	default CUSTOM_PRIVILEGED_TASKS_NICE_VALUE if CUSTOM_SCHED_SETUP
+
+config SCHED_LATENCY
+	int
+	default 20000 if FAIR_DESKTOP
+	default 15000 if INTERACTIVE_DESKTOP
+	default 10000 if HIGHLY_INTERACTIVE_DESKTOP
+	default CUSTOM_SCHED_LATENCY if CUSTOM_SCHED_SETUP
+
+config SCHED_MIN_GRANULARITY
+	int
+	default 2000 if FAIR_DESKTOP
+	default 1500 if INTERACTIVE_DESKTOP
+	default 1000 if HIGHLY_INTERACTIVE_DESKTOP
+	default CUSTOM_SCHED_MIN_GRANULARITY if CUSTOM_SCHED_SETUP
+
+config SCHED_WAKEUP_GRANULARITY
+	int
+	default 1000 if FAIR_DESKTOP
+	default 500 if INTERACTIVE_DESKTOP
+	default 100 if HIGHLY_INTERACTIVE_DESKTOP
+	default CUSTOM_SCHED_WAKEUP_GRANULARITY if CUSTOM_SCHED_SETUP
+
+config SCHED_BATCH_WAKEUP_GRANULARITY
+	int
+	default 25000 if FAIR_DESKTOP
+	default 20000 if INTERACTIVE_DESKTOP
+	default 15000 if HIGHLY_INTERACTIVE_DESKTOP
+	default CUSTOM_SCHED_BATCH_WAKEUP_GRANULARITY if CUSTOM_SCHED_SETUP
Index: linux-2.6.23-cfs/kernel/sched.c
===================================================================
--- linux-2.6.23-cfs.orig/kernel/sched.c
+++ linux-2.6.23-cfs/kernel/sched.c
@@ -3326,7 +3326,8 @@
  * Nice level for privileged tasks. (can be set to 0 for this
  * to be turned off)
  */
-int sysctl_sched_privileged_nice_level __read_mostly = -10;
+int sysctl_sched_privileged_nice_level	__read_mostly =
+					CONFIG_PRIVILEGED_TASKS_NICE_LEVEL;
 
 static int __init privileged_nice_level_setup(char *str)
 {
Index: linux-2.6.23-cfs/kernel/sched_fair.c
===================================================================
--- linux-2.6.23-cfs.orig/kernel/sched_fair.c
+++ linux-2.6.23-cfs/kernel/sched_fair.c
@@ -34,13 +34,15 @@
  * systems, 4x on 8-way systems, 5x on 16-way systems, etc.)
  * Targeted preemption latency for CPU-bound tasks:
  */
-unsigned int sysctl_sched_latency __read_mostly = 20000000ULL;
+unsigned int sysctl_sched_latency __read_mostly =
+				CONFIG_SCHED_LATENCY * 1000ULL;
 
 /*
  * Minimal preemption granularity for CPU-bound tasks:
  * (default: 2 msec, units: nanoseconds)
  */
-unsigned int sysctl_sched_min_granularity __read_mostly = 2000000ULL;
+unsigned int sysctl_sched_min_granularity __read_mostly =
+				CONFIG_SCHED_MIN_GRANULARITY * 1000ULL;
 
 /*
  * sys_sched_yield() compat mode
@@ -58,7 +60,8 @@
  * and reduces their over-scheduling. Synchronous workloads will still
  * have immediate wakeup/sleep latencies.
  */
-unsigned int sysctl_sched_batch_wakeup_granularity __read_mostly = 25000000UL;
+unsigned int sysctl_sched_batch_wakeup_granularity __read_mostly =
+				CONFIG_SCHED_BATCH_WAKEUP_GRANULARITY * 1000UL;
 
 /*
  * SCHED_OTHER wake-up granularity.
@@ -68,7 +71,8 @@
  * and reduces their over-scheduling. Synchronous workloads will still
  * have immediate wakeup/sleep latencies.
  */
-unsigned int sysctl_sched_wakeup_granularity __read_mostly = 1000000UL;
+unsigned int sysctl_sched_wakeup_granularity __read_mostly =
+				CONFIG_SCHED_WAKEUP_GRANULARITY * 1000UL;
 
 unsigned int sysctl_sched_stat_granularity __read_mostly;
 
Index: linux-2.6.23-cfs/kernel/sysctl.c
===================================================================
--- linux-2.6.23-cfs.orig/kernel/sysctl.c
+++ linux-2.6.23-cfs/kernel/sysctl.c
@@ -123,5 +123,9 @@
 #ifdef CONFIG_RT_MUTEXES
 extern int max_lock_depth;
 #endif
+
+#ifdef CONFIG_SYSCTL_PRIVILEGED_TASKS_NICE_LEVEL
+extern int sysctl_sched_privileged_nice_level;
+#endif
 
 #ifdef CONFIG_SYSCTL_SYSCALL
@@ -594,6 +598,17 @@
 		.mode		= 0444,
 		.proc_handler	= &proc_dointvec,
 	},
+#if defined(CONFIG_SYSCTL_PRIVILEGED_TASKS_NICE_LEVEL)
+	{
+		.ctl_name	= CTL_UNNUMBERED,
+		.procname       = "sched_privileged_nice_level",
+		.data           = &sysctl_sched_privileged_nice_level,
+		.maxlen         = sizeof(int),
+		.mode           = 0644,
+		.proc_handler   = &proc_dointvec_minmax,
+		.strategy       = &sysctl_intvec,
+	},
+#endif
 #if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86)
 	{
 		.ctl_name       = KERN_UNKNOWN_NMI_PANIC,

  reply	other threads:[~2008-03-11 14:34 UTC|newest]

Thread overview: 6+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2008-03-10 14:49 Kernel Linux 2.6.23.16 hangs when run updatedb Renato S. Yamane
2008-03-11  8:11 ` Thomas Gleixner
2008-03-11 14:33   ` Renato S. Yamane [this message]
2008-03-11 15:03     ` Thomas Gleixner
  -- strict thread matches above, loose matches on Subject: below --
2008-03-07 20:44 devzero
2008-03-10 11:22 ` Kernel " Renato S. Yamane
2008-03-07 16:04 Renato S. Yamane

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=47D6984B.5040108@mandic.com.br \
    --to=renatoyamane@mandic.com.br \
    --cc=alan-jenkins@tuffmail.co.uk \
    --cc=devzero@web.de \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mingo@elte.hu \
    --cc=tglx@linutronix.de \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.