public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
* [RFC][PATCH 2/2 v2] MAZE: Mazed processes monitor
@ 2008-05-22 10:01 Hirofumi Nakagawa
  2008-05-23  1:13 ` KAMEZAWA Hiroyuki
  0 siblings, 1 reply; 3+ messages in thread
From: Hirofumi Nakagawa @ 2008-05-22 10:01 UTC (permalink / raw)
  To: linux-kernel; +Cc: akpm, Andi Kleen

This patch is for linux-2.6.26-rc2-mm1 and has been tested on x86_64 hardware.

Signed-off-by: Hirofumi Nakagawa <hnakagawa@miraclelinux.com>
---
 include/linux/maze.h  |   94 ++++++++
 include/linux/sched.h |    3
 init/Kconfig          |   18 +
 init/main.c           |    2
 kernel/Makefile       |    1
 kernel/exit.c         |    2
 kernel/fork.c         |    2
 kernel/maze.c         |  529 ++++++++++++++++++++++++++++++++++++++++++++++++++
 kernel/sched.c        |    3
 9 files changed, 654 insertions(+)

--- /dev/null	1970-01-01 00:00:00.000000000 +0000
+++ linux-2.6.26-rc2-mm1/include/linux/maze.h	2008-05-22 10:11:59.000000000 +0900
@@ -0,0 +1,94 @@
+/*
+ *  Copyright (C) 2007-2008 MIRACLE LINUX Corp.
+ *
+ *  Written by Hirofumi Nakagawa <hnakagawa@miraclelinux.com>
+ *
+ *	This program is free software; you can redistribute it and/or
+ *	modify it under the terms of the GNU General Public License as
+ *	published by the Free Software Foundation, version 2.
+ */
+#ifndef _LINUX_MAZE_H
+#define _LINUX_MAZE_H
+
+#ifdef __KERNEL__
+
+#include <linux/sched.h>
+#include <linux/spinlock.h>
+#include <linux/list.h>
+
+struct maze_context {
+	struct task_struct *task;
+
+	/* The stime of task when the maze count was reset */
+	cputime_t reset_stime;
+	/* The utime of task when the maze count was reset */
+	cputime_t reset_utime;
+
+	/* The soft limit of maze count */
+	unsigned long soft_limit;
+	/* The hard limit of maze count */
+	unsigned long hard_limit;
+
+	/* The signal sent when the maze count reaches the soft limit */
+	int soft_signal;
+	/* The signal sent when the maze count reaches the hard limit */
+	int hard_signal;
+
+#define MAZE_SENT_SOFT_SIG  1
+#define MAZE_SENT_HARD_SIG  2
+	/* The flags of sent signal */
+	unsigned long flags;
+
+#define MAZE_ENQUEUE 1
+#define MAZE_EXIT    2
+	/* The state of queue */
+	unsigned long state;
+
+	/* This value is 1, if preempt_notifier is registered */
+	atomic_t registered_notifier;
+
+	struct preempt_notifier notifier;
+
+	/* This list_head is linked from the maze_queue */
+	struct list_head queue;
+	/* This list_head is linked from the maze_list */
+	struct list_head list;
+
+	spinlock_t lock;
+};
+
+#ifdef CONFIG_MAZE
+
+/* This function is called from start_kernel() */
+extern void maze_init_early(void);
+
+/* This function is called from do_exit() */
+extern void maze_exit(struct task_struct *task);
+
+/* This function is called from copy_process() */
+extern void maze_fork(struct task_struct *task);
+
+/* This function is called from sys_sched_yield() */
+extern void maze_sched_yield(struct task_struct *task);
+
+#else  /* !CONFIG_MAZE */
+
+static inline void maze_init_early(void)
+{
+}
+
+static inline void maze_exit(struct task_struct *task)
+{
+}
+
+static inline void maze_fork(struct task_struct *task)
+{
+}
+
+static inline void maze_sched_yield(struct task_struct *task)
+{
+}
+#endif /* CONFIG_MAZE */
+
+#endif /* __KERNEL__    */
+#endif /* _LINUX_MAZE_H */
--- linux-2.6.26-rc2-mm1.orig/include/linux/sched.h	2008-05-22 10:08:46.000000000 +0900
+++ linux-2.6.26-rc2-mm1/include/linux/sched.h	2008-05-22 10:11:59.000000000 +0900
@@ -1280,6 +1280,9 @@ struct task_struct {
 	/* cg_list protected by css_set_lock and tsk->alloc_lock */
 	struct list_head cg_list;
 #endif
+#ifdef CONFIG_MAZE
+	struct maze_context *maze_context;
+#endif
 #ifdef CONFIG_FUTEX
 	struct robust_list_head __user *robust_list;
 #ifdef CONFIG_COMPAT
--- linux-2.6.26-rc2-mm1.orig/init/Kconfig	2008-05-22 10:08:46.000000000 +0900
+++ linux-2.6.26-rc2-mm1/init/Kconfig	2008-05-22 10:11:59.000000000 +0900
@@ -385,6 +385,24 @@ config RESOURCE_COUNTERS
           infrastructure that works with cgroups
 	depends on CGROUPS

+config MAZE
+	bool "MAZE monitor support"
+	select PREEMPT_NOTIFIERS
+	help
+	  MAZE is a function to monitor mazed processes which use excessive CPU cycles.
+	  This is a CGL (Carrier Grade Linux) requirement (AVL.14.0). MAZE detects such
+	  processes and sends specified signals to the processes for their termination.
+
+	  Say N if unsure.
+
+config MAZE_TIMER
+        int  "MAZE monitoring timer interval"
+	depends on MAZE
+        default 100
+	help
+	  This is the interval of MAZE monitoring timer.
+	  Its unit is msec.
+
 config MM_OWNER
 	bool

--- linux-2.6.26-rc2-mm1.orig/init/main.c	2008-05-22 10:08:46.000000000 +0900
+++ linux-2.6.26-rc2-mm1/init/main.c	2008-05-22 10:11:59.000000000 +0900
@@ -60,6 +60,7 @@
 #include <linux/sched.h>
 #include <linux/signal.h>
 #include <linux/idr.h>
+#include <linux/maze.h>

 #include <asm/io.h>
 #include <asm/bugs.h>
@@ -544,6 +545,7 @@ asmlinkage void __init start_kernel(void
 	lockdep_init();
 	debug_objects_early_init();
 	cgroup_init_early();
+	maze_init_early();

 	local_irq_disable();
 	early_boot_irqs_off();
--- linux-2.6.26-rc2-mm1.orig/kernel/Makefile	2008-05-22 10:08:46.000000000 +0900
+++ linux-2.6.26-rc2-mm1/kernel/Makefile	2008-05-22 10:11:59.000000000 +0900
@@ -77,6 +77,7 @@ obj-$(CONFIG_LATENCYTOP) += latencytop.o
 obj-$(CONFIG_FTRACE) += trace/
 obj-$(CONFIG_TRACING) += trace/
 obj-$(CONFIG_SMP) += sched_cpupri.o
+obj-$(CONFIG_MAZE) += maze.o

 ifneq ($(CONFIG_SCHED_NO_NO_OMIT_FRAME_POINTER),y)
 # According to Alan Modra <alan@linuxcare.com.au>, the -fno-omit-frame-pointer is
--- linux-2.6.26-rc2-mm1.orig/kernel/exit.c	2008-05-22 10:08:46.000000000 +0900
+++ linux-2.6.26-rc2-mm1/kernel/exit.c	2008-05-22 10:11:59.000000000 +0900
@@ -45,6 +45,7 @@
 #include <linux/resource.h>
 #include <linux/blkdev.h>
 #include <linux/task_io_accounting_ops.h>
+#include <linux/maze.h>

 #include <asm/uaccess.h>
 #include <asm/unistd.h>
@@ -1042,6 +1043,7 @@ NORET_TYPE void do_exit(long code)
 	exit_fs(tsk);
 	check_stack_usage();
 	exit_thread();
+	maze_exit(tsk);
 	cgroup_exit(tsk, 1);
 	exit_keys(tsk);

--- linux-2.6.26-rc2-mm1.orig/kernel/fork.c	2008-05-22 10:08:46.000000000 +0900
+++ linux-2.6.26-rc2-mm1/kernel/fork.c	2008-05-22 10:11:59.000000000 +0900
@@ -54,6 +54,7 @@
 #include <linux/tty.h>
 #include <linux/proc_fs.h>
 #include <linux/blkdev.h>
+#include <linux/maze.h>

 #include <asm/pgtable.h>
 #include <asm/pgalloc.h>
@@ -1223,6 +1224,7 @@ static struct task_struct *copy_process(
 	write_unlock_irq(&tasklist_lock);
 	proc_fork_connector(p);
 	cgroup_post_fork(p);
+	maze_fork(p);
 	return p;

 bad_fork_free_pid:
--- /dev/null	1970-01-01 00:00:00.000000000 +0000
+++ linux-2.6.26-rc2-mm1/kernel/maze.c	2008-05-22 10:23:19.000000000 +0900
@@ -0,0 +1,529 @@
+/*
+ *  Copyright (C) 2007-2008 MIRACLE LINUX Corp.
+ *
+ *  Written by Hirofumi Nakagawa <hnakagawa@miraclelinux.com>
+ *
+ *	This program is free software; you can redistribute it and/or
+ *	modify it under the terms of the GNU General Public License as
+ *	published by the Free Software Foundation, version 2.
+ */
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/fs.h>
+#include <linux/proc_fs.h>
+#include <linux/seq_file.h>
+#include <linux/uaccess.h>
+#include <linux/timer.h>
+#include <linux/spinlock.h>
+#include <linux/smp.h>
+#include <linux/list.h>
+#include <linux/percpu.h>
+#include <linux/capability.h>
+#include <linux/maze.h>
+
+#define ERRPRINT(str, ...) printk(KERN_ERR"[%s] " str, __func__, ##__VA_ARGS__);
+
+/* The root directory of procfs. */
+static struct proc_dir_entry *maze_proc_dir;
+
+/* This list contains all maze_context.
+ * Note, this list is protected by maze_list_lock.
+ */
+static LIST_HEAD(maze_list);
+
+/* The protection of maze_list. */
+static DEFINE_SPINLOCK(maze_list_lock);
+
+/* The interval of softirq timer. */
+static int maze_timer_interval;
+
+/* This list links scheduling tasks. */
+static DEFINE_PER_CPU(struct list_head, maze_queue);
+
+/* The timer of check maze-count.  */
+static DEFINE_PER_CPU(struct timer_list, maze_timer);
+
+/*
+ * Calculate maze-count.
+ * The definition of maze-count is the time spent in TASK_RUNNING.
+ */
+static inline cputime_t get_maze_count(struct maze_context *context)
+{
+	return (context->task->utime - context->reset_utime) +
+		(context->task->stime - context->reset_stime);
+}
+
+/*
+ * Reset maze-count.
+ * This function is called when watch task state is not TASK_RUNNING
+ * and it is preempted.
+ */
+static inline void reset_maze_count(struct maze_context *context)
+{
+	context->reset_utime = context->task->utime;
+	context->reset_stime = context->task->stime;
+	context->flags = 0;
+}
+
+/*
+ * Initialize the init-process's maze_context.
+ * Note, this function is called from start_kernel().
+ */
+void maze_init_early(void)
+{
+	init_task.maze_context = NULL;
+}
+
+/*
+ * Enqueue maze_context to maze_queue.
+ * Note, Must be called under spin_lock of context->lock.
+ */
+static void enqueue(struct maze_context *context)
+{
+	if (!(context->state & MAZE_ENQUEUE)) {
+		list_add(&context->queue, &__get_cpu_var(maze_queue));
+		context->state |= MAZE_ENQUEUE;
+	}
+}
+
+/*
+ * Free maze_context, if the monitored task exited.
+ * Note, this function is called from do_exit().
+ */
+void maze_exit(struct task_struct *task)
+{
+	struct maze_context *context;
+	unsigned long flags;
+
+	spin_lock(&maze_list_lock);
+
+	context = task->maze_context;
+	if (context) {
+		spin_lock_irqsave(&context->lock, flags);
+		task->maze_context = NULL;
+
+		if (atomic_xchg(&context->registered_notifier, 1))
+			preempt_notifier_unregister(&context->notifier);
+
+		context->state |= MAZE_EXIT;
+
+		list_del(&context->list);
+		enqueue(context);
+
+		spin_unlock_irqrestore(&context->lock, flags);
+	}
+
+	spin_unlock(&maze_list_lock);
+}
+
+static inline void copy_limit_and_signal(struct maze_context *to,
+			struct maze_context *from)
+{
+	to->soft_limit = from->soft_limit;
+	to->hard_limit = from->hard_limit;
+	to->soft_signal = from->soft_signal;
+	to->hard_signal = from->hard_signal;
+}
+
+/*
+ * Make sched-in task pickup.
+ */
+static void sched_in_event(struct preempt_notifier *notifier, int cpu)
+{
+	struct maze_context *context;
+	unsigned long flags;
+
+	context = current->maze_context;
+	if (context) {
+		spin_lock_irqsave(&context->lock, flags);
+		enqueue(context);
+		spin_unlock_irqrestore(&context->lock, flags);
+	}
+}
+
+/*
+ * Make scheduling task pickup.
+ */
+static void sched_out_event(struct preempt_notifier *notifier,
+			  struct task_struct *next)
+{
+	struct maze_context *context;
+	unsigned long flags;
+
+	context = current->maze_context;
+	if (context) {
+		spin_lock_irqsave(&context->lock, flags);
+		if (current->state != TASK_RUNNING)
+			reset_maze_count(context);
+		spin_unlock_irqrestore(&context->lock, flags);
+	}
+
+	context = next->maze_context;
+	if (context) {
+		spin_lock_irqsave(&context->lock, flags);
+		enqueue(context);
+		spin_unlock_irqrestore(&context->lock, flags);
+	}
+}
+
+static struct preempt_ops preempt_ops = {
+	.sched_in = sched_in_event,
+	.sched_out = sched_out_event,
+};
+
+/*
+ * Copy parent's maze_context to the child process,
+ * if a monitored process forked.
+ * Note, this function is called from copy_process().
+ */
+void maze_fork(struct task_struct *task)
+{
+	struct maze_context *context;
+
+	task->maze_context = NULL;
+	if (!current->maze_context)
+		return;
+
+	spin_lock(&maze_list_lock);
+	context = kzalloc(sizeof(struct maze_context), GFP_KERNEL);
+	if (unlikely(!context)) {
+		ERRPRINT("fail to alloc maze_context.\n");
+		goto unlock;
+	}
+
+	spin_lock_init(&context->lock);
+
+	task->maze_context = context;
+	context->task = task;
+
+	copy_limit_and_signal(task->maze_context, current->maze_context);
+	preempt_notifier_init(&task->maze_context->notifier, &preempt_ops);
+
+	list_add(&task->maze_context->list, &maze_list);
+
+unlock:
+	spin_unlock(&maze_list_lock);
+}
+
+/*
+ * Reset maze-count.
+ * Note, this function is called from sys_sched_yield().
+ */
+void maze_sched_yield(struct task_struct *task)
+{
+	if (task->maze_context)
+		reset_maze_count(task->maze_context);
+}
+
+/*
+ * Set up the preempt notifier, if the watched task has not set it up.
+ */
+static void set_preempt_notifier(struct maze_context *context)
+{
+	if (!atomic_xchg(&context->registered_notifier, 1))
+		preempt_notifier_register(&context->notifier);
+}
+
+/*
+ * Add monitoring task or copy new monitor state.
+ */
+static int add_maze_context(int pid, struct maze_context *context)
+{
+	struct task_struct *task;
+	unsigned long flags;
+	int ret = 0;
+
+	rcu_read_lock();
+
+	task = find_task_by_vpid(pid);
+	if (unlikely(!task))
+		goto read_unlock;
+
+	spin_lock(&maze_list_lock);
+
+	if (!task->maze_context) {
+		spin_lock_init(&context->lock);
+		spin_lock_irqsave(&context->lock, flags);
+
+		task->maze_context = context;
+		context->task = task;
+
+		list_add(&context->list, &maze_list);
+		reset_maze_count(context);
+
+		preempt_notifier_init(&context->notifier, &preempt_ops);
+		if (current == task)
+			set_preempt_notifier(context);
+
+		spin_unlock_irqrestore(&context->lock, flags);
+	} else {
+		spin_lock_irqsave(&task->maze_context->lock, flags);
+		copy_limit_and_signal(task->maze_context, context);
+		spin_unlock_irqrestore(&task->maze_context->lock, flags);
+		ret = 1;
+	}
+
+	spin_unlock(&maze_list_lock);
+
+read_unlock:
+	rcu_read_unlock();
+	return ret;
+}
+
+/*
+ * Build maze_context from a procfs input
+ */
+static int build_maze_context(const char *read_line,
+		    struct maze_context *context, int *pid)
+{
+	unsigned long soft_limit, hard_limit;
+	int soft_signal, hard_signal;
+	int res;
+
+	res = sscanf(read_line, "%d %ld %ld %d %d", pid, &soft_limit,
+				 &hard_limit, &soft_signal, &hard_signal);
+
+	if (res != 5 || *pid < 0 ||
+		soft_limit < 0 || hard_limit < 0 ||
+		soft_signal < 0 || hard_signal < 0)
+		return -EINVAL;
+
+	context->soft_limit = soft_limit;
+	context->hard_limit = hard_limit;
+	context->soft_signal = soft_signal;
+	context->hard_signal = hard_signal;
+
+	return 0;
+}
+
+static void timer_handler(unsigned long data);
+
+/*
+ * Setup softirq timer.
+ */
+static void continue_timer(int cpu)
+{
+	setup_timer(&per_cpu(maze_timer, cpu),
+				timer_handler, 0);
+	per_cpu(maze_timer, cpu).expires =
+				jiffies + maze_timer_interval;
+	add_timer_on(&per_cpu(maze_timer, cpu), cpu);
+}
+
+/*
+ * Check that maze-count didn't exceed the limit.
+ */
+static void check_limit(struct maze_context *context)
+{
+	cputime_t t = get_maze_count(context);
+
+	if (!(context->flags & MAZE_SENT_SOFT_SIG)) {
+		if (t >= context->soft_limit) {
+			/* Send soft-signal */
+			send_sig(context->soft_signal, context->task, 1);
+			context->flags |= MAZE_SENT_SOFT_SIG;
+		}
+	} else if (!(context->flags & MAZE_SENT_HARD_SIG)) {
+		if (t >= context->hard_limit) {
+			/* Send hard-signal */
+			send_sig(context->hard_signal, context->task, 1);
+			context->flags |= MAZE_SENT_HARD_SIG;
+		}
+	}
+}
+
+/*
+ * Watch registered tasks.
+ * Timer interval is in CONFIG_MAZE_TIMER.
+ */
+static void timer_handler(unsigned long data)
+{
+	struct maze_context *context, *next;
+
+	context = current->maze_context;
+	if (context) {
+		spin_lock(&context->lock);
+		if (!context->state) {
+			set_preempt_notifier(context);
+			check_limit(context);
+		}
+		spin_unlock(&context->lock);
+	}
+
+	if (!list_empty(&__get_cpu_var(maze_queue))) {
+		list_for_each_entry_safe(context, next,
+				    &__get_cpu_var(maze_queue), queue) {
+			spin_lock(&context->lock);
+
+			if (context->state & MAZE_EXIT) {
+				list_del(&context->queue);
+				spin_unlock(&context->lock);
+				kfree(context);
+				continue;
+			}
+
+			check_limit(context);
+			list_del(&context->queue);
+			context->state = 0;
+			spin_unlock(&context->lock);
+		}
+	}
+
+	continue_timer(smp_processor_id());
+}
+
+static int maze_entries_file_show(struct seq_file *seq, void *nouse)
+{
+	struct maze_context *context;
+
+	spin_lock(&maze_list_lock);
+
+	list_for_each_entry(context, &maze_list, list) {
+		seq_printf(seq, "pid:%5d   ", context->task->pid);
+		seq_printf(seq, "count:%6ld  ", get_maze_count(context));
+		seq_printf(seq, "soft-limit:%6ld  ", context->soft_limit);
+		seq_printf(seq, "hard-limit:%6ld  ", context->hard_limit);
+		seq_printf(seq, "soft-signal:%2d  ", context->soft_signal);
+		seq_printf(seq, "hard-signal:%2d  ", context->hard_signal);
+		seq_printf(seq, "\n");
+	}
+
+	spin_unlock(&maze_list_lock);
+
+	return 0;
+}
+
+/*
+ * Open operation of /proc/maze/entries
+ */
+static int maze_entries_file_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, maze_entries_file_show, NULL);
+}
+
+/*
+ * Release operation of /proc/maze/entries
+ */
+static int maze_entries_file_release(struct inode *inode, struct file *file)
+{
+	return single_release(inode, file);
+}
+
+/*
+ * Write operation of /proc/maze/entries
+ */
+static ssize_t maze_entries_file_write(struct file *file,
+					const char __user *buffer,
+					size_t count, loff_t *ppos)
+{
+	struct maze_context *context;
+	char read_line[32];
+	int ret, pid;
+
+	if (!capable(CAP_SYS_ADMIN))
+		return -EPERM;
+
+	if (count > sizeof(read_line) - 1)
+		return -EINVAL;
+
+	if (copy_from_user(read_line, buffer, count))
+		return -EFAULT;
+
+	read_line[count] = '\0';
+
+	context = kzalloc(sizeof(struct maze_context), GFP_KERNEL);
+	if (unlikely(!context)) {
+		ERRPRINT("fail to alloc maze_context.\n");
+		return -ENOMEM;
+	}
+
+	ret = build_maze_context(read_line, context, &pid);
+	if (ret) {
+		kfree(context);
+		return ret;
+	}
+
+	if (add_maze_context(pid, context))
+		/* Free maze_context, if already added it. */
+		kfree(context);
+
+	return count;
+}
+
+static struct file_operations maze_entries_file_ops = {
+	.owner	 = THIS_MODULE,
+	.open	 = maze_entries_file_open,
+	.read	 = seq_read,
+	.write   = maze_entries_file_write,
+	.llseek	 = seq_lseek,
+	.release = maze_entries_file_release,
+};
+
+/*
+ * Creating /proc/maze/
+ */
+static int init_dir(void)
+{
+	maze_proc_dir =
+		create_proc_entry("maze", S_IFDIR | S_IRUGO | S_IXUGO, NULL);
+
+	if (!maze_proc_dir)
+		panic("fail to create /proc/maze\n");
+
+	return 0;
+}
+
+/*
+ * Creating /proc/maze/entries
+ */
+static int init_entries(void)
+{
+	struct proc_dir_entry *p;
+
+	p = create_proc_entry("entries", S_IRUGO, maze_proc_dir);
+	if (p == NULL)
+		panic("fail to create /proc/maze/entries\n");
+
+	p->proc_fops = &maze_entries_file_ops;
+
+	return 0;
+}
+
+/*
+ * Initialize maze procfs.
+ */
+static void __init init_proc(void)
+{
+	init_dir();
+	init_entries();
+}
+
+/*
+ * Initialize timer and queue
+ */
+static void __init init_cpu(int cpu)
+{
+	INIT_LIST_HEAD(&per_cpu(maze_queue, cpu));
+
+	/* Setup softirq timer */
+	continue_timer(cpu);
+}
+
+static int __init maze_init(void)
+{
+	int cpu;
+
+	printk(KERN_INFO "Maze: Initializing\n");
+	init_proc();
+
+	/* Initialize interval of softirq timer */
+	maze_timer_interval = msecs_to_jiffies(CONFIG_MAZE_TIMER);
+
+	/* Initialize each cpu's timer and queue */
+	for_each_online_cpu(cpu)
+		init_cpu(cpu);
+
+	return 0;
+}
+__initcall(maze_init);
--- linux-2.6.26-rc2-mm1.orig/kernel/sched.c	2008-05-22 10:08:46.000000000 +0900
+++ linux-2.6.26-rc2-mm1/kernel/sched.c	2008-05-22 10:11:59.000000000 +0900
@@ -71,6 +71,7 @@
 #include <linux/debugfs.h>
 #include <linux/ctype.h>
 #include <linux/ftrace.h>
+#include <linux/maze.h>

 #include <asm/tlb.h>
 #include <asm/irq_regs.h>
@@ -5557,6 +5558,8 @@ asmlinkage long sys_sched_yield(void)
 	_raw_spin_unlock(&rq->lock);
 	preempt_enable_no_resched();

+	maze_sched_yield(current);
+
 	schedule();

 	return 0;




^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: [RFC][PATCH 2/2 v2] MAZE: Mazed processes monitor
  2008-05-22 10:01 [RFC][PATCH 2/2 v2] MAZE: Mazed processes monitor Hirofumi Nakagawa
@ 2008-05-23  1:13 ` KAMEZAWA Hiroyuki
  2008-05-23 10:30   ` Hirofumi Nakagawa
  0 siblings, 1 reply; 3+ messages in thread
From: KAMEZAWA Hiroyuki @ 2008-05-23  1:13 UTC (permalink / raw)
  To: Hirofumi Nakagawa; +Cc: linux-kernel, akpm, Andi Kleen

Some nitpicks ;)

On Thu, 22 May 2008 19:01:26 +0900
Hirofumi Nakagawa <hnakagawa@miraclelinux.com> wrote:

> +config MAZE
> +	bool "MAZE monitor support"
> +	select PREEMPT_NOTIFIERS
> +	help
> +	  MAZE is a function to monitor mazed processes which use excessive CPU cycles.
> +	  This is a CGL (Carrier Grade Linux) requirement (AVL.14.0). MAZE detects such
> +	  processes and sends specified signals to the processes for their termination.
> +
> +	  Say N if unsure.
> +
I don't have any objections what you name your code as. But If this feature is
from CGL, you should use more generic text in CONFIG's Headline as
"Excessive CPU Cycle Usage Detector".

<snip>

> +#define ERRPRINT(str, ...) printk(KERN_ERR"[%s] " str, __func__, ##__VA_ARGS__);
> +
This kind of macro should be removed (I think). At leaset, remove ";".

> +/* The root directory of procfs. */
> +static struct proc_dir_entry *maze_proc_dir;
> +
> +/* This list contains all maze_context.
> + * Note, this list is protected maze_list_lock.
> + */
> +static LIST_HEAD(maze_list);
> +
> +/* The protection of maze_list. */
> +static DEFINE_SPINLOCK(maze_list_lock);
> +
> +/* The interval of softirq timer. */
> +static int maze_timer_interval;
> +
> +/* This list is linked scheduling task. */
> +static DEFINE_PER_CPU(struct list_head, maze_queue);
> +
> +/* The timer of check maze-count.  */
> +static DEFINE_PER_CPU(struct timer_list, maze_timer);
> +
> +/*
> + * Calculate maze-count.
> + * The definition of maze-count is keeping TASK_RUNNINT time.
> + */
> +static inline cputime_t get_maze_count(struct maze_context *context)
> +{
> +	return (context->task->utime - context->reset_utime) +
> +		(context->task->stime - context->reset_stime);
> +}
> +
> +/*
> + * Reset maze-count.
> + * This function is called when watch task state is not TASK_RUNNING
> + * and it is preempted.
> + */
> +static inline void reset_maze_count(struct maze_context *context)
> +{
> +	context->reset_utime = context->task->utime;
> +	context->reset_stime = context->task->stime;
> +	context->flags = 0;
> +}
> +
> +/*
> + * Initializ init-process's maze_context.
> + * Note, this function is called start_kernel().
> + */
> +void maze_init_early(void)
> +{
> +	init_task.maze_context = NULL;
> +}
> +
> +/*
> + * Enqueue maze_context to maze_queue.
> + * Note, Must be called under spin_lock of context->lock.
> + */
> +static void enqueue(struct maze_context *context)
> +{
> +	if (!(context->state & MAZE_ENQUEUE)) {
> +		list_add(&context->queue, &__get_cpu_var(maze_queue));
> +		context->state |= MAZE_ENQUEUE;
> +	}
> +}
maze_enqueue() ?

> +
> +/*
> + * Free maze_context, if monitoring task exited.
> + * Note, this function is called do_exit().
> + */
> +void maze_exit(struct task_struct *task)
> +{
> +	struct maze_context *context;
> +	unsigned long flags;
> +
> +	spin_lock(&maze_list_lock);
> +
> +	context = task->maze_context;
> +	if (context) {
> +		spin_lock_irqsave(&context->lock, flags);
> +		task->maze_context = NULL;
> +
> +		if (atomic_xchg(&context->registered_notifier, 1))
> +			preempt_notifier_unregister(&context->notifier);
> +
please add comments why this atomic_xchg() is necessary.(and why atomic_t)
context->lock doesn't guard this ?

> +		context->state |= MAZE_EXIT;
> +
> +		list_del(&context->list);
> +		enqueue(context);
> +
> +		spin_unlock_irqrestore(&context->lock, flags);
> +	}
> +
> +	spin_unlock(&maze_list_lock);
> +}
> +
> +static inline void copy_limit_and_signal(struct maze_context *to,
> +			struct maze_context *from)
> +{
> +	to->soft_limit = from->soft_limit;
> +	to->hard_limit = from->hard_limit;
> +	to->soft_signal = from->soft_signal;
> +	to->hard_signal = from->hard_signal;
> +}
> +
> +/*
> + * Make sched-in task pickup.
> + */
> +static void sched_in_event(struct preempt_notifier *notifier, int cpu)
> +{
> +	struct maze_context *context;
> +	unsigned long flags;
> +
> +	context = current->maze_context;
> +	if (context) {
> +		spin_lock_irqsave(&context->lock, flags);
> +		enqueue(context);
> +		spin_unlock_irqrestore(&context->lock, flags);
> +	}
> +}
> +
> +/*
> + * Make scheduling task pickup.
> + */
> +static void sched_out_event(struct preempt_notifier *notifier,
> +			  struct task_struct *next)
> +{
> +	struct maze_context *context;
> +	unsigned long flags;
> +
> +	context = current->maze_context;
> +	if (context) {
> +		spin_lock_irqsave(&context->lock, flags);
> +		if (current->state != TASK_RUNNING)
> +			reset_maze_count(context);
> +		spin_unlock_irqrestore(&context->lock, flags);
> +	}
> +
> +	context = next->maze_context;
> +	if (context) {
> +		spin_lock_irqsave(&context->lock, flags);
> +		enqueue(context);
> +		spin_unlock_irqrestore(&context->lock, flags);
> +	}
> +}
> +
> +static struct preempt_ops preempt_ops = {
> +	.sched_in = sched_in_event,
> +	.sched_out = sched_out_event,
> +};
> +
> +/*
> + * Copy parent's maze_context to child process,
> + * if monitoring process forked.
> + * Note, this function is called copy_process().
> + */

You should explain this behavior in Documentation.


> +void maze_fork(struct task_struct *task)
> +{
> +	struct maze_context *context;
> +
> +	task->maze_context = NULL;
> +	if (!current->maze_context)
> +		return;
> +
> +	spin_lock(&maze_list_lock);
> +	context = kzalloc(sizeof(struct maze_context), GFP_KERNEL);
> +	if (unlikely(!context)) {
> +		ERRPRINT("fail to alloc maze_context.\n");
> +		goto unlock;
> +	}
Calling kzalloc() before spinlock is better.


> +
> +	spin_lock_init(&context->lock);
> +
> +	task->maze_context = context;
> +	context->task = task;
> +
> +	copy_limit_and_signal(task->maze_context, current->maze_context);
> +	preempt_notifier_init(&task->maze_context->notifier, &preempt_ops);
> +
> +	list_add(&task->maze_context->list, &maze_list);
> +
> +unlock:
> +	spin_unlock(&maze_list_lock);
> +}
> +
> +/*
> + * Reset maze-count.
> + * Note, this function is called sys_sched_yield().
> + */
> +void maze_sched_yield(struct task_struct *task)
> +{
> +	if (task->maze_context)
> +		reset_maze_count(task->maze_context);
> +}
> +
> +/*
> + * Setup preempt notifier,if watch task has not setup it.
> + */
> +static void set_preempt_notifier(struct maze_context *context)
> +{
> +	if (!atomic_xchg(&context->registered_notifier, 1))
> +		preempt_notifier_register(&context->notifier);
> +}
> +
> +/*
> + * Add monitoring task or copy new monitor state.
> + */
> +static int add_maze_context(int pid, struct maze_context *context)
> +{
> +	struct task_struct *task;
> +	unsigned long flags;
> +	int ret = 0;
> +
> +	rcu_read_lock();
> +
> +	task = find_task_by_vpid(pid);
> +	if (unlikely(!task))
> +		goto read_unlock;
> +
> +	spin_lock(&maze_list_lock);
> +
> +	if (!task->maze_context) {
> +		spin_lock_init(&context->lock);
> +		spin_lock_irqsave(&context->lock, flags);
> +
> +		task->maze_context = context;
> +		context->task = task;
> +
> +		list_add(&context->list, &maze_list);
> +		reset_maze_count(context);
> +
> +		preempt_notifier_init(&context->notifier, &preempt_ops);
> +		if (current == task)
> +			set_preempt_notifier(context);
> +
> +		spin_unlock_irqrestore(&context->lock, flags);
> +	} else {
> +		spin_lock_irqsave(&task->maze_context->lock, flags);
> +		copy_limit_and_signal(task->maze_context, context);
> +		spin_unlock_irqrestore(&task->maze_context->lock, flags);
> +		ret = 1;
> +	}
> +
> +	spin_unlock(&maze_list_lock);
> +
> +read_unlock:
> +	rcu_read_unlock();
> +	return ret;
> +}
> +
> +/*
> + * Build maze_context from a procfs input
> + */
> +static int build_maze_context(const char *read_line,
> +		    struct maze_context *context, int *pid)
> +{
> +	unsigned long soft_limit, hard_limit;
> +	int soft_signal, hard_signal;
> +	int res;
> +
> +	res = sscanf(read_line, "%d %ld %ld %d %d", pid, &soft_limit,
> +				 &hard_limit, &soft_signal, &hard_signal);
> +
This means that the interface requires " " in correct manner...
I think this kind of interface is not widely used in the kernel.
> +	if (res != 5 || *pid < 0 ||
> +		soft_limit < 0 || hard_limit < 0 ||
> +		soft_signal < 0 || hard_signal < 0)
> +		return -EINVAL;

soft_limit and hard_limit is "unsigned" long.
and soft_signal/hard_signal 's limitation is too big. 0 to NSIG ?


> +
> +	context->soft_limit = soft_limit;
> +	context->hard_limit = hard_limit;
> +	context->soft_signal = soft_signal;
> +	context->hard_signal = hard_signal;
> +
> +	return 0;
> +}
Why 'pid' is necessary as an argument ? How about returning PID ?

> +
> +static void timer_handler(unsigned long data);
> +
> +/*
> + * Setup softirq timer.
> + */
> +static void continue_timer(int cpu)
> +{
> +	setup_timer(&per_cpu(maze_timer, cpu),
> +				timer_handler,
> +				jiffies + maze_timer_interval);
> +
> +	add_timer_on(&per_cpu(maze_timer, cpu), cpu);
> +}
> +
> +/*
> + * Check maze-count that did't exceed limit.
> + */
> +static void check_limit(struct maze_context *context)
> +{
> +	cputime_t t = get_maze_count(context);
> +
> +	if (!(context->flags & MAZE_SENT_SOFT_SIG)) {
> +		if (t >= context->soft_limit) {
> +			/* Send soft-signal */
> +			send_sig(context->soft_signal, context->task, 1);
> +			context->flags |= MAZE_SENT_SOFT_SIG;
> +		}
> +	} else if (!(context->flags & MAZE_SENT_HARD_SIG)) {
> +		if (t >= context->hard_limit) {
> +			/* Send hard-signal */
> +			send_sig(context->hard_signal, context->task, 1);
> +			context->flags |= MAZE_SENT_HARD_SIG;
> +		}
> +	}
> +}
> +
> +/*
> + * Watch registed task.
> + * Timer interval is in CONFIG_MAZE_TIMER.
> + */
> +static void timer_handler(unsigned long data)
> +{
> +	struct maze_context *context, *next;
> +
> +	context = current->maze_context;
> +	if (context) {
> +		spin_lock(&context->lock);
> +		if (!context->state) {
> +			set_preempt_notifier(context);
> +			check_limit(context);
> +		}
> +		spin_unlock(&context->lock);
> +	}
> +
> +	if (!list_empty(&__get_cpu_var(maze_queue))) {
> +		list_for_each_entry_safe(context, next,
> +				    &__get_cpu_var(maze_queue), queue) {
> +			spin_lock(&context->lock);
> +
> +			if (context->state & MAZE_EXIT) {
> +				list_del(&context->queue);
> +				spin_unlock(&context->lock);
> +				kfree(context);
> +				continue;
> +			}
> +
> +			check_limit(context);
> +			list_del(&context->queue);
> +			context->state = 0;
> +			spin_unlock(&context->lock);
> +		}
> +	}
> +
> +	continue_timer(smp_processor_id());
> +}
> +
> +static int maze_entries_file_show(struct seq_file *seq, void *nouse)
> +{
> +	struct maze_context *context;
> +
> +	spin_lock(&maze_list_lock);
> +
> +	list_for_each_entry(context, &maze_list, list) {
> +		seq_printf(seq, "pid:%5d   ", context->task->pid);
> +		seq_printf(seq, "count:%6ld  ", get_maze_count(context));
> +		seq_printf(seq, "soft-limit:%6ld  ", context->soft_limit);
> +		seq_printf(seq, "hard-limit:%6ld  ", context->hard_limit);
> +		seq_printf(seq, "soft-signal:%2d  ", context->soft_signal);
> +		seq_printf(seq, "hard-signal:%2d  ", context->hard_signal);
> +		seq_printf(seq, "\n");
> +	}
> +
> +	spin_unlock(&maze_list_lock);
> +
> +	return 0;
> +}
> +
> +/*
> + * Open operation of /proc/maze/entries
> + */
> +static int maze_entries_file_open(struct inode *inode, struct file *file)
> +{
> +	return single_open(file, maze_entries_file_show, NULL);
> +}
> +
> +/*
> + * Release operation of /proc/maze/entries
> + */
> +static int maze_entries_file_release(struct inode *inode, struct file *file)
> +{
> +	return single_release(inode, file);
> +}
> +
> +/*
> + * Write operation of /proc/maze/entries
> + */
> +static ssize_t maze_entries_file_write(struct file *file,
> +					const char __user *buffer,
> +					size_t count, loff_t *ppos)
> +{
> +	struct maze_context *context;
> +	char read_line[32];
> +	int ret, pid;
> +
> +	if (!capable(CAP_SYS_ADMIN))
> +		return -EPERM;
> +
> +	if (count > sizeof(read_line) - 1)
> +		return -EINVAL;
> +
> +	if (copy_from_user(read_line, buffer, count))
> +		return -EFAULT;
> +
> +	read_line[count] = '\0';
> +
> +	context = kzalloc(sizeof(struct maze_context), GFP_KERNEL);
> +	if (unlikely(!context)) {
> +		ERRPRINT("fail to alloc maze_context.\n");
> +		return -ENOMEM;
> +	}
> +
> +	ret = build_maze_context(read_line, context, &pid);
> +	if (ret) {
> +		kfree(context);
> +		return ret;
> +	}
> +
> +	if (add_maze_context(pid, context))
> +		/* Free maze_context, if already added it. */
> +		kfree(context);
> +
> +	return count;
> +}
> +
> +static struct file_operations maze_entries_file_ops = {
> +	.owner	 = THIS_MODULE,
> +	.open	 = maze_entries_file_open,
> +	.read	 = seq_read,
> +	.write   = maze_entries_file_write,
> +	.llseek	 = seq_lseek,
> +	.release = maze_entries_file_release,
> +};
> +
> +/*
> + * Creating /proc/maze/
> + */
> +static int init_dir(void)
> +{
> +	maze_proc_dir =
> +		create_proc_entry("maze", S_IFDIR | S_IRUGO | S_IXUGO, NULL);
> +
> +	if (!maze_proc_dir)
> +		panic("fail to create /proc/maze\n");
> +
> +	return 0;
> +}
> +
> +/*
> + * Creating /proc/maze/entries
> + */
> +static int init_entries(void)
> +{
> +	struct proc_dir_entry *p;
> +
> +	p = create_proc_entry("entries", S_IRUGO, maze_proc_dir);
> +	if (p == NULL)
> +		panic("fail to create /proc/maze/entries\n");
> +
> +	p->proc_fops = &maze_entries_file_ops;
> +
> +	return 0;
> +}
> +
> +/*
> + * Initializ maze procfs.
> + */
> +static void __init init_proc(void)
> +{
> +	init_dir();
> +	init_entries();
> +}
> +
> +/*
> + * Initializ timer and queue
> + */
> +static void __init init_cpu(int cpu)
> +{
> +	INIT_LIST_HEAD(&per_cpu(maze_queue, cpu));
> +
> +	/* Setup softirq timer */
> +	continue_timer(cpu);
> +}
> +
> +static int __init maze_init(void)
> +{
> +	int cpu;
> +
> +	printk(KERN_INFO "Maze: Initializing\n");
> +	init_proc();
> +
> +	/* Initializ interval of softirq timer */
> +	maze_timer_interval = msecs_to_jiffies(CONFIG_MAZE_TIMER);
> +
> +	/* Initializ each cpu's timer and queue */
> +	for_each_online_cpu(cpu)
> +		init_cpu(cpu);
> +
> +	return 0;
> +}

Please add 'cpu hotplug support' to TODO List or
make configs depend on !HOTPLUG_CPU.

Thanks,
-Kame


> +__initcall(maze_init);
> --- linux-2.6.26-rc2-mm1.orig/kernel/sched.c	2008-05-22 10:08:46.000000000 +0900
> +++ linux-2.6.26-rc2-mm1/kernel/sched.c	2008-05-22 10:11:59.000000000 +0900
> @@ -71,6 +71,7 @@
>  #include <linux/debugfs.h>
>  #include <linux/ctype.h>
>  #include <linux/ftrace.h>
> +#include <linux/maze.h>
> 
>  #include <asm/tlb.h>
>  #include <asm/irq_regs.h>
> @@ -5557,6 +5558,8 @@ asmlinkage long sys_sched_yield(void)
>  	_raw_spin_unlock(&rq->lock);
>  	preempt_enable_no_resched();
> 
> +	maze_sched_yield(current);
> +
>  	schedule();
> 
>  	return 0;
> 


^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: [RFC][PATCH 2/2 v2] MAZE: Mazed processes monitor
  2008-05-23  1:13 ` KAMEZAWA Hiroyuki
@ 2008-05-23 10:30   ` Hirofumi Nakagawa
  0 siblings, 0 replies; 3+ messages in thread
From: Hirofumi Nakagawa @ 2008-05-23 10:30 UTC (permalink / raw)
  To: KAMEZAWA Hiroyuki; +Cc: linux-kernel, akpm

Thank you for your review.
I'll study your comments.

Thanks.
Hirofumi Nakagawa



^ permalink raw reply	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2008-05-23 10:36 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2008-05-22 10:01 [RFC][PATCH 2/2 v2] MAZE: Mazed processes monitor Hirofumi Nakagawa
2008-05-23  1:13 ` KAMEZAWA Hiroyuki
2008-05-23 10:30   ` Hirofumi Nakagawa

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox