All of lore.kernel.org
 help / color / mirror / Atom feed
From: Hirofumi Nakagawa <hnakagawa@miraclelinux.com>
To: linux-kernel@vger.kernel.org
Cc: akpm@linux-foundation.org
Subject: [RFC][PATCH 1/1] MAZE: Mazed processes monitor
Date: Tue, 13 May 2008 20:47:32 +0900	[thread overview]
Message-ID: <48297FD4.1060508@miraclelinux.com> (raw)

This patch for linux-2.6.25-mm1 that is tested on x86_64 hardware.

Signed-off-by: Hirofumi Nakagawa <hnakagawa@miraclelinux.com>
---
 include/linux/maze.h  |   92 +++++++++
 include/linux/sched.h |    3
 init/Kconfig          |   10 +
 init/main.c           |    2
 kernel/Makefile       |    1
 kernel/exit.c         |    2
 kernel/fork.c         |    2
 kernel/maze.c         |  466 ++++++++++++++++++++++++++++++++++++++++++++++++++
 kernel/sched.c        |    3
 9 files changed, 581 insertions(+)

--- /dev/null	1970-01-01 00:00:00.000000000 +0000
+++ linux-2.6.25-mm1-maze/include/linux/maze.h	2008-05-13 17:52:23.000000000 +0900
@@ -0,0 +1,92 @@
+/*
+ *  Copyright (C) 2007-2008 MIRACLE LINUX Corp.
+ *
+ *  Written by Hirofumi Nakagawa <hnakagawa@miraclelinux.com>
+ *
+ *	This program is free software; you can redistribute it and/or
+ *	modify it under the terms of the GNU General Public License as
+ *	published by the Free Software Foundation, version 2.
+ */
+#ifndef _LINUX_MAZE_H
+#define _LINUX_MAZE_H
+
+#ifdef __KERNEL__
+
+#include <linux/sched.h>
+#include <linux/list.h>
+
+struct maze_context {
+	struct task_struct *task;
+	int pid;
+
+	/* The stime of task when the maze count was reseted */
+	cputime_t reset_stime;
+	/* The utime of task when the maze count was reseted */
+	cputime_t reset_utime;
+
+	/* The soft limit of maze count */
+	unsigned long soft_limit;
+	/* THe hard limit of maze count */
+	unsigned long hard_limit;
+
+	/* The send signal when the maze count reach soft limit */
+	int soft_signal;
+	/* The send signal when the maze count reach hard limit */
+	int hard_signal;
+
+#define MAZE_SENT_SOFT_SIG  1
+#define MAZE_SENT_HARD_SIG  2
+	/* The flags of sent signal */
+	unsigned long flags;
+
+	/* This value is 1, if maze_context is enqueueing */
+	int enqueue;
+	/* This value is cpu number of enqueueing */
+	int enqueue_cpu;
+
+	/* This value is 1, if preempt_notifier is registered */
+	atomic_t registered_notifier;
+
+	struct preempt_notifier notifier;
+
+	/* This list_head is linked from the maze_queue */
+	struct list_head queue;
+	/* This list_head is linked from the maze_list */
+	struct list_head list;
+};
+
+#ifdef CONFIG_MAZE
+
+/* This function is called start_kernel() */
+extern void maze_init_early(void);
+
+/* This function is called do_exit() */
+extern void maze_exit(struct task_struct *task);
+
+/* This function is called copy_process() */
+extern void maze_fork(struct task_struct *task);
+
+/* This function is called sys_sched_yield() */
+extern void maze_sched_yield(struct task_struct *task);
+
+#else  /* !CONFIG_MAZE */
+
+static inline void maze_init_early(void)
+{
+}
+
+static inline maze_exit(struct task_struct *task)
+{
+}
+
+static inline void maze_fork(struct task_struct *task)
+{
+}
+
+static inline void maze_sched_yield(struct task_struct *task)
+{
+}
+#endif /* CONFIG_MAZE */
+
+#endif /* __KERNEL__    */
+#endif /* _LINUX_MAZE_H */
--- linux-2.6.25-mm1-maze.orig/include/linux/sched.h	2008-05-13 17:40:43.000000000 +0900
+++ linux-2.6.25-mm1-maze/include/linux/sched.h	2008-05-13 17:52:23.000000000 +0900
@@ -1254,6 +1254,9 @@ struct task_struct {
 	/* cg_list protected by css_set_lock and tsk->alloc_lock */
 	struct list_head cg_list;
 #endif
+#ifdef CONFIG_MAZE
+	struct maze_context *maze_context;
+#endif
 #ifdef CONFIG_FUTEX
 	struct robust_list_head __user *robust_list;
 #ifdef CONFIG_COMPAT
--- linux-2.6.25-mm1-maze.orig/init/Kconfig	2008-05-13 17:40:43.000000000 +0900
+++ linux-2.6.25-mm1-maze/init/Kconfig	2008-05-13 17:52:23.000000000 +0900
@@ -371,6 +371,16 @@ config RESOURCE_COUNTERS
           infrastructure that works with cgroups
 	depends on CGROUPS

+config MAZE
+	bool "MAZE monitor support"
+	select PREEMPT_NOTIFIERS
+	help
+	  MAZE is a function to monitor mazed processes which use excessive CPU cycles.
+	  This is a CGL (Carrier Grade Linux) requirement (AVL.14.0). MAZE detects such
+	  processes and sends specified signals to the processes for their termination.
+
+	  Say N if unsure.
+
 config MM_OWNER
 	bool

--- linux-2.6.25-mm1-maze.orig/init/main.c	2008-05-13 17:40:43.000000000 +0900
+++ linux-2.6.25-mm1-maze/init/main.c	2008-05-13 17:52:23.000000000 +0900
@@ -61,6 +61,7 @@
 #include <linux/sched.h>
 #include <linux/signal.h>
 #include <linux/idr.h>
+#include <linux/maze.h>

 #include <asm/io.h>
 #include <asm/bugs.h>
@@ -543,6 +544,7 @@ asmlinkage void __init start_kernel(void
 	boot_init_stack_canary();
 	debug_objects_early_init();
 	cgroup_init_early();
+	maze_init_early();

 	local_irq_disable();
 	early_boot_irqs_off();
--- linux-2.6.25-mm1-maze.orig/kernel/Makefile	2008-05-13 17:40:43.000000000 +0900
+++ linux-2.6.25-mm1-maze/kernel/Makefile	2008-05-13 17:52:23.000000000 +0900
@@ -80,6 +80,7 @@ obj-$(CONFIG_MARKERS) += marker.o
 obj-$(CONFIG_LATENCYTOP) += latencytop.o
 obj-$(CONFIG_FTRACE) += trace/
 obj-$(CONFIG_TRACING) += trace/
+obj-$(CONFIG_MAZE) += maze.o

 ifneq ($(CONFIG_SCHED_NO_NO_OMIT_FRAME_POINTER),y)
 # According to Alan Modra <alan@linuxcare.com.au>, the -fno-omit-frame-pointer is
--- linux-2.6.25-mm1-maze.orig/kernel/exit.c	2008-05-13 17:40:43.000000000 +0900
+++ linux-2.6.25-mm1-maze/kernel/exit.c	2008-05-13 17:52:23.000000000 +0900
@@ -44,6 +44,7 @@
 #include <linux/resource.h>
 #include <linux/blkdev.h>
 #include <linux/task_io_accounting_ops.h>
+#include <linux/maze.h>

 #include <asm/uaccess.h>
 #include <asm/unistd.h>
@@ -1058,6 +1059,7 @@ NORET_TYPE void do_exit(long code)
 	__exit_fs(tsk);
 	check_stack_usage();
 	exit_thread();
+	maze_exit(tsk);
 	cgroup_exit(tsk, 1);
 	exit_keys(tsk);

--- linux-2.6.25-mm1-maze.orig/kernel/fork.c	2008-05-13 17:40:43.000000000 +0900
+++ linux-2.6.25-mm1-maze/kernel/fork.c	2008-05-13 17:52:23.000000000 +0900
@@ -53,6 +53,7 @@
 #include <linux/tty.h>
 #include <linux/proc_fs.h>
 #include <linux/blkdev.h>
+#include <linux/maze.h>

 #include <asm/pgtable.h>
 #include <asm/pgalloc.h>
@@ -1363,6 +1364,7 @@ static struct task_struct *copy_process(
 	write_unlock_irq(&tasklist_lock);
 	proc_fork_connector(p);
 	cgroup_post_fork(p);
+	maze_fork(p);
 	return p;

 bad_fork_free_pid:
--- /dev/null	1970-01-01 00:00:00.000000000 +0000
+++ linux-2.6.25-mm1-maze/kernel/maze.c	2008-05-13 20:07:55.000000000 +0900
@@ -0,0 +1,466 @@
+/*
+ *  Copyright (C) 2007-2008 MIRACLE LINUX Corp.
+ *
+ *  Written by Hirofumi Nakagawa <hnakagawa@miraclelinux.com>
+ *
+ *	This program is free software; you can redistribute it and/or
+ *	modify it under the terms of the GNU General Public License as
+ *	published by the Free Software Foundation, version 2.
+ */
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/fs.h>
+#include <linux/proc_fs.h>
+#include <linux/seq_file.h>
+#include <linux/uaccess.h>
+#include <linux/timer.h>
+#include <linux/spinlock.h>
+#include <linux/smp.h>
+#include <linux/list.h>
+#include <linux/percpu.h>
+#include <linux/capability.h>
+#include <linux/maze.h>
+
+#define ERRPRINT(str, ...) printk("ERROR [%s] " str, __FUNCTION__, ##__VA_ARGS__);
+
+/* The root directory of procfs*/
+static struct proc_dir_entry *maze_proc_dir;
+
+static int maze_initialized;
+
+static struct list_head maze_list;
+static spinlock_t maze_list_lock;
+
+static int maze_timer_interval = 10;
+
+DEFINE_PER_CPU(struct list_head, maze_queue);
+DEFINE_PER_CPU(spinlock_t, maze_queue_lock);
+
+DEFINE_PER_CPU(struct timer_list, maze_timer);
+
+static inline void reset_maze_count(struct maze_context *context)
+{
+	context->reset_utime = context->task->utime;
+	context->reset_stime = context->task->stime;
+	context->flags = 0;
+}
+
+static inline cputime_t get_maze_count(struct maze_context *context)
+{
+	return (context->task->utime - context->reset_utime) +
+		(context->task->stime - context->reset_stime);
+}
+
+/*
+ * This function is called start_kernel()
+ */
+void maze_init_early(void)
+{
+	init_task.maze_context = NULL;
+}
+
+/*
+ * This function is called do_exit()
+ */
+void maze_exit(struct task_struct *task)
+{
+	unsigned long flags;
+
+	task_lock(task);
+	if (task->maze_context) {
+		spin_lock_irqsave(&per_cpu(maze_queue_lock,
+								   task->maze_context->enqueue_cpu), flags);
+		if (task->maze_context->enqueue)
+			list_del(&task->maze_context->queue);
+		spin_unlock_irqrestore(&per_cpu(maze_queue_lock,
+										task->maze_context->enqueue_cpu), flags);
+
+		spin_lock(&maze_list_lock);
+		list_del(&task->maze_context->list);
+		spin_unlock(&maze_list_lock);
+
+		kfree(task->maze_context);
+		task->maze_context = NULL;
+	}
+	task_unlock(task);
+}
+
+static inline void copy_limit_and_signal(struct maze_context *to,
+										 struct maze_context *from)
+{
+	to->soft_limit = from->soft_limit;
+	to->hard_limit = from->hard_limit;
+	to->soft_signal = from->soft_signal;
+	to->hard_signal = from->hard_signal;
+}
+
+/*
+ * Must be called under task_lock().
+ */
+static inline void enqueue(struct maze_context *context)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&__get_cpu_var(maze_queue_lock), flags);
+	if (!context->enqueue) {
+		list_add(&context->queue, &__get_cpu_var(maze_queue));
+		context->enqueue_cpu = smp_processor_id();
+		context->enqueue = 1;
+	}
+	spin_unlock_irqrestore(&__get_cpu_var(maze_queue_lock), flags);
+}
+
+static void sched_in_event(struct preempt_notifier *notifier, int cpu)
+{
+	task_lock(current);
+	if (current->maze_context) {
+		if (current->state != TASK_RUNNING)
+			reset_maze_count(current->maze_context);
+		enqueue(current->maze_context);
+	}
+	task_unlock(current);
+}
+
+static void sched_out_event(struct preempt_notifier *notifier,
+							struct task_struct *next)
+{
+	task_lock(next);
+	if (next->maze_context)
+		enqueue(next->maze_context);
+	task_unlock(next);
+}
+
+static struct preempt_ops preempt_ops = {
+	.sched_in = sched_in_event,
+	.sched_out = sched_out_event,
+};
+
+/*
+ * This function is called copy_process()
+ */
+void maze_fork(struct task_struct *task)
+{
+	task->maze_context = NULL;
+
+	if (!current->maze_context)
+		return;
+
+	task_lock(task);
+
+	task->maze_context = kzalloc(sizeof(struct maze_context), GFP_ATOMIC);
+	if (unlikely(!task->maze_context)) {
+		ERRPRINT("fail to alloc maze_context.\n");
+		goto task_unlock;
+	}
+	task->maze_context->task = task;
+	task->maze_context->pid = task->pid;
+
+	copy_limit_and_signal(task->maze_context, current->maze_context);
+	preempt_notifier_init(&task->maze_context->notifier, &preempt_ops);
+
+	spin_lock(&maze_list_lock);
+	list_add(&task->maze_context->list, &maze_list);
+	spin_unlock(&maze_list_lock);
+
+task_unlock:
+	task_unlock(task);
+}
+
+/*
+ * This function is called sys_sched_yield()
+ */
+void maze_sched_yield(struct task_struct *task)
+{
+	if (task->maze_context)
+		reset_maze_count(task->maze_context);
+}
+
+static void soft_limit_event(struct task_struct *task)
+{
+	send_sig(task->maze_context->soft_signal, task, 1);
+	task->maze_context->flags |= MAZE_SENT_SOFT_SIG;
+}
+
+static void hard_limit_event(struct task_struct *task)
+{
+	send_sig(task->maze_context->hard_signal, task, 1);
+	task->maze_context->flags |= MAZE_SENT_HARD_SIG;
+}
+
+static inline void set_preempt_notifir(struct maze_context *context)
+{
+	if (!atomic_xchg(&context->registered_notifier, 1))
+		preempt_notifier_register(&context->notifier);
+}
+
+static int add_maze_context(struct maze_context *context)
+{
+	struct task_struct *task;
+	int ret = 0;
+
+	rcu_read_lock();
+
+	task = find_task_by_pid(context->pid);
+	if (unlikely(!task))
+		goto read_unlock;
+
+	task_lock(task);
+
+	if (!task->maze_context) {
+		task->maze_context = context;
+		context->task = task;
+
+		spin_lock(&maze_list_lock);
+		list_add(&context->list, &maze_list);
+		spin_unlock(&maze_list_lock);
+
+		reset_maze_count(context);
+
+		preempt_notifier_init(&context->notifier, &preempt_ops);
+		if (current == task)
+		   set_preempt_notifir(context);
+	} else {
+		copy_limit_and_signal(task->maze_context, context);
+		ret = 1;
+	}
+
+	task_unlock(task);
+read_unlock:
+	rcu_read_unlock();
+	return ret;
+}
+
+static int build_maze_context(const char *read_line, struct maze_context *context)
+{
+	unsigned long soft_limit, hard_limit;
+	int soft_signal, hard_signal;
+	int pid, res;
+
+	res = sscanf(read_line, "%d %ld %ld %d %d", &pid, &soft_limit,
+				 &hard_limit, &soft_signal, &hard_signal);
+
+	if (res != 5 || pid < 0 ||
+		soft_limit < 0 || hard_limit < 0 ||
+		soft_signal < 0 || hard_signal < 0)
+		return -EINVAL;
+
+	context->pid = pid;
+	context->soft_limit = soft_limit;
+	context->hard_limit = hard_limit;
+	context->soft_signal = soft_signal;
+	context->hard_signal = hard_signal;
+
+	return 0;
+}
+
+static void timer_handler(unsigned long data);
+
+/*
+ * Setup softirq timer
+ */
+static void continue_timer(int cpu)
+{
+	struct timer_list *t;
+
+	t = &per_cpu(maze_timer, cpu);
+	t->function = timer_handler;
+	t->expires = jiffies + maze_timer_interval;
+	init_timer(t);
+	add_timer_on(t, cpu);
+}
+
+static inline void check_limit(struct maze_context *context)
+{
+	cputime_t t = get_maze_count(context);
+
+	if (!(context->flags & MAZE_SENT_SOFT_SIG)) {
+		if (t >= context->soft_limit)
+			soft_limit_event(context->task);
+	} else if (!(context->flags & MAZE_SENT_HARD_SIG)) {
+		if (t >= context->hard_limit)
+			hard_limit_event(context->task);
+	}
+}
+
+static void timer_handler(unsigned long data)
+{
+	struct maze_context *context, *next;
+
+	spin_lock(&__get_cpu_var(maze_queue_lock));
+
+	if (current->maze_context) {
+		set_preempt_notifir(current->maze_context);
+		if (!current->maze_context->enqueue)
+			check_limit(current->maze_context);
+	}
+
+	if (!list_empty(&__get_cpu_var(maze_queue))) {
+		list_for_each_entry_safe (context, next,
+								  &__get_cpu_var(maze_queue), queue) {
+			check_limit(context);
+			list_del(&context->queue);
+			context->enqueue = 0;
+		}
+	}
+
+	spin_unlock(&__get_cpu_var(maze_queue_lock));
+
+	continue_timer(smp_processor_id());
+}
+
+
+static int maze_entries_file_show(struct seq_file *seq, void *nouse)
+{
+	struct maze_context *context;
+
+	spin_lock(&maze_list_lock);
+
+	list_for_each_entry(context, &maze_list, list) {
+		seq_printf(seq, "pid:%5d   ", context->pid);
+		seq_printf(seq, "count:%6ld  ", get_maze_count(context));
+		seq_printf(seq, "soft limit:%6ld  ", context->soft_limit);
+		seq_printf(seq, "hard limit:%6ld  ", context->hard_limit);
+		seq_printf(seq, "soft signal:%2d  ", context->soft_signal);
+		seq_printf(seq, "hard signal:%2d  ", context->hard_signal);
+		seq_printf(seq, "\n");
+	}
+
+	spin_unlock(&maze_list_lock);
+
+	return 0;
+}
+
+/*
+ * Open operation of /proc/maze/entries
+ */
+static int maze_entries_file_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, maze_entries_file_show, NULL);
+}
+
+/*
+ * Release operation of /proc/maze/entries
+ */
+static int maze_entries_file_release(struct inode *inode, struct file *file)
+{
+	return single_release(inode, file);
+}
+
+/*
+ * Write operation of /proc/maze/entries
+ */
+static ssize_t maze_entries_file_write(struct file *file,
+					const char __user *buffer,
+					size_t count, loff_t *ppos)
+{
+	struct maze_context *context;
+	char read_line[32];
+	int ret;
+
+	if (!capable(CAP_SYS_ADMIN))
+		return -EPERM;
+
+	if (count > sizeof(read_line) - 1)
+		return -EINVAL;
+
+	if (copy_from_user(read_line, buffer, count))
+		return -EFAULT;
+
+	read_line[count] = '\0';
+
+	context = kzalloc(sizeof(struct maze_context), GFP_ATOMIC);
+	if (unlikely(!context)) {
+		ERRPRINT("fail to alloc maze_context.\n");
+		return -ENOMEM;
+	}
+
+	ret = build_maze_context(read_line, context);
+	if (ret) {
+		kfree(context);
+		return ret;
+	}
+
+	if (add_maze_context(context))
+		kfree(context);
+
+	return count;
+}
+
+static struct file_operations maze_entries_file_ops = {
+	.owner	 = THIS_MODULE,
+	.open	 = maze_entries_file_open,
+	.read	 = seq_read,
+	.write   = maze_entries_file_write,
+	.llseek	 = seq_lseek,
+	.release = maze_entries_file_release,
+};
+
+/*
+ * Creating /proc/maze/
+ */
+static inline int init_dir(void)
+{
+	maze_proc_dir =
+		create_proc_entry("maze",
+						  S_IFDIR | S_IRUGO | S_IXUGO,
+						  NULL);
+
+	if (!maze_proc_dir)
+		panic("fail to create /proc/maze\n");
+
+	return 0;
+}
+
+
+/*
+ * Creating /proc/maze/entries
+ */
+static inline int init_entries(void)
+{
+	struct proc_dir_entry *p;
+
+	p = create_proc_entry("entries", S_IRUGO, maze_proc_dir);
+	if (p == NULL)
+		panic("fail to create /proc/maze/entries\n");
+
+	p->proc_fops = &maze_entries_file_ops;
+
+	return 0;
+}
+
+/*
+ * Initializing maze procfs
+ */
+static void __init init_proc(void)
+{
+	init_dir();
+	init_entries();
+}
+
+static void __init init_cpu(int cpu)
+{
+	spin_lock_init(&per_cpu(maze_queue_lock, cpu));
+	INIT_LIST_HEAD(&per_cpu(maze_queue, cpu));
+
+	/* Setup softirq timer */
+	continue_timer(cpu);
+}
+
+static int __init maze_init(void)
+{
+	int cpu;
+
+	printk(KERN_INFO "Maze: Initializing\n");
+	maze_initialized = 1;
+	init_proc();
+
+	spin_lock_init(&maze_list_lock);
+	INIT_LIST_HEAD(&maze_list);
+
+	for_each_online_cpu(cpu)
+		init_cpu(cpu);
+
+	return 0;
+}
+late_initcall(maze_init);
--- linux-2.6.25-mm1-maze.orig/kernel/sched.c	2008-05-13 17:40:43.000000000 +0900
+++ linux-2.6.25-mm1-maze/kernel/sched.c	2008-05-13 17:52:23.000000000 +0900
@@ -68,6 +68,7 @@
 #include <linux/hrtimer.h>
 #include <linux/tick.h>
 #include <linux/ftrace.h>
+#include <linux/maze.h>

 #include <asm/tlb.h>
 #include <asm/irq_regs.h>
@@ -5157,6 +5158,8 @@ asmlinkage long sys_sched_yield(void)
 	_raw_spin_unlock(&rq->lock);
 	preempt_enable_no_resched();

+	maze_sched_yield(current);
+
 	schedule();

 	return 0;




             reply	other threads:[~2008-05-13 12:21 UTC|newest]

Thread overview: 2+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2008-05-13 11:47 Hirofumi Nakagawa [this message]
2008-05-13 16:04 ` [RFC][PATCH 1/1] MAZE: Mazed processes monitor Andrew Morton

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=48297FD4.1060508@miraclelinux.com \
    --to=hnakagawa@miraclelinux.com \
    --cc=akpm@linux-foundation.org \
    --cc=linux-kernel@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.