From: Tejun Heo <tj@kernel.org>
To: linux-kernel@vger.kernel.org, rusty@rustcorp.com.au,
sivanich@sgi.com, heiko.carstens@de.ibm.com,
torvalds@linux-foundation.org, mingo@elte.hu,
peterz@infradead.org, dipankar@in.ibm.com, josh@freedesktop.org,
paulmck@linux.vnet.ibm.com, oleg@redhat.com,
akpm@linux-foundation.org
Cc: Tejun Heo <tj@kernel.org>
Subject: [PATCH 1/4] cpuhog: implement cpuhog
Date: Tue, 9 Mar 2010 00:53:20 +0900 [thread overview]
Message-ID: <1268063603-7425-2-git-send-email-tj@kernel.org> (raw)
In-Reply-To: <1268063603-7425-1-git-send-email-tj@kernel.org>
Implement a simplistic per-cpu maximum priority cpu hogging mechanism
named cpuhog. A callback can be scheduled to run on one or multiple
cpus with maximum priority monopolizing those cpus. This is primarily
to replace and unify RT workqueue usage in stop_machine and scheduler
migration_thread which currently is serving multiple purposes.
Four functions are provided - hog_one_cpu(), hog_one_cpu_nowait(),
hog_cpus() and try_hog_cpus().
This is to allow clean sharing of resources among stop_cpu and all the
migration thread users. One cpuhog thread per cpu is created which is
currently named "hog/CPU". This will eventually replace the migration
thread and take on its name.
Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Dimitri Sivanich <sivanich@sgi.com>
---
include/linux/cpuhog.h | 24 +++
kernel/Makefile | 2 +-
kernel/cpuhog.c | 362 ++++++++++++++++++++++++++++++++++++++++++++++++
3 files changed, 387 insertions(+), 1 deletions(-)
create mode 100644 include/linux/cpuhog.h
create mode 100644 kernel/cpuhog.c
diff --git a/include/linux/cpuhog.h b/include/linux/cpuhog.h
new file mode 100644
index 0000000..5252884
--- /dev/null
+++ b/include/linux/cpuhog.h
@@ -0,0 +1,24 @@
+/*
+ * linux/cpuhog.h - CPU hogs to monopolize CPUs
+ *
+ * Copyright (C) 2010 SUSE Linux Products GmbH
+ *
+ * This file is released under the GPLv2.
+ */
+#ifndef _LINUX_CPUHOG_H
+#define _LINUX_CPUHOG_H
+
+#include <linux/cpumask.h>
+#include <linux/list.h>
+
+/* function run by a cpuhog; hog_one_cpu() hands its return value back */
+typedef int (*cpuhog_fn_t)(void *arg);
+
+/* completion tracking for works, defined in kernel/cpuhog.c */
+struct cpuhog_done;
+
+/*
+ * One request queued on a cpuhog.  hog_one_cpu_nowait() callers only
+ * provide the storage; the fields are filled in by the cpuhog code.
+ */
+struct cpuhog_work {
+	struct list_head	list;		/* cpuhog->works */
+	cpuhog_fn_t		fn;		/* function to execute */
+	void			*arg;		/* argument passed to @fn */
+	struct cpuhog_done	*done;		/* NULL if nobody waits */
+};
+
+int hog_one_cpu(unsigned int cpu, cpuhog_fn_t fn, void *arg);
+void hog_one_cpu_nowait(unsigned int cpu, cpuhog_fn_t fn, void *arg,
+			struct cpuhog_work *work_buf);
+int hog_cpus(const struct cpumask *cpumask, cpuhog_fn_t fn, void *arg);
+int try_hog_cpus(const struct cpumask *cpumask, cpuhog_fn_t fn, void *arg);
+
+#endif	/* _LINUX_CPUHOG_H */
diff --git a/kernel/Makefile b/kernel/Makefile
index 864ff75..1f84388 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -10,7 +10,7 @@ obj-y = sched.o fork.o exec_domain.o panic.o printk.o \
kthread.o wait.o kfifo.o sys_ni.o posix-cpu-timers.o mutex.o \
hrtimer.o rwsem.o nsproxy.o srcu.o semaphore.o \
notifier.o ksysfs.o pm_qos_params.o sched_clock.o cred.o \
- async.o
+ async.o cpuhog.o
obj-y += groups.o
ifdef CONFIG_FUNCTION_TRACER
diff --git a/kernel/cpuhog.c b/kernel/cpuhog.c
new file mode 100644
index 0000000..c25c510
--- /dev/null
+++ b/kernel/cpuhog.c
@@ -0,0 +1,362 @@
+/*
+ * kernel/cpuhog.c - CPU hogs to monopolize CPUs
+ *
+ * Copyright (C) 2010 SUSE Linux Products GmbH
+ * Copyright (C) 2010 Tejun Heo <tj@kernel.org>
+ *
+ * This file is released under the GPLv2.
+ *
+ * Simplistic per-cpu maximum priority cpu hogging mechanism. The
+ * caller can specify a function to be executed on a single or
+ * multiple cpus preempting all other processes and monopolizing those
+ * cpus until it sleeps or finishes.
+ *
+ * Resources for this mechanism are preallocated when a cpu is brought
+ * up and requests are guaranteed to be served as long as the target
+ * cpus are online; however, execution context is limited to one per
+ * cpu, so don't hog for too long.
+ */
+#include <linux/completion.h>
+#include <linux/cpu.h>
+#include <linux/cpuhog.h>
+#include <linux/init.h>
+#include <linux/kthread.h>
+#include <linux/percpu.h>
+
+/*
+ * Completion bookkeeping for a request.  One instance may be pointed
+ * to by works queued on several cpus; errors reported by the executed
+ * function are collected here as well.
+ */
+struct cpuhog_done {
+	/* number of works which still have to be accounted for */
+	atomic_t		nr_todo;
+	/* set as soon as one work was actually executed */
+	bool			executed;
+	/* a non-zero return value of the function, 0 if there was none */
+	int			ret;
+	/* completed when nr_todo drops to 0 */
+	struct completion	completion;
+};
+
+/*
+ * The hog itself.  There is one for every possible cpu; it is only
+ * enabled while its cpu is online.
+ */
+struct cpuhog {
+	/* lock used around accesses to works and enabled */
+	spinlock_t		lock;
+	/* works waiting to be executed */
+	struct list_head	works;
+	/* the thread executing the works */
+	struct task_struct	*thread;
+	/* are works accepted at the moment? */
+	bool			enabled;
+};
+
+static DEFINE_PER_CPU(struct cpuhog, cpuhog);
+
+/* clear @done and set it up to expect @nr_todo works to be signalled */
+static void cpuhog_init_done(struct cpuhog_done *done, unsigned int nr_todo)
+{
+	memset(done, 0, sizeof(struct cpuhog_done));
+	init_completion(&done->completion);
+	atomic_set(&done->nr_todo, nr_todo);
+}
+
+/*
+ * Account for one work on @done.  @executed tells whether the work's
+ * function actually ran.  Nothing is done if @done is NULL.
+ */
+static void cpuhog_signal_done(struct cpuhog_done *done, bool executed)
+{
+	if (!done)
+		return;
+
+	if (executed)
+		done->executed = true;
+
+	/* whoever brings the count down to zero fires the completion */
+	if (atomic_dec_and_test(&done->nr_todo))
+		complete(&done->completion);
+}
+
+/*
+ * Hand @work over to @hog.  A hog which isn't enabled doesn't take the
+ * work; it is signalled as done without having been executed instead.
+ */
+static void cpuhog_queue_work(struct cpuhog *hog, struct cpuhog_work *work)
+{
+	unsigned long irqflags;
+
+	spin_lock_irqsave(&hog->lock, irqflags);
+
+	if (!hog->enabled) {
+		cpuhog_signal_done(work->done, false);
+	} else {
+		list_add_tail(&work->list, &hog->works);
+		wake_up_process(hog->thread);
+	}
+
+	spin_unlock_irqrestore(&hog->lock, irqflags);
+}
+
+/**
+ * hog_one_cpu - run a function on a cpu, monopolizing it
+ * @cpu: cpu to hog
+ * @fn: function to execute
+ * @arg: argument to @fn
+ *
+ * Queue @fn(@arg) on the hog of @cpu and wait until it has been dealt
+ * with.  @fn is run in a process context with the highest priority,
+ * preempting any task on the cpu and monopolizing it.
+ *
+ * @cpu is not kept online by this function.  If it goes down in the
+ * middle, execution may happen partially or fully on different cpus.
+ * Either @fn has to be ready for that or the caller has to make sure
+ * that @cpu stays online until this function returns.
+ *
+ * CONTEXT:
+ * Might sleep.
+ *
+ * RETURNS:
+ * -ENOENT if @fn(@arg) was not executed because @cpu was offline;
+ * otherwise, the return value of @fn.
+ */
+int hog_one_cpu(unsigned int cpu, cpuhog_fn_t fn, void *arg)
+{
+	struct cpuhog_done done;
+	struct cpuhog_work work = {
+		.fn	= fn,
+		.arg	= arg,
+		.done	= &done,
+	};
+	struct cpuhog *hog = &per_cpu(cpuhog, cpu);
+
+	/* exactly one work has to be signalled before we may return */
+	cpuhog_init_done(&done, 1);
+	cpuhog_queue_work(hog, &work);
+	wait_for_completion(&done.completion);
+
+	if (!done.executed)
+		return -ENOENT;
+	return done.ret;
+}
+
+/**
+ * hog_one_cpu_nowait - queue a function on a cpu hog and return
+ * @cpu: cpu to hog
+ * @fn: function to execute
+ * @arg: argument to @fn
+ * @work_buf: caller provided storage for the work being queued
+ *
+ * Like hog_one_cpu() except that this function returns right after
+ * queueing.  No completion is attached to the work, so the caller can
+ * neither wait for it nor find out whether it was executed.
+ *
+ * @work_buf is overwritten here.  It is up to the caller to make sure
+ * that it is not in use at the moment and that it stays untouched
+ * until cpuhog starts executing @fn.
+ *
+ * CONTEXT:
+ * Don't care.
+ */
+void hog_one_cpu_nowait(unsigned int cpu, cpuhog_fn_t fn, void *arg,
+			struct cpuhog_work *work_buf)
+{
+	struct cpuhog *hog = &per_cpu(cpuhog, cpu);
+
+	/* zeroing leaves ->done NULL, i.e. nobody is waiting */
+	memset(work_buf, 0, sizeof(struct cpuhog_work));
+	work_buf->arg = arg;
+	work_buf->fn = fn;
+
+	cpuhog_queue_work(hog, work_buf);
+}
+
+/* static data for hog_cpus, users are serialized by hog_cpus_mutex */
+static DEFINE_MUTEX(hog_cpus_mutex);
+static DEFINE_PER_CPU(struct cpuhog_work, hog_cpus_work);
+
+/*
+ * Common worker of hog_cpus() and try_hog_cpus(): queue @fn(@arg) on
+ * the hogs of all cpus in @cpumask and wait until every one of them
+ * has been dealt with.
+ *
+ * The per-cpu hog_cpus_work entries are used as works, so this must
+ * only be called with hog_cpus_mutex held.  For the same reason the
+ * function is local to this file.
+ *
+ * Returns -ENOENT if @fn was not executed on any cpu; otherwise, 0 or
+ * a non-zero value returned by one of the executions.
+ */
+static int __hog_cpus(const struct cpumask *cpumask, cpuhog_fn_t fn,
+		      void *arg)
+{
+	struct cpuhog_work *work;
+	struct cpuhog_done done;
+	unsigned int cpu;
+
+	/* initialize works and done */
+	for_each_cpu(cpu, cpumask) {
+		work = &per_cpu(hog_cpus_work, cpu);
+		work->fn = fn;
+		work->arg = arg;
+		work->done = &done;
+	}
+	/* one signal is expected per cpu in @cpumask, offline or not */
+	cpuhog_init_done(&done, cpumask_weight(cpumask));
+
+	/*
+	 * Disable preemption while queueing to avoid getting
+	 * preempted by a hog which might wait for other hogs to enter
+	 * @fn which can lead to deadlock.
+	 */
+	preempt_disable();
+	for_each_cpu(cpu, cpumask)
+		cpuhog_queue_work(&per_cpu(cpuhog, cpu),
+				  &per_cpu(hog_cpus_work, cpu));
+	preempt_enable();
+
+	wait_for_completion(&done.completion);
+	return done.executed ? done.ret : -ENOENT;
+}
+
+/**
+ * hog_cpus - run a function on multiple cpus, monopolizing them
+ * @cpumask: cpus to hog
+ * @fn: function to execute
+ * @arg: argument to @fn
+ *
+ * Execute @fn(@arg) on the online cpus in @cpumask and return once all
+ * executions are complete.  On each target cpu, @fn is run in a
+ * process context with the highest priority, preempting any task on
+ * the cpu and monopolizing it.
+ *
+ * The cpus in @cpumask are not kept online by this function.  If some
+ * of them go down in the middle, execution on those cpus may happen
+ * partially or fully on different cpus.  Either @fn has to be ready
+ * for that or the caller has to make sure that the cpus stay online
+ * until this function returns.
+ *
+ * Calls to hog_cpus() are serialized, which makes it safe for @fn to
+ * wait for all cpus to start executing it.
+ *
+ * CONTEXT:
+ * Might sleep.
+ *
+ * RETURNS:
+ * -ENOENT if @fn(@arg) was not executed at all because all cpus in
+ * @cpumask were offline; otherwise, 0 if all executions of @fn
+ * returned 0, any non zero return value if any returned non zero.
+ */
+int hog_cpus(const struct cpumask *cpumask, cpuhog_fn_t fn, void *arg)
+{
+	int err;
+
+	/* the works are static, so only one request can be in flight */
+	mutex_lock(&hog_cpus_mutex);
+	err = __hog_cpus(cpumask, fn, arg);
+	mutex_unlock(&hog_cpus_mutex);
+
+	return err;
+}
+
+/**
+ * try_hog_cpus - hog multiple cpus unless the facility is busy
+ * @cpumask: cpus to hog
+ * @fn: function to execute
+ * @arg: argument to @fn
+ *
+ * Same as hog_cpus() but instead of waiting for another user of the
+ * facility to finish, this function gives up with -EAGAIN.
+ *
+ * CONTEXT:
+ * Might sleep.
+ *
+ * RETURNS:
+ * -EAGAIN if someone else is already hogging cpus, -ENOENT if
+ * @fn(@arg) was not executed at all because all cpus in @cpumask were
+ * offline; otherwise, 0 if all executions of @fn returned 0, any non
+ * zero return value if any returned non zero.
+ */
+int try_hog_cpus(const struct cpumask *cpumask, cpuhog_fn_t fn, void *arg)
+{
+	int err = -EAGAIN;
+
+	/* the works are static, so only one request can be in flight */
+	if (mutex_trylock(&hog_cpus_mutex)) {
+		err = __hog_cpus(cpumask, fn, arg);
+		mutex_unlock(&hog_cpus_mutex);
+	}
+
+	return err;
+}
+
+/*
+ * Body of a hog thread.  @data is the cpuhog the thread belongs to.
+ * Works are taken off the hog's list one at a time and executed; the
+ * thread sleeps while the list is empty and exits with 0 once it is
+ * asked to stop.
+ */
+static int cpuhog_thread(void *data)
+{
+	struct cpuhog *hog = data;
+	struct cpuhog_work *work;
+	int ret;
+
+repeat:
+	/*
+	 * The state is changed before the stop request and the list
+	 * are checked so that a wake up arriving in between is not
+	 * lost.
+	 */
+	set_current_state(TASK_INTERRUPTIBLE);	/* mb paired w/ kthread_stop */
+
+	if (kthread_should_stop()) {
+		__set_current_state(TASK_RUNNING);
+		return 0;
+	}
+
+	work = NULL;
+	spin_lock_irq(&hog->lock);
+	if (!list_empty(&hog->works)) {
+		work = list_first_entry(&hog->works, struct cpuhog_work, list);
+		list_del_init(&work->list);
+	}
+	spin_unlock_irq(&hog->lock);
+
+	if (work) {
+		struct cpuhog_done *done = work->done;
+
+		__set_current_state(TASK_RUNNING);
+
+		ret = work->fn(work->arg);
+		/*
+		 * Works queued by hog_one_cpu_nowait() have no done to
+		 * record the error in.
+		 */
+		if (ret && done)
+			done->ret = ret;
+
+		cpuhog_signal_done(done, true);
+	} else {
+		schedule();
+	}
+
+	goto repeat;
+}
+
+/*
+ * Manage the hog of a cpu, mostly lifted from sched migration thread
+ * mgmt.
+ *
+ * CPU_UP_PREPARE creates the hog thread as a SCHED_FIFO task with the
+ * highest RT priority, CPU_ONLINE binds it to the cpu and enables the
+ * hog, and CPU_UP_CANCELED / CPU_DEAD stop the thread and complete
+ * whatever is still queued as not executed.
+ *
+ * Returns NOTIFY_BAD if the thread could not be created, NOTIFY_OK
+ * otherwise.
+ */
+static int __cpuinit cpuhog_cpu_callback(struct notifier_block *nfb,
+					 unsigned long action, void *hcpu)
+{
+	struct sched_param param = { .sched_priority = MAX_RT_PRIO - 1 };
+	unsigned int cpu = (unsigned long)hcpu;
+	struct cpuhog *hog = &per_cpu(cpuhog, cpu);
+	struct cpuhog_work *work, *next;
+	struct task_struct *p;
+
+	switch (action & ~CPU_TASKS_FROZEN) {
+	case CPU_UP_PREPARE:
+		BUG_ON(hog->thread || hog->enabled || !list_empty(&hog->works));
+		p = kthread_create(cpuhog_thread, hog, "hog/%d", cpu);
+		if (IS_ERR(p))
+			return NOTIFY_BAD;
+		sched_setscheduler_nocheck(p, SCHED_FIFO, &param);
+		/* hold a reference until the thread is released below */
+		get_task_struct(p);
+		hog->thread = p;
+		break;
+
+	case CPU_ONLINE:
+		kthread_bind(hog->thread, cpu);
+		/* strictly unnecessary, as first user will wake it */
+		wake_up_process(hog->thread);
+		/* mark enabled */
+		spin_lock_irq(&hog->lock);
+		hog->enabled = true;
+		spin_unlock_irq(&hog->lock);
+		break;
+
+#ifdef CONFIG_HOTPLUG_CPU
+	case CPU_UP_CANCELED:
+	case CPU_DEAD:
+		/* kill the hog */
+		kthread_stop(hog->thread);
+		/*
+		 * Drain remaining works.  Each work is unlinked before
+		 * it is signalled: once its waiter has been woken up
+		 * the work may go away, so it must neither be touched
+		 * again nor stay on the list, which has to be empty on
+		 * the next CPU_UP_PREPARE.
+		 */
+		spin_lock_irq(&hog->lock);
+		list_for_each_entry_safe(work, next, &hog->works, list) {
+			list_del_init(&work->list);
+			cpuhog_signal_done(work->done, false);
+		}
+		hog->enabled = false;
+		spin_unlock_irq(&hog->lock);
+		/* release the hog */
+		put_task_struct(hog->thread);
+		hog->thread = NULL;
+		break;
+#endif
+	}
+
+	return NOTIFY_OK;
+}
+
+/*
+ * The priority is raised so that cpuhog is already usable when other
+ * cpu notifiers run.  The value currently is the same as the one used
+ * by sched migration_notifier.
+ */
+static struct notifier_block __cpuinitdata cpuhog_cpu_notifier = {
+	.notifier_call	= cpuhog_cpu_callback,
+	.priority	= 10,
+};
+
+/*
+ * Set up the hogs of all possible cpus, bring up the one of the cpu
+ * this runs on by calling the notifier callback by hand, and register
+ * the notifier for the other cpus.
+ */
+static int __init cpuhog_init(void)
+{
+	void *boot_cpu = (void *)(long)smp_processor_id();
+	unsigned int cpu;
+	int rc;
+
+	for_each_possible_cpu(cpu) {
+		struct cpuhog *hog = &per_cpu(cpuhog, cpu);
+
+		INIT_LIST_HEAD(&hog->works);
+		spin_lock_init(&hog->lock);
+	}
+
+	/* start one for the boot cpu, failing to do so is fatal */
+	rc = cpuhog_cpu_callback(&cpuhog_cpu_notifier, CPU_UP_PREPARE,
+				 boot_cpu);
+	BUG_ON(rc == NOTIFY_BAD);
+	cpuhog_cpu_callback(&cpuhog_cpu_notifier, CPU_ONLINE, boot_cpu);
+	register_cpu_notifier(&cpuhog_cpu_notifier);
+
+	return 0;
+}
+early_initcall(cpuhog_init);
--
1.6.4.2
next prev parent reply other threads:[~2010-03-08 15:54 UTC|newest]
Thread overview: 23+ messages / expand[flat|nested] mbox.gz Atom feed top
2010-03-08 15:53 [PATCHSET] cpuhog: implement and use cpuhog Tejun Heo
2010-03-08 15:53 ` Tejun Heo [this message]
2010-03-08 19:01 ` [PATCH 1/4] cpuhog: implement cpuhog Oleg Nesterov
2010-03-08 23:18 ` Tejun Heo
2010-03-08 15:53 ` [PATCH 2/4] stop_machine: reimplement using cpuhog Tejun Heo
2010-03-08 16:32 ` Arjan van de Ven
2010-03-08 23:21 ` Tejun Heo
2010-03-08 17:10 ` Heiko Carstens
2010-03-08 18:27 ` Oleg Nesterov
2010-03-08 19:37 ` Heiko Carstens
2010-03-08 23:39 ` Tejun Heo
2010-03-09 7:09 ` Heiko Carstens
2010-03-09 7:16 ` Tejun Heo
2010-03-08 19:06 ` Oleg Nesterov
2010-03-08 23:22 ` Tejun Heo
2010-03-08 15:53 ` [PATCH 3/4] scheduler: replace migration_thread with cpuhog Tejun Heo
2010-03-08 15:53 ` [PATCH 4/4] scheduler: kill paranoia check in synchronize_sched_expedited() Tejun Heo
2010-03-10 19:25 ` [PATCHSET] cpuhog: implement and use cpuhog Peter Zijlstra
2010-03-12 3:13 ` Tejun Heo
2010-03-29 6:46 ` Rusty Russell
2010-03-29 9:11 ` Peter Zijlstra
2010-04-02 5:45 ` Tejun Heo
-- strict thread matches above, loose matches on Subject: below --
2010-03-17 8:40 [PATCHSET sched/core] cpuhog: implement and use cpuhog, take#2 Tejun Heo
2010-03-17 8:40 ` [PATCH 1/4] cpuhog: implement cpuhog Tejun Heo
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1268063603-7425-2-git-send-email-tj@kernel.org \
--to=tj@kernel.org \
--cc=akpm@linux-foundation.org \
--cc=dipankar@in.ibm.com \
--cc=heiko.carstens@de.ibm.com \
--cc=josh@freedesktop.org \
--cc=linux-kernel@vger.kernel.org \
--cc=mingo@elte.hu \
--cc=oleg@redhat.com \
--cc=paulmck@linux.vnet.ibm.com \
--cc=peterz@infradead.org \
--cc=rusty@rustcorp.com.au \
--cc=sivanich@sgi.com \
--cc=torvalds@linux-foundation.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.