From: Tejun Heo <tj@kernel.org>
To: linux-kernel@vger.kernel.org, rusty@rustcorp.com.au,
sivanich@sgi.com, heiko.carstens@de.ibm.com,
torvalds@linux-foundation.org, mingo@elte.hu,
peterz@infradead.org, dipankar@in.ibm.com, josh@freedesktop.org,
paulmck@linux.vnet.ibm.com, oleg@redhat.com,
akpm@linux-foundation.org
Cc: Tejun Heo <tj@kernel.org>
Subject: [PATCH 1/4] cpuhog: implement cpuhog
Date: Tue, 9 Mar 2010 00:53:20 +0900 [thread overview]
Message-ID: <1268063603-7425-2-git-send-email-tj@kernel.org> (raw)
In-Reply-To: <1268063603-7425-1-git-send-email-tj@kernel.org>
Implement a simplistic per-cpu maximum priority cpu hogging mechanism
named cpuhog. A callback can be scheduled to run on one or multiple
cpus with maximum priority monopolizing those cpus. This is primarily
to replace and unify RT workqueue usage in stop_machine and scheduler
migration_thread which currently is serving multiple purposes.
Four functions are provided - hog_one_cpu(), hog_one_cpu_nowait(),
hog_cpus() and try_hog_cpus().
This is to allow clean sharing of resources among stop_cpu and all the
migration thread users. One cpuhog thread per cpu is created which is
currently named "hog/CPU". This will eventually replace the migration
thread and take on its name.
Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Dimitri Sivanich <sivanich@sgi.com>
---
include/linux/cpuhog.h | 24 +++
kernel/Makefile | 2 +-
kernel/cpuhog.c | 362 ++++++++++++++++++++++++++++++++++++++++++++++++
3 files changed, 387 insertions(+), 1 deletions(-)
create mode 100644 include/linux/cpuhog.h
create mode 100644 kernel/cpuhog.c
diff --git a/include/linux/cpuhog.h b/include/linux/cpuhog.h
new file mode 100644
index 0000000..5252884
--- /dev/null
+++ b/include/linux/cpuhog.h
@@ -0,0 +1,24 @@
+/*
+ * linux/cpuhog.h - CPU hogs to monopolize CPUs
+ *
+ * Copyright (C) 2010 SUSE Linux Products GmbH
+ *
+ * This file is released under the GPLv2.
+ */
+#include <linux/cpumask.h>
+#include <linux/list.h>
+
+typedef int (*cpuhog_fn_t)(void *arg);
+
+/*
+ * One request for a cpuhog thread: the thread calls fn(arg) and
+ * reports the outcome through done when done is non-NULL.
+ */
+struct cpuhog_work {
+ struct list_head list; /* cpuhog->works */
+ cpuhog_fn_t fn; /* function the hog thread calls */
+ void *arg; /* argument handed to fn */
+ struct cpuhog_done *done; /* completion tracking, NULL if nobody waits */
+};
+
+int hog_one_cpu(unsigned int cpu, cpuhog_fn_t fn, void *arg);
+void hog_one_cpu_nowait(unsigned int cpu, cpuhog_fn_t fn, void *arg,
+ struct cpuhog_work *work_buf);
+int hog_cpus(const struct cpumask *cpumask, cpuhog_fn_t fn, void *arg);
+int try_hog_cpus(const struct cpumask *cpumask, cpuhog_fn_t fn, void *arg);
diff --git a/kernel/Makefile b/kernel/Makefile
index 864ff75..1f84388 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -10,7 +10,7 @@ obj-y = sched.o fork.o exec_domain.o panic.o printk.o \
kthread.o wait.o kfifo.o sys_ni.o posix-cpu-timers.o mutex.o \
hrtimer.o rwsem.o nsproxy.o srcu.o semaphore.o \
notifier.o ksysfs.o pm_qos_params.o sched_clock.o cred.o \
- async.o
+ async.o cpuhog.o
obj-y += groups.o
ifdef CONFIG_FUNCTION_TRACER
diff --git a/kernel/cpuhog.c b/kernel/cpuhog.c
new file mode 100644
index 0000000..c25c510
--- /dev/null
+++ b/kernel/cpuhog.c
@@ -0,0 +1,362 @@
+/*
+ * kernel/cpuhog.c - CPU hogs to monopolize CPUs
+ *
+ * Copyright (C) 2010 SUSE Linux Products GmbH
+ * Copyright (C) 2010 Tejun Heo <tj@kernel.org>
+ *
+ * This file is released under the GPLv2.
+ *
+ * Simplistic per-cpu maximum priority cpu hogging mechanism. The
+ * caller can specify a function to be executed on a single or
+ * multiple cpus preempting all other processes and monopolizing those
+ * cpus until it sleeps or finishes.
+ *
+ * Resources for this mechanism are preallocated when a cpu is brought
+ * up and requests are guaranteed to be served as long as the target
+ * cpus are online; however, execution context is limited to one per
+ * cpu, so don't hog for too long.
+ */
+#include <linux/completion.h>
+#include <linux/cpu.h>
+#include <linux/cpuhog.h>
+#include <linux/init.h>
+#include <linux/kthread.h>
+#include <linux/percpu.h>
+
+/*
+ * Structure to determine completion condition and record errors. May
+ * be shared by works on different cpus.
+ *
+ * It is zeroed by cpuhog_init_done(), so ret stays 0 unless some
+ * execution of the hog function returned non-zero, and executed stays
+ * false unless at least one work was actually run.
+ */
+struct cpuhog_done {
+ atomic_t nr_todo; /* nr left to execute */
+ bool executed; /* actually executed? */
+ int ret; /* collected return value */
+ struct completion completion; /* fired if nr_todo reaches 0 */
+};
+
+/*
+ * the actual hog, one per every possible cpu, enabled on online cpus
+ *
+ * lock is held while works is modified and while enabled is tested or
+ * changed on the queueing and hotplug paths.
+ */
+struct cpuhog {
+ spinlock_t lock;
+ struct list_head works; /* list of pending works */
+ struct task_struct *thread; /* hog thread */
+ bool enabled; /* is this hog enabled? */
+};
+
+static DEFINE_PER_CPU(struct cpuhog, cpuhog);
+
+/* clear @done and arm it to wait for @nr_todo executions */
+static void cpuhog_init_done(struct cpuhog_done *done, unsigned int nr_todo)
+{
+	/* wipes executed and ret as well */
+	memset(done, 0, sizeof(*done));
+
+	init_completion(&done->completion);
+	atomic_set(&done->nr_todo, nr_todo);
+}
+
+/*
+ * Account one finished (or skipped) work against @done and wake the
+ * waiter once nothing is left.  A NULL @done means nobody is waiting
+ * and there is nothing to do.
+ */
+static void cpuhog_signal_done(struct cpuhog_done *done, bool executed)
+{
+	if (!done)
+		return;
+
+	if (executed)
+		done->executed = true;
+
+	if (atomic_dec_and_test(&done->nr_todo))
+		complete(&done->completion);
+}
+
+/*
+ * Hand @work to @hog.  A disabled hog never runs it; the work is
+ * completed on the spot as "not executed" instead.
+ */
+static void cpuhog_queue_work(struct cpuhog *hog, struct cpuhog_work *work)
+{
+	unsigned long irqflags;
+
+	spin_lock_irqsave(&hog->lock, irqflags);
+
+	if (!hog->enabled) {
+		cpuhog_signal_done(work->done, false);
+	} else {
+		list_add_tail(&work->list, &hog->works);
+		wake_up_process(hog->thread);
+	}
+
+	spin_unlock_irqrestore(&hog->lock, irqflags);
+}
+
+/**
+ * hog_one_cpu - run a function on one cpu, monopolizing it
+ * @cpu: cpu to hog
+ * @fn: function to execute
+ * @arg: argument to @fn
+ *
+ * Queue @fn(@arg) to the hog of @cpu and wait until it has finished.
+ * @fn runs in process context at the highest priority, preempting
+ * whatever was running on @cpu and monopolizing it.
+ *
+ * @cpu is not pinned online by this function.  If it goes down while
+ * @fn is running, part or all of the execution may take place on other
+ * cpus; either @fn copes with that or the caller keeps @cpu online
+ * until this function returns.
+ *
+ * CONTEXT:
+ * Might sleep.
+ *
+ * RETURNS:
+ * -ENOENT if @fn(@arg) was not executed because @cpu was offline;
+ * otherwise, the return value of @fn.
+ */
+int hog_one_cpu(unsigned int cpu, cpuhog_fn_t fn, void *arg)
+{
+	struct cpuhog_done done;
+	struct cpuhog_work work = { .fn = fn, .arg = arg, .done = &done };
+
+	cpuhog_init_done(&done, 1);
+	cpuhog_queue_work(&per_cpu(cpuhog, cpu), &work);
+	wait_for_completion(&done.completion);
+
+	if (!done.executed)
+		return -ENOENT;
+	return done.ret;
+}
+
+/**
+ * hog_one_cpu_nowait - hog a cpu but don't wait for completion
+ * @cpu: cpu to hog
+ * @fn: function to execute
+ * @arg: argument to @fn
+ * @work_buf: caller-provided work item used to queue the request
+ *
+ * Similar to hog_one_cpu() but doesn't wait for completion. The
+ * caller is responsible for ensuring @work_buf is currently unused
+ * and will remain untouched until cpuhog starts executing @fn.
+ *
+ * @work_buf is zeroed before @fn and @arg are filled in, so its done
+ * pointer is NULL and no completion is signalled for this request.
+ *
+ * CONTEXT:
+ * Don't care.
+ */
+void hog_one_cpu_nowait(unsigned int cpu, cpuhog_fn_t fn, void *arg,
+ struct cpuhog_work *work_buf)
+{
+ memset(work_buf, 0, sizeof(*work_buf));
+ work_buf->fn = fn;
+ work_buf->arg = arg;
+ cpuhog_queue_work(&per_cpu(cpuhog, cpu), work_buf);
+}
+
+/* static data for hog_cpus */
+static DEFINE_MUTEX(hog_cpus_mutex);
+static DEFINE_PER_CPU(struct cpuhog_work, hog_cpus_work);
+
+/*
+ * Queue @fn(@arg) on every cpu in @cpumask and wait for all of them.
+ *
+ * The static per-cpu hog_cpus_work items are used as work buffers, so
+ * callers must serialize against each other; hog_cpus() and
+ * try_hog_cpus() do so with hog_cpus_mutex.
+ *
+ * Returns -ENOENT if @fn was not executed on any cpu (including the
+ * case of an empty @cpumask), otherwise 0 or a non-zero value
+ * returned by one of the executions.
+ */
+int __hog_cpus(const struct cpumask *cpumask, cpuhog_fn_t fn, void *arg)
+{
+	unsigned int nr_cpus = cpumask_weight(cpumask);
+	struct cpuhog_work *work;
+	struct cpuhog_done done;
+	unsigned int cpu;
+
+	/*
+	 * With nothing to queue nobody would ever fire the completion
+	 * and the wait below would block forever.
+	 */
+	if (!nr_cpus)
+		return -ENOENT;
+
+	/* initialize works and done */
+	for_each_cpu(cpu, cpumask) {
+		work = &per_cpu(hog_cpus_work, cpu);
+		work->fn = fn;
+		work->arg = arg;
+		work->done = &done;
+	}
+	cpuhog_init_done(&done, nr_cpus);
+
+	/*
+	 * Disable preemption while queueing to avoid getting
+	 * preempted by a hog which might wait for other hogs to enter
+	 * @fn which can lead to deadlock.
+	 */
+	preempt_disable();
+	for_each_cpu(cpu, cpumask)
+		cpuhog_queue_work(&per_cpu(cpuhog, cpu),
+				  &per_cpu(hog_cpus_work, cpu));
+	preempt_enable();
+
+	wait_for_completion(&done.completion);
+	return done.executed ? done.ret : -ENOENT;
+}
+
+/**
+ * hog_cpus - hog multiple cpus
+ * @cpumask: cpus to hog
+ * @fn: function to execute
+ * @arg: argument to @fn
+ *
+ * Run @fn(@arg) on the online cpus in @cpumask and return once every
+ * execution has finished.  On each target cpu @fn runs in process
+ * context at the highest priority, preempting any task on that cpu
+ * and monopolizing it.
+ *
+ * The cpus in @cpumask are not kept online by this function.  If some
+ * of them go down in the middle, execution for such a cpu may happen
+ * partially or fully on different cpus.  Either @fn is prepared for
+ * that or the caller keeps the cpus online until this function
+ * completes.
+ *
+ * hog_cpus() calls are serialized against each other, which makes it
+ * safe for @fn to wait for all cpus to start executing it.
+ *
+ * CONTEXT:
+ * Might sleep.
+ *
+ * RETURNS:
+ * -ENOENT if @fn(@arg) was not executed at all because all cpus in
+ * @cpumask were offline; otherwise, 0 if all executions of @fn
+ * returned 0, any non zero return value if any returned non zero.
+ */
+int hog_cpus(const struct cpumask *cpumask, cpuhog_fn_t fn, void *arg)
+{
+	int rc;
+
+	/* the works are static, so only one request can be in flight */
+	mutex_lock(&hog_cpus_mutex);
+	rc = __hog_cpus(cpumask, fn, arg);
+	mutex_unlock(&hog_cpus_mutex);
+
+	return rc;
+}
+
+/**
+ * try_hog_cpus - try to hog multiple cpus
+ * @cpumask: cpus to hog
+ * @fn: function to execute
+ * @arg: argument to @fn
+ *
+ * Same as hog_cpus() except that it does not wait for the facility to
+ * become free: if someone else is already using it, -EAGAIN is
+ * returned right away.
+ *
+ * CONTEXT:
+ * Might sleep.
+ *
+ * RETURNS:
+ * -EAGAIN if someone else is already hogging cpus, -ENOENT if
+ * @fn(@arg) was not executed at all because all cpus in @cpumask were
+ * offline; otherwise, 0 if all executions of @fn returned 0, any non
+ * zero return value if any returned non zero.
+ */
+int try_hog_cpus(const struct cpumask *cpumask, cpuhog_fn_t fn, void *arg)
+{
+	int rc = -EAGAIN;
+
+	/* the works are static, so only one request can be in flight */
+	if (mutex_trylock(&hog_cpus_mutex)) {
+		rc = __hog_cpus(cpumask, fn, arg);
+		mutex_unlock(&hog_cpus_mutex);
+	}
+
+	return rc;
+}
+
+/*
+ * Body of the per-cpu hog thread.  @data is the struct cpuhog this
+ * thread serves.
+ *
+ * Each iteration takes the first pending work off hog->works under
+ * hog->lock, runs its function, records a non-zero return value and
+ * signals completion.  With no work pending the thread sleeps until
+ * it is woken.  Returns 0 once kthread_should_stop() is true.
+ */
+static int cpuhog_thread(void *data)
+{
+	struct cpuhog *hog = data;
+	struct cpuhog_work *work;
+	int ret;
+
+repeat:
+	set_current_state(TASK_INTERRUPTIBLE);	/* mb paired w/ kthread_stop */
+
+	if (kthread_should_stop()) {
+		__set_current_state(TASK_RUNNING);
+		return 0;
+	}
+
+	work = NULL;
+	spin_lock_irq(&hog->lock);
+	if (!list_empty(&hog->works)) {
+		work = list_first_entry(&hog->works, struct cpuhog_work, list);
+		list_del_init(&work->list);
+	}
+	spin_unlock_irq(&hog->lock);
+
+	if (work) {
+		struct cpuhog_done *done = work->done;
+
+		__set_current_state(TASK_RUNNING);
+
+		ret = work->fn(work->arg);
+		/*
+		 * Works queued by hog_one_cpu_nowait() carry no done;
+		 * there is nowhere to store their return value.
+		 */
+		if (ret && done)
+			done->ret = ret;
+
+		cpuhog_signal_done(done, true);
+	} else
+		schedule();
+
+	goto repeat;
+}
+
+/*
+ * manage hog for a cpu, mostly lifted from sched migration thread mgmt
+ *
+ * CPU_UP_PREPARE creates the SCHED_FIFO hog thread for the cpu and
+ * takes a reference on it, CPU_ONLINE binds and wakes the thread and
+ * marks the hog enabled, CPU_UP_CANCELED/CPU_DEAD stop the thread,
+ * complete all still pending works as "not executed" and drop the
+ * reference.
+ *
+ * Returns NOTIFY_BAD if the thread cannot be created, NOTIFY_OK
+ * otherwise.
+ */
+static int __cpuinit cpuhog_cpu_callback(struct notifier_block *nfb,
+					 unsigned long action, void *hcpu)
+{
+	struct sched_param param = { .sched_priority = MAX_RT_PRIO - 1 };
+	unsigned int cpu = (unsigned long)hcpu;
+	struct cpuhog *hog = &per_cpu(cpuhog, cpu);
+	struct cpuhog_work *work;
+	struct task_struct *p;
+
+	switch (action & ~CPU_TASKS_FROZEN) {
+	case CPU_UP_PREPARE:
+		BUG_ON(hog->thread || hog->enabled || !list_empty(&hog->works));
+		p = kthread_create(cpuhog_thread, hog, "hog/%d", cpu);
+		if (IS_ERR(p))
+			return NOTIFY_BAD;
+		sched_setscheduler_nocheck(p, SCHED_FIFO, &param);
+		get_task_struct(p);
+		hog->thread = p;
+		break;
+
+	case CPU_ONLINE:
+		kthread_bind(hog->thread, cpu);
+		/* strictly unnecessary, as first user will wake it */
+		wake_up_process(hog->thread);
+		/* mark enabled */
+		spin_lock_irq(&hog->lock);
+		hog->enabled = true;
+		spin_unlock_irq(&hog->lock);
+		break;
+
+#ifdef CONFIG_HOTPLUG_CPU
+	case CPU_UP_CANCELED:
+	case CPU_DEAD:
+		/* kill the hog */
+		kthread_stop(hog->thread);
+		/*
+		 * Drain remaining works.  Each work is unlinked before
+		 * its completion is signalled: a waiter may release the
+		 * work as soon as it is signalled, and the list must be
+		 * empty for the next CPU_UP_PREPARE.
+		 */
+		spin_lock_irq(&hog->lock);
+		while (!list_empty(&hog->works)) {
+			struct cpuhog_done *done;
+
+			work = list_first_entry(&hog->works,
+						struct cpuhog_work, list);
+			done = work->done;
+			list_del_init(&work->list);
+			cpuhog_signal_done(done, false);
+		}
+		hog->enabled = false;
+		spin_unlock_irq(&hog->lock);
+		/* release the hog */
+		put_task_struct(hog->thread);
+		hog->thread = NULL;
+		break;
+#endif
+	}
+
+	return NOTIFY_OK;
+}
+
+/*
+ * Give it a higher priority so that cpuhog is available to other cpu
+ * notifiers. It currently shares the same priority as sched
+ * migration_notifier.
+ *
+ * cpuhog_init() also calls the callback directly for the boot cpu
+ * before registering this notifier.
+ */
+static struct notifier_block __cpuinitdata cpuhog_cpu_notifier = {
+ .notifier_call = cpuhog_cpu_callback,
+ .priority = 10,
+};
+
+/*
+ * Set up the lock and work list of every possible cpu's hog, bring up
+ * the hog of the boot cpu by hand and register the hotplug notifier.
+ */
+static int __init cpuhog_init(void)
+{
+	void *boot_cpu = (void *)(long)smp_processor_id();
+	unsigned int cpu;
+	int rc;
+
+	for_each_possible_cpu(cpu) {
+		struct cpuhog *hog = &per_cpu(cpuhog, cpu);
+
+		spin_lock_init(&hog->lock);
+		INIT_LIST_HEAD(&hog->works);
+	}
+
+	/* start one for the boot cpu */
+	rc = cpuhog_cpu_callback(&cpuhog_cpu_notifier, CPU_UP_PREPARE,
+				 boot_cpu);
+	BUG_ON(rc == NOTIFY_BAD);
+	cpuhog_cpu_callback(&cpuhog_cpu_notifier, CPU_ONLINE, boot_cpu);
+
+	register_cpu_notifier(&cpuhog_cpu_notifier);
+
+	return 0;
+}
+early_initcall(cpuhog_init);
--
1.6.4.2
next prev parent reply other threads:[~2010-03-08 15:54 UTC|newest]
Thread overview: 23+ messages / expand[flat|nested] mbox.gz Atom feed top
2010-03-08 15:53 [PATCHSET] cpuhog: implement and use cpuhog Tejun Heo
2010-03-08 15:53 ` Tejun Heo [this message]
2010-03-08 19:01 ` [PATCH 1/4] cpuhog: implement cpuhog Oleg Nesterov
2010-03-08 23:18 ` Tejun Heo
2010-03-08 15:53 ` [PATCH 2/4] stop_machine: reimplement using cpuhog Tejun Heo
2010-03-08 16:32 ` Arjan van de Ven
2010-03-08 23:21 ` Tejun Heo
2010-03-08 17:10 ` Heiko Carstens
2010-03-08 18:27 ` Oleg Nesterov
2010-03-08 19:37 ` Heiko Carstens
2010-03-08 23:39 ` Tejun Heo
2010-03-09 7:09 ` Heiko Carstens
2010-03-09 7:16 ` Tejun Heo
2010-03-08 19:06 ` Oleg Nesterov
2010-03-08 23:22 ` Tejun Heo
2010-03-08 15:53 ` [PATCH 3/4] scheduler: replace migration_thread with cpuhog Tejun Heo
2010-03-08 15:53 ` [PATCH 4/4] scheduler: kill paranoia check in synchronize_sched_expedited() Tejun Heo
2010-03-10 19:25 ` [PATCHSET] cpuhog: implement and use cpuhog Peter Zijlstra
2010-03-12 3:13 ` Tejun Heo
2010-03-29 6:46 ` Rusty Russell
2010-03-29 9:11 ` Peter Zijlstra
2010-04-02 5:45 ` Tejun Heo
-- strict thread matches above, loose matches on Subject: below --
2010-03-17 8:40 [PATCHSET sched/core] cpuhog: implement and use cpuhog, take#2 Tejun Heo
2010-03-17 8:40 ` [PATCH 1/4] cpuhog: implement cpuhog Tejun Heo
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1268063603-7425-2-git-send-email-tj@kernel.org \
--to=tj@kernel.org \
--cc=akpm@linux-foundation.org \
--cc=dipankar@in.ibm.com \
--cc=heiko.carstens@de.ibm.com \
--cc=josh@freedesktop.org \
--cc=linux-kernel@vger.kernel.org \
--cc=mingo@elte.hu \
--cc=oleg@redhat.com \
--cc=paulmck@linux.vnet.ibm.com \
--cc=peterz@infradead.org \
--cc=rusty@rustcorp.com.au \
--cc=sivanich@sgi.com \
--cc=torvalds@linux-foundation.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).