All of lore.kernel.org
 help / color / mirror / Atom feed
From: Tejun Heo <tj@kernel.org>
To: linux-kernel@vger.kernel.org, rusty@rustcorp.com.au,
	sivanich@sgi.com, heiko.carstens@de.ibm.com,
	torvalds@linux-foundation.org, mingo@elte.hu,
	dipankar@in.ibm.com, josh@freedesktop.org,
	paulmck@linux.vnet.ibm.com, oleg@redhat.com,
	akpm@linux-foundation.org, peterz@infradead.org,
	arjan@infradead.org
Cc: Tejun Heo <tj@kernel.org>
Subject: [PATCH 1/4] cpuhog: implement cpuhog
Date: Wed, 17 Mar 2010 17:40:48 +0900	[thread overview]
Message-ID: <1268815251-31407-2-git-send-email-tj@kernel.org> (raw)
In-Reply-To: <1268815251-31407-1-git-send-email-tj@kernel.org>

Implement a simplistic per-cpu maximum priority cpu hogging mechanism
named cpuhog.  A non-sleeping callback can be scheduled to run on one
or multiple cpus with maximum priority monopolozing those cpus.  This
is primarily to replace and unify RT workqueue usage in stop_machine
and scheduler migration_thread which currently is serving multiple
purposes.

Four functions are provided - hog_one_cpu(), hog_one_cpu_nowait(),
hog_cpus() and try_hog_cpus().

This is to allow clean sharing of resources among stop_cpu and all the
migration thread users.  One cpuhog thread per cpu is created which is
currently named "hog/CPU".  This will eventually replace the migration
thread and take on its name.

Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Dimitri Sivanich <sivanich@sgi.com>
---
 include/linux/cpuhog.h |   24 +++
 kernel/Makefile        |    2 +-
 kernel/cpuhog.c        |  368 ++++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 393 insertions(+), 1 deletions(-)
 create mode 100644 include/linux/cpuhog.h
 create mode 100644 kernel/cpuhog.c

diff --git a/include/linux/cpuhog.h b/include/linux/cpuhog.h
new file mode 100644
index 0000000..5252884
--- /dev/null
+++ b/include/linux/cpuhog.h
@@ -0,0 +1,24 @@
+/*
+ * linux/cpuhog.h - CPU hogs to monopolize CPUs
+ *
+ * Copyright (C) 2010		SUSE Linux Products GmbH
+ *
+ * This file is released under the GPLv2.
+ */
+#include <linux/cpumask.h>
+#include <linux/list.h>
+
+typedef int (*cpuhog_fn_t)(void *arg);
+
+struct cpuhog_work {
+	struct list_head	list;		/* cpuhog->works */
+	cpuhog_fn_t		fn;
+	void			*arg;
+	struct cpuhog_done	*done;
+};
+
+int hog_one_cpu(unsigned int cpu, cpuhog_fn_t fn, void *arg);
+void hog_one_cpu_nowait(unsigned int cpu, cpuhog_fn_t fn, void *arg,
+			struct cpuhog_work *work_buf);
+int hog_cpus(const struct cpumask *cpumask, cpuhog_fn_t fn, void *arg);
+int try_hog_cpus(const struct cpumask *cpumask, cpuhog_fn_t fn, void *arg);
diff --git a/kernel/Makefile b/kernel/Makefile
index 6aebdeb..0fa06cb 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -10,7 +10,7 @@ obj-y     = sched.o fork.o exec_domain.o panic.o printk.o \
 	    kthread.o wait.o kfifo.o sys_ni.o posix-cpu-timers.o mutex.o \
 	    hrtimer.o rwsem.o nsproxy.o srcu.o semaphore.o \
 	    notifier.o ksysfs.o pm_qos_params.o sched_clock.o cred.o \
-	    async.o
+	    async.o cpuhog.o
 obj-y += groups.o
 
 ifdef CONFIG_FUNCTION_TRACER
diff --git a/kernel/cpuhog.c b/kernel/cpuhog.c
new file mode 100644
index 0000000..877a5ed
--- /dev/null
+++ b/kernel/cpuhog.c
@@ -0,0 +1,368 @@
+/*
+ * kernel/cpuhog.c - CPU hogs to monopolize CPUs
+ *
+ * Copyright (C) 2010		SUSE Linux Products GmbH
+ * Copyright (C) 2010		Tejun Heo <tj@kernel.org>
+ *
+ * This file is released under the GPLv2.
+ *
+ * Simplistic per-cpu maximum priority cpu hogging mechanism.  The
+ * caller can specify a non-sleeping function to be executed on a
+ * single or multiple cpus preempting all other processes and
+ * monopolizing those cpus until it finishes.
+ *
+ * Resources for this mechanism are preallocated when a cpu is brought
+ * up and requests are guaranteed to be served as long as the target
+ * cpus are online.
+ */
+#include <linux/completion.h>
+#include <linux/cpu.h>
+#include <linux/cpuhog.h>
+#include <linux/init.h>
+#include <linux/kthread.h>
+#include <linux/percpu.h>
+
+/*
+ * Structure to determine completion condition and record errors.  May
+ * be shared by works on different cpus.
+ */
+struct cpuhog_done {
+	atomic_t		nr_todo;	/* nr left to execute */
+	bool			executed;	/* actually executed? */
+	int			ret;		/* collected return value */
+	struct completion	completion;	/* fired if nr_todo reaches 0 */
+};
+
+/* the actual hog, one per every possible cpu, enabled on online cpus */
+struct cpuhog {
+	spinlock_t		lock;
+	struct list_head	works;		/* list of pending works */
+	struct task_struct	*thread;	/* hog thread */
+	bool			enabled;	/* is this hog enabled? */
+};
+
+static DEFINE_PER_CPU(struct cpuhog, cpuhog);
+
+static void cpuhog_init_done(struct cpuhog_done *done, unsigned int nr_todo)
+{
+	memset(done, 0, sizeof(*done));
+	atomic_set(&done->nr_todo, nr_todo);
+	init_completion(&done->completion);
+}
+
+/* signal completion unless @done is NULL */
+static void cpuhog_signal_done(struct cpuhog_done *done, bool executed)
+{
+	if (done) {
+		if (executed)
+			done->executed = true;
+		if (atomic_dec_and_test(&done->nr_todo))
+			complete(&done->completion);
+	}
+}
+
+/* queue @work to @hog.  if offline, @work is completed immediately */
+static void cpuhog_queue_work(struct cpuhog *hog, struct cpuhog_work *work)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&hog->lock, flags);
+
+	if (hog->enabled) {
+		list_add_tail(&work->list, &hog->works);
+		wake_up_process(hog->thread);
+	} else
+		cpuhog_signal_done(work->done, false);
+
+	spin_unlock_irqrestore(&hog->lock, flags);
+}
+
+/**
+ * hog_one_cpu - hog a cpu
+ * @cpu: cpu to hog
+ * @fn: function to execute
+ * @arg: argument to @fn
+ *
+ * Execute @fn(@arg) on @cpu.  @fn is run in a process context with
+ * the highest priority preempting any task on the cpu and
+ * monopolizing it.  This function returns after the execution is
+ * complete.
+ *
+ * This function doesn't guarantee @cpu stays online till @fn
+ * completes.  If @cpu goes down in the middle, execution may happen
+ * partially or fully on different cpus.  @fn should either be ready
+ * for that or the caller should ensure that @cpu stays online until
+ * this function completes.
+ *
+ * CONTEXT:
+ * Might sleep.
+ *
+ * RETURNS:
+ * -ENOENT if @fn(@arg) was not executed because @cpu was offline;
+ * otherwise, the return value of @fn.
+ */
+int hog_one_cpu(unsigned int cpu, cpuhog_fn_t fn, void *arg)
+{
+	struct cpuhog_done done;
+	struct cpuhog_work work = { .fn = fn, .arg = arg, .done = &done };
+
+	cpuhog_init_done(&done, 1);
+	cpuhog_queue_work(&per_cpu(cpuhog, cpu), &work);
+	wait_for_completion(&done.completion);
+	return done.executed ? done.ret : -ENOENT;
+}
+
+/**
+ * hog_one_cpu_nowait - hog a cpu but don't wait for completion
+ * @cpu: cpu to hog
+ * @fn: function to execute
+ * @arg: argument to @fn
+ *
+ * Similar to hog_one_cpu() but doesn't wait for completion.  The
+ * caller is responsible for ensuring @work_buf is currently unused
+ * and will remain untouched until cpuhog starts executing @fn.
+ *
+ * CONTEXT:
+ * Don't care.
+ */
+void hog_one_cpu_nowait(unsigned int cpu, cpuhog_fn_t fn, void *arg,
+			struct cpuhog_work *work_buf)
+{
+	memset(work_buf, 0, sizeof(*work_buf));
+	work_buf->fn = fn;
+	work_buf->arg = arg;
+	cpuhog_queue_work(&per_cpu(cpuhog, cpu), work_buf);
+}
+
+/* static data for hog_cpus */
+static DEFINE_MUTEX(hog_cpus_mutex);
+static DEFINE_PER_CPU(struct cpuhog_work, hog_cpus_work);
+
+int __hog_cpus(const struct cpumask *cpumask, cpuhog_fn_t fn, void *arg)
+{
+	struct cpuhog_work *work;
+	struct cpuhog_done done;
+	unsigned int cpu;
+
+	/* initialize works and done */
+	for_each_cpu(cpu, cpumask) {
+		work = &per_cpu(hog_cpus_work, cpu);
+		work->fn = fn;
+		work->arg = arg;
+		work->done = &done;
+	}
+	cpuhog_init_done(&done, cpumask_weight(cpumask));
+
+	/*
+	 * Disable preemption while queueing to avoid getting
+	 * preempted by a hog which might wait for other hogs to enter
+	 * @fn which can lead to deadlock.
+	 */
+	preempt_disable();
+	for_each_cpu(cpu, cpumask)
+		cpuhog_queue_work(&per_cpu(cpuhog, cpu),
+				  &per_cpu(hog_cpus_work, cpu));
+	preempt_enable();
+
+	wait_for_completion(&done.completion);
+	return done.executed ? done.ret : -ENOENT;
+}
+
+/**
+ * hog_cpus - hog multiple cpus
+ * @cpumask: cpus to hog
+ * @fn: function to execute
+ * @arg: argument to @fn
+ *
+ * Execute @fn(@arg) on online cpus in @cpumask.  On each target cpu,
+ * @fn is run in a process context with the highest priority
+ * preempting any task on the cpu and monopolizing it.  This function
+ * returns after all executions are complete.
+ *
+ * This function doesn't guarantee the cpus in @cpumask stay online
+ * till @fn completes.  If some cpus go down in the middle, execution
+ * on the cpu may happen partially or fully on different cpus.  @fn
+ * should either be ready for that or the caller should ensure that
+ * the cpus stay online until this function completes.
+ *
+ * All hog_cpus() calls are serialized making it safe for @fn to wait
+ * for all cpus to start executing it.
+ *
+ * CONTEXT:
+ * Might sleep.
+ *
+ * RETURNS:
+ * -ENOENT if @fn(@arg) was not executed at all because all cpus in
+ * @cpumask were offline; otherwise, 0 if all executions of @fn
+ * returned 0, any non zero return value if any returned non zero.
+ */
+int hog_cpus(const struct cpumask *cpumask, cpuhog_fn_t fn, void *arg)
+{
+	int ret;
+
+	/* static works are used, process one request at a time */
+	mutex_lock(&hog_cpus_mutex);
+	ret = __hog_cpus(cpumask, fn, arg);
+	mutex_unlock(&hog_cpus_mutex);
+	return ret;
+}
+
+/**
+ * try_hog_cpus - try to hog multiple cpus
+ * @cpumask: cpus to hog
+ * @fn: function to execute
+ * @arg: argument to @fn
+ *
+ * Identical to hog_cpus() except that it fails with -EAGAIN if
+ * someone else is already using the facility.
+ *
+ * CONTEXT:
+ * Might sleep.
+ *
+ * RETURNS:
+ * -EAGAIN if someone else is already hogging cpus, -ENOENT if
+ * @fn(@arg) was not executed at all because all cpus in @cpumask were
+ * offline; otherwise, 0 if all executions of @fn returned 0, any non
+ * zero return value if any returned non zero.
+ */
+int try_hog_cpus(const struct cpumask *cpumask, cpuhog_fn_t fn, void *arg)
+{
+	int ret;
+
+	/* static works are used, process one request at a time */
+	if (!mutex_trylock(&hog_cpus_mutex))
+		return -EAGAIN;
+	ret = __hog_cpus(cpumask, fn, arg);
+	mutex_unlock(&hog_cpus_mutex);
+	return ret;
+}
+
+static int cpuhog_thread(void *data)
+{
+	struct cpuhog *hog = data;
+	struct cpuhog_work *work;
+	int ret;
+
+repeat:
+	set_current_state(TASK_INTERRUPTIBLE);	/* mb paired w/ kthread_stop */
+
+	if (kthread_should_stop()) {
+		__set_current_state(TASK_RUNNING);
+		return 0;
+	}
+
+	work = NULL;
+	spin_lock_irq(&hog->lock);
+	if (!list_empty(&hog->works)) {
+		work = list_first_entry(&hog->works, struct cpuhog_work, list);
+		list_del_init(&work->list);
+	}
+	spin_unlock_irq(&hog->lock);
+
+	if (work) {
+		struct cpuhog_done *done = work->done;
+
+		__set_current_state(TASK_RUNNING);
+
+		/* cpu hog callbacks are not allowed to sleep */
+		preempt_disable();
+
+		ret = work->fn(work->arg);
+		if (ret)
+			done->ret = ret;
+
+		/* restore preemption and check it's still balanced */
+		preempt_enable();
+		WARN_ON_ONCE(preempt_count());
+
+		cpuhog_signal_done(done, true);
+	} else
+		schedule();
+
+	goto repeat;
+}
+
+/* manage hog for a cpu, mostly lifted from sched migration thread mgmt */
+static int __cpuinit cpuhog_cpu_callback(struct notifier_block *nfb,
+					 unsigned long action, void *hcpu)
+{
+	struct sched_param param = { .sched_priority = MAX_RT_PRIO - 1 };
+	unsigned int cpu = (unsigned long)hcpu;
+	struct cpuhog *hog = &per_cpu(cpuhog, cpu);
+	struct cpuhog_work *work;
+	struct task_struct *p;
+
+	switch (action & ~CPU_TASKS_FROZEN) {
+	case CPU_UP_PREPARE:
+		BUG_ON(hog->thread || hog->enabled || !list_empty(&hog->works));
+		p = kthread_create(cpuhog_thread, hog, "hog/%d", cpu);
+		if (IS_ERR(p))
+			return NOTIFY_BAD;
+		sched_setscheduler_nocheck(p, SCHED_FIFO, &param);
+		get_task_struct(p);
+		hog->thread = p;
+		break;
+
+	case CPU_ONLINE:
+		kthread_bind(hog->thread, cpu);
+		/* strictly unnecessary, as first user will wake it */
+		wake_up_process(hog->thread);
+		/* mark enabled */
+		spin_lock_irq(&hog->lock);
+		hog->enabled = true;
+		spin_unlock_irq(&hog->lock);
+		break;
+
+#ifdef CONFIG_HOTPLUG_CPU
+	case CPU_UP_CANCELED:
+	case CPU_DEAD:
+		/* kill the hog */
+		kthread_stop(hog->thread);
+		/* drain remaining works */
+		spin_lock_irq(&hog->lock);
+		list_for_each_entry(work, &hog->works, list)
+			cpuhog_signal_done(work->done, false);
+		hog->enabled = false;
+		spin_unlock_irq(&hog->lock);
+		/* release the hog */
+		put_task_struct(hog->thread);
+		hog->thread = NULL;
+		break;
+#endif
+	}
+
+	return NOTIFY_OK;
+}
+
+/*
+ * Give it a higher priority so that cpuhog is available to other cpu
+ * notifiers.  It currently shares the same priority as sched
+ * migration_notifier.
+ */
+static struct notifier_block __cpuinitdata cpuhog_cpu_notifier = {
+	.notifier_call	= cpuhog_cpu_callback,
+	.priority	= 10,
+};
+
+static int __init cpuhog_init(void)
+{
+	void *bcpu = (void *)(long)smp_processor_id();
+	unsigned int cpu;
+	int err;
+
+	for_each_possible_cpu(cpu) {
+		struct cpuhog *hog = &per_cpu(cpuhog, cpu);
+
+		spin_lock_init(&hog->lock);
+		INIT_LIST_HEAD(&hog->works);
+	}
+
+	/* start one for the boot cpu */
+	err = cpuhog_cpu_callback(&cpuhog_cpu_notifier, CPU_UP_PREPARE, bcpu);
+	BUG_ON(err == NOTIFY_BAD);
+	cpuhog_cpu_callback(&cpuhog_cpu_notifier, CPU_ONLINE, bcpu);
+	register_cpu_notifier(&cpuhog_cpu_notifier);
+
+	return 0;
+}
+early_initcall(cpuhog_init);
-- 
1.6.4.2


  reply	other threads:[~2010-03-17  8:42 UTC|newest]

Thread overview: 10+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2010-03-17  8:40 [PATCHSET sched/core] cpuhog: implement and use cpuhog, take#2 Tejun Heo
2010-03-17  8:40 ` Tejun Heo [this message]
2010-03-17  8:40 ` [PATCH 2/4] stop_machine: reimplement using cpuhog Tejun Heo
2010-03-17  8:40 ` [PATCH 3/4] scheduler: replace migration_thread with cpuhog Tejun Heo
2010-03-29 13:57   ` Oleg Nesterov
2010-03-29 14:17     ` Peter Zijlstra
2010-03-17  8:40 ` [PATCH 4/4] scheduler: kill paranoia check in synchronize_sched_expedited() Tejun Heo
  -- strict thread matches above, loose matches on Subject: below --
2010-03-08 15:53 [PATCHSET] cpuhog: implement and use cpuhog Tejun Heo
2010-03-08 15:53 ` [PATCH 1/4] cpuhog: implement cpuhog Tejun Heo
2010-03-08 19:01   ` Oleg Nesterov
2010-03-08 23:18     ` Tejun Heo

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1268815251-31407-2-git-send-email-tj@kernel.org \
    --to=tj@kernel.org \
    --cc=akpm@linux-foundation.org \
    --cc=arjan@infradead.org \
    --cc=dipankar@in.ibm.com \
    --cc=heiko.carstens@de.ibm.com \
    --cc=josh@freedesktop.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mingo@elte.hu \
    --cc=oleg@redhat.com \
    --cc=paulmck@linux.vnet.ibm.com \
    --cc=peterz@infradead.org \
    --cc=rusty@rustcorp.com.au \
    --cc=sivanich@sgi.com \
    --cc=torvalds@linux-foundation.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.