linux-pm.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Peter Zijlstra <peterz@infradead.org>
To: Arjan van de Ven <arjan@linux.intel.com>,
	lenb@kernel.org, rjw@rjwysocki.net,
	Eliezer Tamir <eliezer.tamir@linux.intel.com>,
	Chris Leech <christopher.leech@intel.com>,
	David Miller <davem@davemloft.net>,
	rui.zhang@intel.com, jacob.jun.pan@linux.intel.com,
	Mike Galbraith <bitbucket@online.de>,
	Ingo Molnar <mingo@kernel.org>,
	hpa@zytor.com, Thomas Gleixner <tglx@linutronix.de>,
	Peter Zijlstra <peterz@infradead.org>
Cc: linux-kernel@vger.kernel.org, linux-pm@vger.kernel.org,
	"Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Subject: [PATCH 3/7] idle, thermal, acpi: Remove home grown idle implementations
Date: Wed, 20 Nov 2013 17:04:53 +0100	[thread overview]
Message-ID: <20131120162736.508462614@infradead.org> (raw)
In-Reply-To: 20131120160450.072555619@infradead.org

[-- Attachment #1: peterz-fixup-intel_clamp-mess.patch --]
[-- Type: text/plain, Size: 9665 bytes --]

People are starting to grow their own idle implementations in various
disgusting ways. Collapse the lot and use the generic idle code to
provide a proper idle cycle implementation.

This does not fully preserve existing behaviour in that the generic
idle cycle function calls into the normal cpuidle governed idle
routines and should thus respect things like QoS parameters and the
like.

If people want to override the idle state they should talk to the
cpuidle folks about extending the interface and attempt to preserve
QoS guarantees, instead of jumping straight to the deepest possible C
state.

Compile tested only -- I've no idea how to actually use these vile
things.

Cc: hpa@zytor.com
Cc: arjan@linux.intel.com
Cc: rui.zhang@intel.com
Cc: jacob.jun.pan@linux.intel.com
Cc: lenb@kernel.org
Cc: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Peter Zijlstra <peterz@infradead.org>
---
 drivers/acpi/acpi_pad.c            |   41 ------------
 drivers/thermal/intel_powerclamp.c |   38 -----------
 include/linux/cpu.h                |    2 
 kernel/cpu/idle.c                  |  123 ++++++++++++++++++++++---------------
 kernel/time/tick-sched.c           |    2 
 5 files changed, 82 insertions(+), 124 deletions(-)

--- a/drivers/acpi/acpi_pad.c
+++ b/drivers/acpi/acpi_pad.c
@@ -41,9 +41,7 @@ static DEFINE_MUTEX(round_robin_lock);
 static unsigned long power_saving_mwait_eax;
 
 static unsigned char tsc_detected_unstable;
-static unsigned char tsc_marked_unstable;
 static unsigned char lapic_detected_unstable;
-static unsigned char lapic_marked_unstable;
 
 static void power_saving_mwait_init(void)
 {
@@ -153,10 +151,9 @@ static int power_saving_thread(void *dat
 	unsigned int tsk_index = (unsigned long)data;
 	u64 last_jiffies = 0;
 
-	sched_setscheduler(current, SCHED_RR, &param);
+	sched_setscheduler(current, SCHED_FIFO, &param);
 
 	while (!kthread_should_stop()) {
-		int cpu;
 		u64 expire_time;
 
 		try_to_freeze();
@@ -171,41 +168,7 @@ static int power_saving_thread(void *dat
 
 		expire_time = jiffies + HZ * (100 - idle_pct) / 100;
 
-		while (!need_resched()) {
-			if (tsc_detected_unstable && !tsc_marked_unstable) {
-				/* TSC could halt in idle, so notify users */
-				mark_tsc_unstable("TSC halts in idle");
-				tsc_marked_unstable = 1;
-			}
-			if (lapic_detected_unstable && !lapic_marked_unstable) {
-				int i;
-				/* LAPIC could halt in idle, so notify users */
-				for_each_online_cpu(i)
-					clockevents_notify(
-						CLOCK_EVT_NOTIFY_BROADCAST_ON,
-						&i);
-				lapic_marked_unstable = 1;
-			}
-			local_irq_disable();
-			cpu = smp_processor_id();
-			if (lapic_marked_unstable)
-				clockevents_notify(
-					CLOCK_EVT_NOTIFY_BROADCAST_ENTER, &cpu);
-			stop_critical_timings();
-
-			mwait_idle_with_hints(power_saving_mwait_eax, 1);
-
-			start_critical_timings();
-			if (lapic_marked_unstable)
-				clockevents_notify(
-					CLOCK_EVT_NOTIFY_BROADCAST_EXIT, &cpu);
-			local_irq_enable();
-
-			if (jiffies > expire_time) {
-				do_sleep = 1;
-				break;
-			}
-		}
+		play_idle(expire_time);
 
 		/*
 		 * current sched_rt has threshold for rt task running time.
--- a/drivers/thermal/intel_powerclamp.c
+++ b/drivers/thermal/intel_powerclamp.c
@@ -247,11 +247,6 @@ static u64 pkg_state_counter(void)
 	return count;
 }
 
-static void noop_timer(unsigned long foo)
-{
-	/* empty... just the fact that we get the interrupt wakes us up */
-}
-
 static unsigned int get_compensation(int ratio)
 {
 	unsigned int comp = 0;
@@ -356,7 +351,6 @@ static bool powerclamp_adjust_controls(u
 static int clamp_thread(void *arg)
 {
 	int cpunr = (unsigned long)arg;
-	DEFINE_TIMER(wakeup_timer, noop_timer, 0, 0);
 	static const struct sched_param param = {
 		.sched_priority = MAX_USER_RT_PRIO/2,
 	};
@@ -365,11 +359,9 @@ static int clamp_thread(void *arg)
 
 	set_bit(cpunr, cpu_clamping_mask);
 	set_freezable();
-	init_timer_on_stack(&wakeup_timer);
 	sched_setscheduler(current, SCHED_FIFO, &param);
 
-	while (true == clamping && !kthread_should_stop() &&
-		cpu_online(cpunr)) {
+	while (clamping && !kthread_should_stop() && cpu_online(cpunr)) {
 		int sleeptime;
 		unsigned long target_jiffies;
 		unsigned int guard;
@@ -417,35 +409,11 @@ static int clamp_thread(void *arg)
 		if (should_skip)
 			continue;
 
-		target_jiffies = jiffies + duration_jiffies;
-		mod_timer(&wakeup_timer, target_jiffies);
 		if (unlikely(local_softirq_pending()))
 			continue;
-		/*
-		 * stop tick sched during idle time, interrupts are still
-		 * allowed. thus jiffies are updated properly.
-		 */
-		preempt_disable();
-		tick_nohz_idle_enter();
-		/* mwait until target jiffies is reached */
-		while (time_before(jiffies, target_jiffies)) {
-			unsigned long ecx = 1;
-			unsigned long eax = target_mwait;
-
-			/*
-			 * REVISIT: may call enter_idle() to notify drivers who
-			 * can save power during cpu idle. same for exit_idle()
-			 */
-			local_touch_nmi();
-			stop_critical_timings();
-			mwait_idle_with_hints(eax, ecx);
-			start_critical_timings();
-			atomic_inc(&idle_wakeup_counter);
-		}
-		tick_nohz_idle_exit();
-		preempt_enable_no_resched();
+
+		play_idle(duration_jiffies);
 	}
-	del_timer_sync(&wakeup_timer);
 	clear_bit(cpunr, cpu_clamping_mask);
 
 	return 0;
--- a/include/linux/cpu.h
+++ b/include/linux/cpu.h
@@ -215,6 +215,8 @@ enum cpuhp_state {
 	CPUHP_ONLINE,
 };
 
+void play_idle(unsigned long jiffies);
+
 void cpu_startup_entry(enum cpuhp_state state);
 void cpu_idle(void);
 
--- a/kernel/cpu/idle.c
+++ b/kernel/cpu/idle.c
@@ -63,62 +63,88 @@ void __weak arch_cpu_idle(void)
 }
 
 /*
- * Generic idle loop implementation
+ * Generic idle cycle.
  */
-static void cpu_idle_loop(void)
+static void do_idle(void)
 {
-	while (1) {
-		tick_nohz_idle_enter();
+	tick_nohz_idle_enter();
 
-		while (!need_resched()) {
-			check_pgt_cache();
-			rmb();
-
-			if (cpu_is_offline(smp_processor_id()))
-				arch_cpu_idle_dead();
-
-			local_irq_disable();
-			arch_cpu_idle_enter();
-
-			/*
-			 * In poll mode we reenable interrupts and spin.
-			 *
-			 * Also if we detected in the wakeup from idle
-			 * path that the tick broadcast device expired
-			 * for us, we don't want to go deep idle as we
-			 * know that the IPI is going to arrive right
-			 * away
-			 */
-			if (cpu_idle_force_poll || tick_check_broadcast_expired()) {
-				cpu_idle_poll();
-			} else {
-				if (!current_clr_polling_and_test()) {
-					stop_critical_timings();
-					rcu_idle_enter();
-					arch_cpu_idle();
-					WARN_ON_ONCE(irqs_disabled());
-					rcu_idle_exit();
-					start_critical_timings();
-				} else {
-					local_irq_enable();
-				}
-				__current_set_polling();
-			}
-			arch_cpu_idle_exit();
-		}
+	while (!need_resched()) {
+		check_pgt_cache();
+		rmb();
+
+		if (cpu_is_offline(smp_processor_id()))
+			arch_cpu_idle_dead();
+
+		local_irq_disable();
+		arch_cpu_idle_enter();
 
 		/*
-		 * We need to test and propagate the TIF_NEED_RESCHED bit here
-		 * because we might not have send the reschedule IPI to idle
-		 * tasks.
+		 * In poll mode we reenable interrupts and spin.
+		 *
+		 * Also if we detected in the wakeup from idle path that the
+		 * tick broadcast device expired for us, we don't want to go
+		 * deep idle as we know that the IPI is going to arrive right
+		 * away
 		 */
-		preempt_fold_need_resched();
-		tick_nohz_idle_exit();
-		schedule_preempt_disabled();
+		if (cpu_idle_force_poll || tick_check_broadcast_expired()) {
+			cpu_idle_poll();
+		} else {
+			if (!current_clr_polling_and_test()) {
+				stop_critical_timings();
+				rcu_idle_enter();
+				arch_cpu_idle();
+				WARN_ON_ONCE(irqs_disabled());
+				rcu_idle_exit();
+				start_critical_timings();
+			} else {
+				local_irq_enable();
+			}
+			__current_set_polling();
+		}
+		arch_cpu_idle_exit();
 	}
+
+	/*
+	 * We need to test and propagate the TIF_NEED_RESCHED bit here
+	 * because we might not have send the reschedule IPI to idle
+	 * tasks.
+	 */
+	preempt_fold_need_resched();
+	tick_nohz_idle_exit();
+	schedule_preempt_disabled();
+}
+
+static void play_idle_timer(unsigned long foo)
+{
+	set_tsk_need_resched(current);
+}
+
+void play_idle(unsigned long duration)
+{
+	DEFINE_TIMER(wakeup_timer, play_idle_timer, 0, 0);
+
+	/*
+	 * Only FIFO tasks can disable the tick since they don't need the forced
+	 * preemption.
+	 */
+	WARN_ON_ONCE(current->policy != SCHED_FIFO);
+	WARN_ON_ONCE(current->nr_cpus_allowed != 1);
+	WARN_ON_ONCE(!(current->flags & PF_NO_SETAFFINITY));
+	WARN_ON_ONCE(!(current->flags & PF_KTHREAD));
+
+	init_timer_on_stack(&wakeup_timer);
+	mod_timer_pinned(&wakeup_timer, jiffies + duration);
+
+	preempt_disable();
+	do_idle();
+	del_timer_sync(&wakeup_timer);
+	preempt_fold_need_resched();
+	preempt_enable();
 }
+EXPORT_SYMBOL_GPL(play_idle);
 
-void cpu_startup_entry(enum cpuhp_state state)
+__noreturn void cpu_startup_entry(enum cpuhp_state state)
 {
 	/*
 	 * This #ifdef needs to die, but it's too late in the cycle to
@@ -137,5 +163,6 @@ void cpu_startup_entry(enum cpuhp_state
 #endif
 	__current_set_polling();
 	arch_cpu_idle_prepare();
-	cpu_idle_loop();
+	while (1)
+		do_idle();
 }
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -804,7 +804,6 @@ void tick_nohz_idle_enter(void)
 
 	local_irq_enable();
 }
-EXPORT_SYMBOL_GPL(tick_nohz_idle_enter);
 
 /**
  * tick_nohz_irq_exit - update next tick event from interrupt exit
@@ -932,7 +931,6 @@ void tick_nohz_idle_exit(void)
 
 	local_irq_enable();
 }
-EXPORT_SYMBOL_GPL(tick_nohz_idle_exit);
 
 static int tick_nohz_reprogram(struct tick_sched *ts, ktime_t now)
 {

  parent reply	other threads:[~2013-11-20 16:04 UTC|newest]

Thread overview: 47+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2013-11-20 16:04 [PATCH 0/7] Cure some vaux idle wrackage Peter Zijlstra
2013-11-20 16:04 ` [PATCH 1/7] x86, acpi, idle: Restructure the mwait idle routines Peter Zijlstra
2013-11-20 16:04 ` [PATCH 2/7] sched, preempt: Fixup missed PREEMPT_NEED_RESCHED folding Peter Zijlstra
2013-11-21  8:25   ` Peter Zijlstra
2013-11-20 16:04 ` Peter Zijlstra [this message]
2013-11-20 16:40   ` [PATCH 3/7] idle, thermal, acpi: Remove home grown idle implementations Arjan van de Ven
2013-11-20 16:59     ` Peter Zijlstra
2013-11-20 17:23     ` Thomas Gleixner
2013-11-20 17:23       ` Arjan van de Ven
2013-11-20 17:55         ` Thomas Gleixner
2013-11-20 18:21           ` Arjan van de Ven
2013-11-20 19:38             ` Thomas Gleixner
2013-11-20 22:08               ` Jacob Pan
2013-11-21  0:54   ` Jacob Pan
2013-11-21  8:21     ` Peter Zijlstra
2013-11-21 16:07       ` Paul E. McKenney
2013-11-21 16:21         ` Arjan van de Ven
2013-11-21 19:19           ` Paul E. McKenney
2013-11-21 19:45             ` Arjan van de Ven
2013-11-21 20:07               ` Paul E. McKenney
2013-11-22  0:10                 ` Jacob Pan
2013-11-22  4:20                   ` Paul E. McKenney
2013-11-22 11:33                     ` Peter Zijlstra
2013-11-22 17:17                       ` Paul E. McKenney
2013-11-21 16:29         ` Peter Zijlstra
2013-11-21 17:27           ` Paul E. McKenney
2013-11-20 16:04 ` [PATCH 4/7] preempt, locking: Rework local_bh_{dis,en}able() Peter Zijlstra
2013-11-20 16:04 ` [PATCH 5/7] locking: Optimize lock_bh functions Peter Zijlstra
2013-11-20 16:04 ` [PATCH 6/7] sched: Clean up preempt_enable_no_resched() abuse Peter Zijlstra
2013-11-20 18:02   ` Eliezer Tamir
2013-11-20 18:15     ` Peter Zijlstra
2013-11-20 20:14       ` Eliezer Tamir
2013-11-21 10:10     ` Peter Zijlstra
2013-11-21 13:26       ` Eliezer Tamir
2013-11-21 13:39         ` Peter Zijlstra
2013-11-22  6:56           ` Eliezer Tamir
2013-11-22 11:30             ` Peter Zijlstra
2013-11-26  7:15               ` Eliezer Tamir
2013-11-26 10:51                 ` Thomas Gleixner
2013-11-20 16:04 ` [PATCH 7/7] preempt: Take away preempt_enable_no_resched() from modules Peter Zijlstra
2013-11-20 18:54   ` Jacob Pan
2013-11-20 19:00     ` Peter Zijlstra
2013-11-20 19:18     ` Peter Zijlstra
2013-11-20 19:29       ` Jacob Pan
2013-11-20 16:34 ` [PATCH 0/7] Cure some vaux idle wrackage Peter Zijlstra
2013-11-20 17:19   ` Jacob Pan
2013-11-20 17:24     ` Peter Zijlstra

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20131120162736.508462614@infradead.org \
    --to=peterz@infradead.org \
    --cc=arjan@linux.intel.com \
    --cc=bitbucket@online.de \
    --cc=christopher.leech@intel.com \
    --cc=davem@davemloft.net \
    --cc=eliezer.tamir@linux.intel.com \
    --cc=hpa@zytor.com \
    --cc=jacob.jun.pan@linux.intel.com \
    --cc=lenb@kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-pm@vger.kernel.org \
    --cc=mingo@kernel.org \
    --cc=rafael.j.wysocki@intel.com \
    --cc=rjw@rjwysocki.net \
    --cc=rui.zhang@intel.com \
    --cc=tglx@linutronix.de \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).