Re: [patch] remove the BKL (Big Kernel Lock), this time for real

public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed

From: Ingo Molnar <mingo@elte.hu>
To: linux-kernel@vger.kernel.org
Cc: Andrew Morton <akpm@osdl.org>, Linus Torvalds <torvalds@osdl.org>,
	William Lee Irwin III <wli@holomorphy.com>,
	Arjan van de Ven <arjanv@redhat.com>,
	Lee Revell <rlrevell@joe-job.com>
Subject: Re: [patch] remove the BKL (Big Kernel Lock), this time for real
Date: Fri, 17 Sep 2004 14:53:34 +0200	[thread overview]
Message-ID: <20040917125334.GA4954@elte.hu> (raw)
In-Reply-To: <20040917103945.GA19861@elte.hu>


* Ingo Molnar <mingo@elte.hu> wrote:

> the attached patch is a simplified variant of the remove-bkl patch i
> sent two days ago: it doesnt do the ->cpus_allowed trick.
> 
> The question is, is there any BKL-using kernel code that relies on the
> task not migrating to another CPU within the BLK critical section?

the attached debug patch is ontop of the above patch and prints warnings
if code uses smp_processor_id() in a preemptible section of code. The
patch gets rid of a number of common false positives but more false
positives are more than likely.

The patch has already uncovered a bug: softirq invocation had a softirq
invocation race possibly leading to (<1msec) softirq processing delays
in the PREEMPT case. But more importantly it has found no BKL user so
far that relied on smp_processor_id(), which makes me hopeful that the
problem is not widespread at all and we could opt for the simpler,
semaphore-based BKS design.

(patch tested on x86 and x64, should work on all architectures.)

	Ingo


--- linux/arch/i386/lib/delay.c	
+++ linux/arch/i386/lib/delay.c	
@@ -34,7 +34,7 @@ inline void __const_udelay(unsigned long
 	xloops *= 4;
 	__asm__("mull %0"
 		:"=d" (xloops), "=&a" (d0)
-		:"1" (xloops),"0" (current_cpu_data.loops_per_jiffy * (HZ/4)));
+		:"1" (xloops),"0" (boot_cpu_data.loops_per_jiffy * (HZ/4)));
         __delay(++xloops);
 }
 
--- linux/arch/x86_64/lib/delay.c	
+++ linux/arch/x86_64/lib/delay.c	
@@ -34,7 +34,7 @@ void __delay(unsigned long loops)
 
 inline void __const_udelay(unsigned long xloops)
 {
-        __delay(((xloops * current_cpu_data.loops_per_jiffy) >> 32) * HZ);
+        __delay(((xloops * boot_cpu_data.loops_per_jiffy) >> 32) * HZ);
 }
 
 void __udelay(unsigned long usecs)
--- linux/include/asm-i386/smp.h	
+++ linux/include/asm-i386/smp.h	
@@ -50,7 +50,7 @@ extern u8 x86_cpu_to_apicid[];
  * from the initial startup. We map APIC_BASE very early in page_setup(),
  * so this is correct in the x86 case.
  */
-#define smp_processor_id() (current_thread_info()->cpu)
+#define __smp_processor_id() (current_thread_info()->cpu)
 
 extern cpumask_t cpu_callout_map;
 #define cpu_possible_map cpu_callout_map
--- linux/include/asm-x86_64/smp.h	
+++ linux/include/asm-x86_64/smp.h	
@@ -66,7 +66,7 @@ static inline int num_booting_cpus(void)
 	return cpus_weight(cpu_callout_map);
 }
 
-#define smp_processor_id() read_pda(cpunumber)
+#define __smp_processor_id() read_pda(cpunumber)
 
 extern __inline int hard_smp_processor_id(void)
 {
--- linux/include/linux/smp.h	
+++ linux/include/linux/smp.h	
@@ -95,8 +95,11 @@ void smp_prepare_boot_cpu(void);
 /*
  *	These macros fold the SMP functionality into a single CPU system
  */
- 
-#define smp_processor_id()			0
+
+#if !defined(__smp_processor_id) || !defined(CONFIG_PREEMPT)
+# define smp_processor_id()			0
+
+#endif
 #define hard_smp_processor_id()			0
 #define smp_threads_ready			1
 #define smp_call_function(func,info,retry,wait)	({ 0; })
@@ -107,6 +110,17 @@ static inline void smp_send_reschedule(i
 
 #endif /* !SMP */
 
+#ifdef __smp_processor_id 
+# ifdef CONFIG_PREEMPT
+   extern unsigned int smp_processor_id(void);
+# else
+#  define smp_processor_id() __smp_processor_id()
+# endif
+# define _smp_processor_id() __smp_processor_id()
+#else
+# define _smp_processor_id() smp_processor_id()
+#endif
+
 #define get_cpu()		({ preempt_disable(); smp_processor_id(); })
 #define put_cpu()		preempt_enable()
 #define put_cpu_no_resched()	preempt_enable_no_resched()
--- linux/include/net/route.h	
+++ linux/include/net/route.h	
@@ -105,7 +105,7 @@ struct rt_cache_stat 
 
 extern struct rt_cache_stat *rt_cache_stat;
 #define RT_CACHE_STAT_INC(field)					  \
-		(per_cpu_ptr(rt_cache_stat, smp_processor_id())->field++)
+		(per_cpu_ptr(rt_cache_stat, _smp_processor_id())->field++)
 
 extern struct ip_rt_acct *ip_rt_acct;
 
--- linux/include/net/snmp.h	
+++ linux/include/net/snmp.h	
@@ -128,18 +128,18 @@ struct linux_mib {
 #define SNMP_STAT_USRPTR(name)	(name[1])
 
 #define SNMP_INC_STATS_BH(mib, field) 	\
-	(per_cpu_ptr(mib[0], smp_processor_id())->mibs[field]++)
+	(per_cpu_ptr(mib[0], _smp_processor_id())->mibs[field]++)
 #define SNMP_INC_STATS_OFFSET_BH(mib, field, offset)	\
-	(per_cpu_ptr(mib[0], smp_processor_id())->mibs[field + (offset)]++)
+	(per_cpu_ptr(mib[0], _smp_processor_id())->mibs[field + (offset)]++)
 #define SNMP_INC_STATS_USER(mib, field) \
-	(per_cpu_ptr(mib[1], smp_processor_id())->mibs[field]++)
+	(per_cpu_ptr(mib[1], _smp_processor_id())->mibs[field]++)
 #define SNMP_INC_STATS(mib, field) 	\
-	(per_cpu_ptr(mib[!in_softirq()], smp_processor_id())->mibs[field]++)
+	(per_cpu_ptr(mib[!in_softirq()], _smp_processor_id())->mibs[field]++)
 #define SNMP_DEC_STATS(mib, field) 	\
-	(per_cpu_ptr(mib[!in_softirq()], smp_processor_id())->mibs[field]--)
+	(per_cpu_ptr(mib[!in_softirq()], _smp_processor_id())->mibs[field]--)
 #define SNMP_ADD_STATS_BH(mib, field, addend) 	\
-	(per_cpu_ptr(mib[0], smp_processor_id())->mibs[field] += addend)
+	(per_cpu_ptr(mib[0], _smp_processor_id())->mibs[field] += addend)
 #define SNMP_ADD_STATS_USER(mib, field, addend) 	\
-	(per_cpu_ptr(mib[1], smp_processor_id())->mibs[field] += addend)
+	(per_cpu_ptr(mib[1], _smp_processor_id())->mibs[field] += addend)
 
 #endif
--- linux/kernel/printk.c	
+++ linux/kernel/printk.c	
@@ -641,8 +641,9 @@ void release_console_sem(void)
 		_con_start = con_start;
 		_log_end = log_end;
 		con_start = log_end;		/* Flush */
-		spin_unlock_irqrestore(&logbuf_lock, flags);
+		spin_unlock(&logbuf_lock);
 		call_console_drivers(_con_start, _log_end);
+		local_irq_restore(flags);
 	}
 	console_locked = 0;
 	console_may_schedule = 0;
--- linux/kernel/sched.c	
+++ linux/kernel/sched.c	
@@ -2288,6 +2288,56 @@ static inline int dependent_sleeper(int 
 }
 #endif
 
+#if defined(CONFIG_PREEMPT) && defined(__smp_processor_id)
+/*
+ * Debugging check.
+ */
+unsigned int smp_processor_id(void)
+{
+	unsigned long preempt_count = preempt_count();
+	int this_cpu = __smp_processor_id();
+	cpumask_t this_mask;
+
+	if (likely(preempt_count))
+		goto out;
+
+	if (irqs_disabled())
+		goto out;
+
+	/*
+	 * Kernel threads bound to a single CPU can safely use
+	 * smp_processor_id():
+	 */
+	this_mask = cpumask_of_cpu(this_cpu);
+
+	if (cpus_equal(current->cpus_allowed, this_mask))
+		goto out;
+
+	/*
+	 * It is valid to assume CPU-locality during early bootup:
+	 */
+	if (system_state != SYSTEM_RUNNING)
+		goto out;
+
+	/*
+	 * Avoid recursion:
+	 */
+	preempt_disable();
+
+	if (!printk_ratelimit())
+		goto out_enable;
+		
+	printk(KERN_ERR "using smp_processor_id() in preemptible code: %s/%d\n",
+		current->comm, current->pid);
+	dump_stack();
+
+out_enable:
+	preempt_enable_no_resched();
+out:
+	return this_cpu;
+}
+#endif
+
 #if defined(CONFIG_SMP) || defined(CONFIG_PREEMPT)
 
 /*
@@ -3406,7 +3456,7 @@ EXPORT_SYMBOL(yield);
  */
 void __sched io_schedule(void)
 {
-	struct runqueue *rq = this_rq();
+	struct runqueue *rq = &per_cpu(runqueues, _smp_processor_id());
 
 	atomic_inc(&rq->nr_iowait);
 	schedule();
@@ -3417,7 +3467,7 @@ EXPORT_SYMBOL(io_schedule);
 
 long __sched io_schedule_timeout(long timeout)
 {
-	struct runqueue *rq = this_rq();
+	struct runqueue *rq = &per_cpu(runqueues, _smp_processor_id());
 	long ret;
 
 	atomic_inc(&rq->nr_iowait);
--- linux/kernel/softirq.c	
+++ linux/kernel/softirq.c	
@@ -137,12 +137,19 @@ EXPORT_SYMBOL(do_softirq);
 
 void local_bh_enable(void)
 {
-	__local_bh_enable();
 	WARN_ON(irqs_disabled());
-	if (unlikely(!in_interrupt() &&
-		     local_softirq_pending()))
+	if (unlikely(!in_interrupt() && local_softirq_pending())) {
+		/*
+		 * Keep preemption disabled until we are done with
+		 * softirq processing:
+	 	 */
+		preempt_count() -= SOFTIRQ_OFFSET - 1;
 		invoke_softirq();
-	preempt_check_resched();
+		preempt_enable();
+	} else {
+		__local_bh_enable();
+		preempt_check_resched();
+	}
 }
 EXPORT_SYMBOL(local_bh_enable);

next prev parent reply	other threads:[~2004-09-17 14:22 UTC|newest]

Thread overview: 32+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2004-09-15 15:18 [patch] remove the BKL (Big Kernel Lock), this time for real Ingo Molnar
2004-09-15 15:40 ` Linus Torvalds
2004-09-15 15:55   ` Ingo Molnar
2004-09-15 17:04     ` Ricky Beam
2004-09-15 19:39       ` Ingo Molnar
2004-09-15 18:28     ` Chris Wedgwood
2004-09-15 21:25   ` William Lee Irwin III
2004-09-17 10:39 ` Ingo Molnar
2004-09-17 12:53   ` Ingo Molnar [this message]
2004-09-17 20:56     ` Andrea Arcangeli
2004-09-18  8:02       ` Ingo Molnar
2004-09-18 23:36         ` Andrea Arcangeli
2004-09-17 13:26   ` Andrea Arcangeli
2004-09-17 13:47     ` William Lee Irwin III
2004-09-17 13:56       ` Andrea Arcangeli
2004-09-17 14:18         ` William Lee Irwin III
2004-09-17 15:16   ` Linus Torvalds
     [not found] <2EJTp-7bx-1@gated-at.bofh.it>
2004-09-15 15:46 ` Andi Kleen
2004-09-15 15:58   ` Ingo Molnar
2004-09-15 20:11   ` Ingo Molnar
2004-09-16  1:17     ` Nick Piggin
2004-09-16 14:28   ` Bill Davidsen
2004-09-16 22:29     ` Bill Huey
2004-09-16 22:40       ` David S. Miller
2004-09-16 22:51         ` Bill Huey
2004-09-16 22:54           ` David S. Miller
2004-09-16 23:01             ` Bill Huey
2004-09-16 23:33             ` William Lee Irwin III
2004-09-17  6:43           ` Ingo Molnar
2004-09-17  7:21             ` Tony Lee
  -- strict thread matches above, loose matches on Subject: below --
2004-09-18  5:44 Manfred Spraul
2004-09-18 13:09 ` Ingo Molnar

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20040917125334.GA4954@elte.hu \
    --to=mingo@elte.hu \
    --cc=akpm@osdl.org \
    --cc=arjanv@redhat.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=rlrevell@joe-job.com \
    --cc=torvalds@osdl.org \
    --cc=wli@holomorphy.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox