public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
From: Andrew Morton <akpm@zip.com.au>
To: Linus Torvalds <torvalds@transmeta.com>
Cc: Robert Love <rml@tech9.net>, Skip Ford <skip.ford@verizon.net>,
	"Adam J. Richter" <adam@yggdrasil.com>,
	ryan.flanigan@intel.com, linux-kernel@vger.kernel.org
Subject: Re: 2.5.31: modules don't work at all
Date: Mon, 12 Aug 2002 16:34:23 -0700	[thread overview]
Message-ID: <3D5845FF.C6668B15@zip.com.au> (raw)
In-Reply-To: <Pine.LNX.4.33.0208121330310.1289-100000@penguin.transmeta.com>

Linus Torvalds wrote:
> 
> On Mon, 12 Aug 2002, Andrew Morton wrote:
> >
> > Gets tricky with nested lock_kernels.
> 
> No, lock-kernel already only increments once, at the first lock_kernel. We
> have a totally separate counter for the BKL depth, see <asm/smplock.h>
> 

There are eighteen smplock.h's, all different.  At least one (SuperH)
hasn't been converted to preempt.  I'd rather like to decouple the kmap
optimisation work from that mini-quagmire.  It's very easy to do, with

	current->flags |= PF_ATOMIC;
	__copy_to_user(...);
	current->flags &= ~PF_ATOMIC;

and that all works fine.

However, soldiering on leads us to some difficulties.  You're proposing,
effectively, that preempt_count gets shifted left one bit and that bit
zero becomes "has done lock_kernel()".

So bits 0-31 of preempt_count mean "may not be preempted" and "should
not preempt self".  And bits 1-31 of preempt_count mean "must not
explicitly call schedule".

Problem is, the semantics of this vary too much between preemptible
and non-preemptible kernels.  Because non-preemptible kernels do
not increment preempt_count in spin_lock().

Maybe my penny hasn't dropped yet, but I tend to feel that the
semantics of my "may_schedule()" below are too fragile for it to
be part of the API.

(Untested, uncompiled code)


 arch/i386/mm/fault.c       |    2 -
 include/asm-i386/smplock.h |   20 +++++++++++++-----
 include/linux/preempt.h    |   49 ++++++++++++++++++++++++++++++++-------------
 3 files changed, 51 insertions(+), 20 deletions(-)

--- 2.5.31/arch/i386/mm/fault.c~fix-faults	Mon Aug 12 16:14:21 2002
+++ 2.5.31-akpm/arch/i386/mm/fault.c	Mon Aug 12 16:14:21 2002
@@ -192,7 +192,7 @@ asmlinkage void do_page_fault(struct pt_
 	 * If we're in an interrupt, have no user context or are running in an
 	 * atomic region then we must not take the fault..
 	 */
-	if (preempt_count() || !mm)
+	if (!may_schedule() || !mm)
 		goto no_context;
 
 #ifdef CONFIG_X86_REMOTE_DEBUG
--- 2.5.31/include/linux/preempt.h~fix-faults	Mon Aug 12 16:14:21 2002
+++ 2.5.31-akpm/include/linux/preempt.h	Mon Aug 12 16:14:21 2002
@@ -5,29 +5,60 @@
 
 #define preempt_count() (current_thread_info()->preempt_count)
 
+/*
+ * Bit zero of preempt_count means "holds lock_kernel".
+ * So a non-zero value in preempt_count() means "may not be preempted" and a
+ * non-zero value in bits 1-31 means "may not explicitly schedule".
+ */
+
 #define inc_preempt_count() \
 do { \
-	preempt_count()++; \
+	preempt_count() += 2; \
 } while (0)
 
 #define dec_preempt_count() \
 do { \
-	preempt_count()--; \
+	preempt_count() -= 2; \
+} while (0)
+
+#define lock_kernel_enter() \
+do { \
+	preempt_count() |= 1; \
+} while (0)
+
+#define lock_kernel_exit() \
+do { \
+	preempt_count() &= ~1; \
 } while (0)
 
+/*
+ * The semantics of this depend upon CONFIG_PREEMPT.
+ *
+ * With CONFIG_PREEMPT=y, may_schedule() returns false in irq context and
+ * inside spinlocks, and returns true inside lock_kernel().
+ *
+ * With CONFIG_PREEMPT=n, may_schedule() returns false in irq context, returns
+ * true inside spinlocks and returns true inside lock_kernel().
+ *
+ * But may_schedule() will also return false if the task has performed an
+ * explicit inc_preempt_count(), regardless of CONFIG_PREEMPT.  Which is really
+ * the only situation in which may_schedule() is useful.
+ */
+#define may_schedule()	(!(preempt_count() >> 1))
+
 #ifdef CONFIG_PREEMPT
 
 extern void preempt_schedule(void);
 
 #define preempt_disable() \
 do { \
-	inc_preempt_count(); \
+	preempt_count() += 2; \
 	barrier(); \
 } while (0)
 
 #define preempt_enable_no_resched() \
 do { \
-	dec_preempt_count(); \
+	preempt_count() -= 2; \
 	barrier(); \
 } while (0)
 
@@ -44,22 +75,12 @@ do { \
 		preempt_schedule(); \
 } while (0)
 
-#define inc_preempt_count_non_preempt()	do { } while (0)
-#define dec_preempt_count_non_preempt()	do { } while (0)
-
 #else
 
 #define preempt_disable()		do { } while (0)
 #define preempt_enable_no_resched()	do {} while(0)
 #define preempt_enable()		do { } while (0)
 #define preempt_check_resched()		do { } while (0)
-
-/*
- * Sometimes we want to increment the preempt count, but we know that it's
- * already incremented if the kernel is compiled for preemptibility.
- */
-#define inc_preempt_count_non_preempt()	inc_preempt_count()
-#define dec_preempt_count_non_preempt()	dec_preempt_count()
 
 #endif
 
--- 2.5.31/include/asm-i386/smplock.h~fix-faults	Mon Aug 12 16:14:21 2002
+++ 2.5.31-akpm/include/asm-i386/smplock.h	Mon Aug 12 16:15:20 2002
@@ -25,8 +25,10 @@ extern spinlock_t kernel_flag;
  */
 #define release_kernel_lock(task)		\
 do {						\
-	if (unlikely(task->lock_depth >= 0))	\
+	if (unlikely(task->lock_depth >= 0)) {	\
 		spin_unlock(&kernel_flag);	\
+		lock_kernel_exit();		\
+	}					\
 } while (0)
 
 /*
@@ -34,8 +36,10 @@ do {						\
  */
 #define reacquire_kernel_lock(task)		\
 do {						\
-	if (unlikely(task->lock_depth >= 0))	\
+	if (unlikely(task->lock_depth >= 0)) {	\
+		lock_kernel_enter();		\
 		spin_lock(&kernel_flag);	\
+	}					\
 } while (0)
 
 
@@ -49,13 +53,17 @@ do {						\
 static __inline__ void lock_kernel(void)
 {
 #ifdef CONFIG_PREEMPT
-	if (current->lock_depth == -1)
+	if (current->lock_depth == -1) {
+		lock_kernel_enter();
 		spin_lock(&kernel_flag);
+	}
 	++current->lock_depth;
 #else
 #if 1
-	if (!++current->lock_depth)
+	if (!++current->lock_depth) {
+		lock_kernel_enter();
 		spin_lock(&kernel_flag);
+	}
 #else
 	__asm__ __volatile__(
 		"incl %1\n\t"
@@ -73,8 +81,10 @@ static __inline__ void unlock_kernel(voi
 	if (current->lock_depth < 0)
 		BUG();
 #if 1
-	if (--current->lock_depth < 0)
+	if (--current->lock_depth < 0) {
 		spin_unlock(&kernel_flag);
+		lock_kernel_exit();
+	}
 #else
 	__asm__ __volatile__(
 		"decl %1\n\t"

.

  reply	other threads:[~2002-08-12 23:33 UTC|newest]

Thread overview: 18+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2002-08-12  2:33 2.5.31: modules don't work at all Adam J. Richter
2002-08-12  3:07 ` Skip Ford
2002-08-12  5:36   ` Andrew Morton
2002-08-12 17:22     ` Linus Torvalds
2002-08-12 17:52       ` Andrew Morton
2002-08-12 20:32         ` Linus Torvalds
2002-08-12 23:34           ` Andrew Morton [this message]
2002-08-12 23:45             ` Linus Torvalds
2002-08-13  0:32             ` Skip Ford
2002-08-13  1:31             ` Skip Ford
2002-08-13  0:09     ` Andrew Rodland
2002-08-13  0:13       ` Andrew Morton
2002-08-20 22:59     ` Ed Tomlinson
2002-08-12  3:09 ` Flanigan, Ryan
  -- strict thread matches above, loose matches on Subject: below --
2002-08-11 12:41 Michel Eyckmans (MCE)
2002-08-12  0:54 ` Flanigan, Ryan
2002-08-12  1:03 ` Andrew Rodland
2002-08-12  1:11   ` Flanigan, Ryan

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=3D5845FF.C6668B15@zip.com.au \
    --to=akpm@zip.com.au \
    --cc=adam@yggdrasil.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=rml@tech9.net \
    --cc=ryan.flanigan@intel.com \
    --cc=skip.ford@verizon.net \
    --cc=torvalds@transmeta.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox