public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
From: Ingo Molnar <mingo@elte.hu>
To: Andrew Morton <akpm@osdl.org>
Cc: torvalds@osdl.org, linux-kernel@vger.kernel.org,
	davej@redhat.com, tglx@linutronix.de, drepper@redhat.com
Subject: Re: [patch] pi-futex: futex_wake() lockup fix
Date: Sat, 1 Jul 2006 09:25:46 +0200	[thread overview]
Message-ID: <20060701072545.GA24283@elte.hu> (raw)
In-Reply-To: <20060701002305.7b6b78a4.akpm@osdl.org>


* Andrew Morton <akpm@osdl.org> wrote:

> >  		if (match_futex (&this->key, &key)) {
> > -			if (this->pi_state)
> > -				return -EINVAL;
> > +			if (this->pi_state) {
> > +				ret = -EINVAL;
> > +				break;
> > +			}
> >  			wake_futex(this);
> >  			if (++ret >= nr_wake)
> >  				break;
> 
> Well that was rather a howler.

yeah :-| This bug was introduced relatively late, as part of another 
bugfix.

> How did the lock validator help in identifying this?

i've got an extension for it that moans if we exit a syscall with held 
locks. (i dont want to risk the current queue with it, it's below - just 
for review)

(but it's not a strict lockdep thing - it should probably be done as 
part of DEBUG_LOCK_ALLOC, not LOCKDEP)

anyway, this is for later - it just shows that we can certainly do a 
number of nice things with lockdep's lock-stack. [as DEBUG_LOCK_ALLOC 
itself already demonstrates]

	Ingo

NOT-YET-Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/i386/kernel/entry.S     |   15 ++++++++++++
 arch/x86_64/ia32/ia32entry.S |    6 +++++
 arch/x86_64/kernel/entry.S   |    4 +++
 include/asm-x86_64/calling.h |   50 +++++++++++++++++++++++++++++++++++++++++++
 kernel/lockdep.c             |   20 +++++++++++++++++
 lib/Kconfig.debug            |    4 +++
 6 files changed, 99 insertions(+)

Index: linux/arch/i386/kernel/entry.S
===================================================================
--- linux.orig/arch/i386/kernel/entry.S
+++ linux/arch/i386/kernel/entry.S
@@ -308,6 +308,11 @@ sysenter_past_esp:
 	pushl %eax
 	CFI_ADJUST_CFA_OFFSET 4
 	SAVE_ALL
+#ifdef CONFIG_SYSCALL_TRACE
+	pushl %edx; pushl %ecx; pushl %ebx; pushl %eax
+	call sys_call
+	popl %eax; popl %ebx; popl %ecx; popl %edx
+#endif
 	GET_THREAD_INFO(%ebp)
 
 	/* Note, _TIF_SECCOMP is bit number 8, and so it needs testw and not testb */
@@ -322,6 +327,11 @@ sysenter_past_esp:
 	movl TI_flags(%ebp), %ecx
 	testw $_TIF_ALLWORK_MASK, %cx
 	jne syscall_exit_work
+#ifdef CONFIG_SYSCALL_TRACE
+	pushl %eax
+	call sys_ret
+	popl %eax
+#endif
 /* if something modifies registers it must also disable sysexit */
 	movl EIP(%esp), %edx
 	movl OLDESP(%esp), %ecx
@@ -338,6 +348,11 @@ ENTRY(system_call)
 	pushl %eax			# save orig_eax
 	CFI_ADJUST_CFA_OFFSET 4
 	SAVE_ALL
+#ifdef CONFIG_SYSCALL_TRACE
+	pushl %edx; pushl %ecx; pushl %ebx; pushl %eax
+	call sys_call
+	popl %eax; popl %ebx; popl %ecx; popl %edx
+#endif
 	GET_THREAD_INFO(%ebp)
 	testl $TF_MASK,EFLAGS(%esp)
 	jz no_singlestep
Index: linux/arch/x86_64/ia32/ia32entry.S
===================================================================
--- linux.orig/arch/x86_64/ia32/ia32entry.S
+++ linux/arch/x86_64/ia32/ia32entry.S
@@ -119,7 +119,9 @@ sysenter_do_call:	
 	cmpl	$(IA32_NR_syscalls-1),%eax
 	ja	ia32_badsys
 	IA32_ARG_FIXUP 1
+	TRACE_SYS_IA32_CALL
 	call	*ia32_sys_call_table(,%rax,8)
+	TRACE_SYS_RET
 	movq	%rax,RAX-ARGOFFSET(%rsp)
 	GET_THREAD_INFO(%r10)
 	cli
@@ -227,7 +229,9 @@ cstar_do_call:	
 	cmpl $IA32_NR_syscalls-1,%eax
 	ja  ia32_badsys
 	IA32_ARG_FIXUP 1
+	TRACE_SYS_IA32_CALL
 	call *ia32_sys_call_table(,%rax,8)
+	TRACE_SYS_RET
 	movq %rax,RAX-ARGOFFSET(%rsp)
 	GET_THREAD_INFO(%r10)
 	cli
@@ -320,8 +324,10 @@ ia32_do_syscall:	
 	cmpl $(IA32_NR_syscalls-1),%eax
 	ja  ia32_badsys
 	IA32_ARG_FIXUP
+	TRACE_SYS_IA32_CALL
 	call *ia32_sys_call_table(,%rax,8) # xxx: rip relative
 ia32_sysret:
+	TRACE_SYS_RET
 	movq %rax,RAX-ARGOFFSET(%rsp)
 	jmp int_ret_from_sys_call 
 
Index: linux/arch/x86_64/kernel/entry.S
===================================================================
--- linux.orig/arch/x86_64/kernel/entry.S
+++ linux/arch/x86_64/kernel/entry.S
@@ -222,7 +222,9 @@ ENTRY(system_call)
 	cmpq $__NR_syscall_max,%rax
 	ja badsys
 	movq %r10,%rcx
+	TRACE_SYS_CALL
 	call *sys_call_table(,%rax,8)  # XXX:	 rip relative
+	TRACE_SYS_RET
 	movq %rax,RAX-ARGOFFSET(%rsp)
 /*
  * Syscall return path ending with SYSRET (fast path)
@@ -304,7 +306,9 @@ tracesys:			 
 	cmpq $__NR_syscall_max,%rax
 	ja  1f
 	movq %r10,%rcx	/* fixup for C */
+	TRACE_SYS_CALL
 	call *sys_call_table(,%rax,8)
+	TRACE_SYS_RET
 1:	movq %rax,RAX-ARGOFFSET(%rsp)
 	/* Use IRET because user could have changed frame */
 	jmp int_ret_from_sys_call
Index: linux/include/asm-x86_64/calling.h
===================================================================
--- linux.orig/include/asm-x86_64/calling.h
+++ linux/include/asm-x86_64/calling.h
@@ -160,3 +160,53 @@
 	.macro icebp
 	.byte 0xf1
 	.endm
+
+/*
+ * syscall-tracing helpers:
+ */
+
+	.macro TRACE_SYS_CALL
+
+#ifdef CONFIG_SYSCALL_TRACE
+	SAVE_ARGS
+
+	mov     %rdx, %rcx
+	mov     %rsi, %rdx
+	mov     %rdi, %rsi
+	mov     %rax, %rdi
+
+	call sys_call
+
+	RESTORE_ARGS
+#endif
+	.endm
+
+
+	.macro TRACE_SYS_IA32_CALL
+
+#ifdef CONFIG_SYSCALL_TRACE
+	SAVE_ARGS
+
+	mov     %rdx, %rcx
+	mov     %rsi, %rdx
+	mov     %rdi, %rsi
+	mov     %rax, %rdi
+
+	call sys_ia32_call
+
+	RESTORE_ARGS
+#endif
+	.endm
+
+	.macro TRACE_SYS_RET
+
+#ifdef CONFIG_SYSCALL_TRACE
+	SAVE_ARGS
+
+	mov     %rax, %rdi
+
+	call sys_ret
+
+	RESTORE_ARGS
+#endif
+	.endm
Index: linux/kernel/lockdep.c
===================================================================
--- linux.orig/kernel/lockdep.c
+++ linux/kernel/lockdep.c
@@ -2701,3 +2701,23 @@ void debug_show_held_locks(struct task_s
 
 EXPORT_SYMBOL_GPL(debug_show_held_locks);
 
+void
+sys_call(unsigned long nr, unsigned long p1, unsigned long p2, unsigned long p3)
+{
+	DEBUG_LOCKS_WARN_ON(current->lockdep_depth);
+}
+
+#if defined(CONFIG_COMPAT) && defined(CONFIG_X86)
+
+void sys_ia32_call(unsigned long nr, unsigned long p1, unsigned long p2,
+		   unsigned long p3)
+{
+	DEBUG_LOCKS_WARN_ON(current->lockdep_depth);
+}
+
+#endif
+
+void sys_ret(unsigned long ret)
+{
+	DEBUG_LOCKS_WARN_ON(current->lockdep_depth);
+}
Index: linux/lib/Kconfig.debug
===================================================================
--- linux.orig/lib/Kconfig.debug
+++ linux/lib/Kconfig.debug
@@ -251,6 +251,10 @@ config LOCKDEP
 	select FRAME_POINTER
 	select KALLSYMS
 	select KALLSYMS_ALL
+	select SYSCALL_TRACE
+
+config SYSCALL_TRACE
+	bool
 
 config DEBUG_LOCKDEP
 	bool "Lock dependency engine debugging"

      reply	other threads:[~2006-07-01  7:30 UTC|newest]

Thread overview: 3+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2006-07-01  7:07 [patch] pi-futex: futex_wake() lockup fix Ingo Molnar
2006-07-01  7:23 ` Andrew Morton
2006-07-01  7:25   ` Ingo Molnar [this message]

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20060701072545.GA24283@elte.hu \
    --to=mingo@elte.hu \
    --cc=akpm@osdl.org \
    --cc=davej@redhat.com \
    --cc=drepper@redhat.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=tglx@linutronix.de \
    --cc=torvalds@osdl.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox