From: Keith Owens <kaos@sgi.com>
To: linux-ia64@vger.kernel.org
Subject: [Linux-ia64] Re: ia64_spinlock_contention and NEW_LOCK
Date: Wed, 12 Mar 2003 10:19:37 +0000 [thread overview]
Message-ID: <marc-linux-ia64-105590709806055@msgid-missing> (raw)
On Tue, 11 Mar 2003 17:59:56 -0800,
David Mosberger <davidm@napali.hpl.hp.com> wrote:
>Seriously though: I think the main problem is trying to do branch to
>out-of-line code without the compiler knowing about it. The main
>reason I wanted to do that is to avoid turning leaf routines into
>interior routines just because of a spinlock acquisition.
This patch against 2.4.20-ia64-021220 has had minimal testing, it works
for me (TM). kdb and lkcd diagnosis of hung cpus is much easier with
the patch.
The inline code for an uncontended lock drops from 5 to 4 bundles for
each use of spin_lock(). The uncontended path has one less memory
access.
ia64_spinlock_contention in head.S is the basic framework for the out
of line contention code, it can be expanded for exponential backoff,
kdb, lock monitoring, detection of hung locks, whatever we want.
Calling another function from the contention path is possible, but
tricky.
This code works for spin_lock() in built in code and for modules. It
does not affect the status of leaf functions, no change to ar.pfs or b0.
Index: 20.5/include/asm-ia64/spinlock.h
--- 20.5/include/asm-ia64/spinlock.h Fri, 01 Mar 2002 11:01:28 +1100 kaos (linux-2.4/s/28_spinlock.h 1.1.3.1.1.1.2.1 644)
+++ 20.5(w)/include/asm-ia64/spinlock.h Wed, 12 Mar 2003 17:33:55 +1100 kaos (linux-2.4/s/28_spinlock.h 1.1.3.1.1.1.2.1 644)
@@ -15,10 +15,6 @@
#include <asm/bitops.h>
#include <asm/atomic.h>
-#undef NEW_LOCK
-
-#ifdef NEW_LOCK
-
typedef struct {
volatile unsigned int lock;
} spinlock_t;
@@ -27,81 +23,48 @@ typedef struct {
#define spin_lock_init(x) ((x)->lock = 0)
/*
- * Streamlined test_and_set_bit(0, (x)). We use test-and-test-and-set
- * rather than a simple xchg to avoid writing the cache-line when
- * there is contention.
+ * Try to get the lock. If we fail to get the lock, branch (not call) to
+ * ia64_spinlock_contention. We do not use call because that stamps on ar.pfs
+ * which has unwanted side effects on the routine using spin_lock().
+ *
+ * ia64_spinlock_contention is entered with :-
+ * r27 - available for use.
+ * r28 - address of start of spin_lock code. Used as a "return" address
+ * from the contention path. mov r28=ip must be in the first bundle.
+ * r29 - available for use.
+ * r30 - available for use.
+ * r31 - address of lock.
+ * b7 - available for use.
+ * p15 - available for use.
*/
-#define spin_lock(x) \
-{ \
+#define spin_lock(x) \
+({ \
register char *addr __asm__ ("r31") = (char *) &(x)->lock; \
\
__asm__ __volatile__ ( \
- "mov r30=1\n" \
+ "1:\n" /* force a new bundle, r28 points here */ \
"mov ar.ccv=r0\n" \
+ "mov r28=ip\n" \
+ "mov r30=1\n" \
";;\n" \
"cmpxchg4.acq r30=[%0],r30,ar.ccv\n" \
+ "movl r29=ia64_spinlock_contention\n" \
";;\n" \
"cmp.ne p15,p0=r30,r0\n" \
- "(p15) br.call.spnt.few b7=ia64_spinlock_contention\n" \
+ "mov b7=r29\n" \
";;\n" \
- "1:\n" /* force a new bundle */ \
- :: "r"(addr) \
- : "ar.ccv", "ar.pfs", "b7", "p15", "r28", "r29", "r30", "memory"); \
-}
-
-#define spin_trylock(x) \
-({ \
- register long result; \
- \
- __asm__ __volatile__ ( \
- "mov ar.ccv=r0\n" \
+ "(p15) br.cond.spnt.few b7\n" \
";;\n" \
- "cmpxchg4.acq %0=[%2],%1,ar.ccv\n" \
- : "=r"(result) : "r"(1), "r"(&(x)->lock) : "ar.ccv", "memory"); \
- (result == 0); \
+ "2:\n" /* force a new bundle */ \
+ :: "r"(addr) \
+ : "ar.ccv", "r28", "r29", "r30", "b7", "p15", "memory"); \
})
#define spin_is_locked(x) ((x)->lock != 0)
-#define spin_unlock(x) do { barrier(); ((spinlock_t *) x)->lock = 0;} while (0)
-#define spin_unlock_wait(x) do { barrier(); } while ((x)->lock)
-
-#else /* !NEW_LOCK */
-
-typedef struct {
- volatile unsigned int lock;
-} spinlock_t;
-
-#define SPIN_LOCK_UNLOCKED (spinlock_t) { 0 }
-#define spin_lock_init(x) ((x)->lock = 0)
-
-/*
- * Streamlined test_and_set_bit(0, (x)). We use test-and-test-and-set
- * rather than a simple xchg to avoid writing the cache-line when
- * there is contention.
- */
-#define spin_lock(x) __asm__ __volatile__ ( \
- "mov ar.ccv = r0\n" \
- "mov r29 = 1\n" \
- ";;\n" \
- "1:\n" \
- "ld4 r2 = [%0]\n" \
- ";;\n" \
- "cmp4.eq p0,p7 = r0,r2\n" \
- "(p7) br.cond.spnt.few 1b \n" \
- "cmpxchg4.acq r2 = [%0], r29, ar.ccv\n" \
- ";;\n" \
- "cmp4.eq p0,p7 = r0, r2\n" \
- "(p7) br.cond.spnt.few 1b\n" \
- ";;\n" \
- :: "r"(&(x)->lock) : "ar.ccv", "p7", "r2", "r29", "memory")
-
-#define spin_is_locked(x) ((x)->lock != 0)
#define spin_unlock(x) do { barrier(); ((spinlock_t *) x)->lock = 0; } while (0)
#define spin_trylock(x) (cmpxchg_acq(&(x)->lock, 0, 1) == 0)
#define spin_unlock_wait(x) do { barrier(); } while ((x)->lock)
-#endif /* !NEW_LOCK */
-
typedef struct {
volatile int read_counter:31;
volatile int write_lock:1;
Index: 20.5/arch/ia64/kernel/ia64_ksyms.c
--- 20.5/arch/ia64/kernel/ia64_ksyms.c Wed, 11 Dec 2002 20:58:53 +1100 kaos (linux-2.4/r/c/35_ia64_ksyms 1.1.3.1.3.1.1.1.1.3 644)
+++ 20.5(w)/arch/ia64/kernel/ia64_ksyms.c Wed, 12 Mar 2003 17:28:13 +1100 kaos (linux-2.4/r/c/35_ia64_ksyms 1.1.3.1.3.1.1.1.1.3 644)
@@ -165,3 +165,9 @@ EXPORT_SYMBOL(machvec_noop);
EXPORT_SYMBOL(pfm_install_alternate_syswide_subsystem);
EXPORT_SYMBOL(pfm_remove_alternate_syswide_subsystem);
#endif
+
+/* Spinlock contention path is entered via direct branch, not using a function
+ * pointer. Fudge the declaration so we do not generate a function descriptor.
+ */
+extern char ia64_spinlock_contention[];
+EXPORT_SYMBOL(ia64_spinlock_contention);
Index: 20.5/arch/ia64/kernel/head.S
--- 20.5/arch/ia64/kernel/head.S Wed, 11 Dec 2002 20:58:53 +1100 kaos (linux-2.4/s/c/11_head.S 1.1.4.1.3.1.1.1.1.3 644)
+++ 20.5(w)/arch/ia64/kernel/head.S Wed, 12 Mar 2003 17:36:46 +1100 kaos (linux-2.4/s/c/11_head.S 1.1.4.1.3.1.1.1.1.3 644)
@@ -742,69 +742,51 @@ SET_REG(b5);
#ifdef CONFIG_SMP
/*
- * This routine handles spinlock contention. It uses a simple exponential backoff
- * algorithm to reduce unnecessary bus traffic. The initial delay is selected from
- * the low-order bits of the cycle counter (a cheap "randomizer"). I'm sure this
- * could use additional tuning, especially on systems with a large number of CPUs.
- * Also, I think the maximum delay should be made a function of the number of CPUs in
- * the system. --davidm 00/08/05
+ * This routine handles spinlock contention, using non-standard entry
+ * conventions. To avoid converting leaf routines into non-leaf, the
+ * inline spin_lock() code uses br.cond (not br.call) to enter this
+ * code. r28 contains the start of the inline spin_lock() code.
*
- * WARNING: This is not a normal procedure. It gets called from C code without
- * the compiler knowing about it. Thus, we must not use any scratch registers
- * beyond those that were declared "clobbered" at the call-site (see spin_lock()
- * macro). We may not even use the stacked registers, because that could overwrite
- * output registers. Similarly, we can't use the scratch stack area as it may be
- * in use, too.
+ * Do not use gp relative variables, this code is called from the kernel
+ * and from modules, r1 is undefined. Do not use stacked registers, the
+ * caller owns them. Do not use the scratch stack space, the caller
+ * owns it. Do not change ar.pfs, the caller owns it.
*
* Inputs:
- * ar.ccv = 0 (and available for use)
- * r28 = available for use
- * r29 = available for use
- * r30 = non-zero (and available for use)
- * r31 = address of lock we're trying to acquire
- * p15 = available for use
+ * ar.ccv - 0 (and available for use)
+ * r12 - kernel stack pointer, but see above.
+ * r13 - current process.
+ * r28 - address of start of spin_lock code. Used as a "return"
+ * address from this contention path. Available for use
+ * after it has been saved.
+ * r29 - available for use. Used to hold dummy ar.pfs to get good
+ * unwind data.
+ * r30 - available for use.
+ * r31 - address of lock.
+ * b7 - available for use.
+ * p15 - available for use.
+ * Rest - caller's state, do not use, especially ar.pfs.
*/
-# define delay r28
-# define timeout r29
-# define tmp r30
-
GLOBAL_ENTRY(ia64_spinlock_contention)
- mov tmp=ar.itc
- ;;
- and delay=0x3f,tmp
- ;;
-
-.retry: add timeout=tmp,delay
- shl delay=delay,1
- ;;
- dep delay=delay,r0,0,13 // limit delay to 8192 cycles
- ;;
- // delay a little...
-.wait: sub tmp=tmp,timeout
- or delay=0xf,delay // make sure delay is non-zero (otherwise we get stuck with 0)
- ;;
- cmp.lt p15,p0=tmp,r0
- mov tmp=ar.itc
-(p15) br.cond.sptk .wait
- ;;
- ld4 tmp=[r31]
- ;;
- cmp.ne p15,p0=tmp,r0
- mov tmp=ar.itc
-(p15) br.cond.sptk .retry // lock is still busy
- ;;
- // try acquiring lock (we know ar.ccv is still zero!):
- mov tmp=1
- ;;
- cmpxchg4.acq tmp=[r31],tmp,ar.ccv
- ;;
- cmp.eq p15,p0=tmp,r0
-
- mov tmp=ar.itc
-(p15) br.ret.sptk.many b7 // got lock -> return
- br .retry // still no luck, retry
+ // To get decent unwind data, lie about our state
+ .prologue
+ .altrp b7
+ .save ar.pfs, r29
+ mov b7=r28 // my "return" address
+ mov r29=0 // dummy ar.pfs, pretend zero frame size
+ ;;
+ .body
+.retry:
+ // exponential backoff, kdb, lockmeter etc. go in here
+ //
+ ;;
+ ld4 r28=[r31]
+ ;;
+ cmp4.eq p15,p0=r28,r0
+(p15) br.cond.spnt.few b7 // lock is now free, try to acquire
+ br.cond.sptk.few .retry
END(ia64_spinlock_contention)
-#endif
+#endif // CONFIG_SMP
kdb backtrace from an interrupt while in the contention path. The
arguments for the first two entries are associated with the wrong label,
apart from that we get a clean backtrace on lock contention, I will
investigate the unwind problem later. r31 contains the lock address.
Lock acquire has no unwind problems, it is normal inline code.
[6]kdb> bt
Stack traceback for pid 0
0xe0000030071e0000 0 0 1 6 R 0xe0000030071e07d0 *swapper
0xe002000000008f30 ia64_spinlock_contention+0x30
args (0x0, 0x1, 0x0, 0xe00210000084d500, 0x6)
kernel .text 0xe002000000000000 0xe002000000008f00 0xe002000000008f60
0xe0020000000ce390 scheduler_tick+0x370
kernel .text 0xe002000000000000 0xe0020000000ce020 0xe0020000000cede0
0xe0020000000f2250 update_process_times+0x50
args (0x0, 0x1, 0xe002000000047ab0, 0x184)
kernel .text 0xe002000000000000 0xe0020000000f2200 0xe0020000000f2280
0xe002000000047ab0 smp_do_timer+0x90
args (0xe0000030071e7cd0, 0xe002000000032480, 0x813)
kernel .text 0xe002000000000000 0xe002000000047a20 0xe002000000047ae0
0xe002000000032480 timer_interrupt+0x220
args (0x6ef, 0x0, 0xe0000030071e7cd0, 0x2fdc609dda, 0xe0021000009e92d8)
kernel .text 0xe002000000000000 0xe002000000032260 0xe002000000032700
0xe002000000012e40 handle_IRQ_event+0x100
args (0x6ef, 0xe0000030071e7cd0, 0xe0021000008ddaf0, 0xe002100000948000, 0x20000001)
kernel .text 0xe002000000000000 0xe002000000012d40 0xe002000000012ee0
0xe0020000000133d0 do_IRQ+0x130
args (0x6ef, 0xe0000030071e7cd0, 0xe00000343bf0f788, 0xe002100000948000, 0x6ef)
kernel .text 0xe002000000000000 0xe0020000000132a0 0xe002000000013680
0xe002000000015790 ia64_handle_irq+0xb0
args (0xef, 0xe0000030071e7cd0, 0xfd, 0x0, 0xe0021000008fe328)
kernel .text 0xe002000000000000 0xe0020000000156e0 0xe0020000000158c0
0xe00200000000ec00 ia64_leave_kernel
args (0xef, 0xe0000030071e7cd0)
kernel .text 0xe002000000000000 0xe00200000000ec00 0xe00200000000ee90
0xe002000000016970 cpu_idle+0xb0
args (0xe0021000008e5708, 0xa000000000008418, 0x1e0008, 0xa000000000008410, 0xfffffffffffffffd)
kernel .text 0xe002000000000000 0xe0020000000168c0 0xe002000000016b60
0xe002000000671cc0 start_secondary+0x80
args (0xe0020000000084c0, 0x207)
kernel .text.init 0xe002000000668000 0xe002000000671c40 0xe002000000671ce0
0xe0020000000084c0 start_ap+0x2a0
args (0xffed0000, 0x1b004182300, 0xffc3a3b0, 0x207)
kernel .text 0xe002000000000000 0xe002000000008220 0xe0020000000084f0
[6]kdb> r
psr: 0x0000101008022018 ifs: 0x8000000000000389 ip: 0xe002000000008f30
unat: 0x0000000000000000 pfs: 0x0000000000000208 rsc: 0x0000000000000003
rnat: 0xefe566b88bd12bcc bsps: 0x000000000000bb56 pr: 0x80000000ff659165
ldrs: 0x0000000000000000 ccv: 0x0000000000000000 fpsr: 0x0009804c0270033f
b0: 0xe0020000000f2250 b6: 0xe002000000032260 b7: 0xe0020000000ce390
r1: 0xe002100000948000 r2: 0xe0000030071e7888 r3: 0xe0000030071e7889
r8: 0x00000000000000ef r9: 0x0000000000000013 r10: 0x0000000000000000
r11: 0x80000000ff661265 r12: 0xe0000030071e7cc0 r13: 0xe0000030071e0000
r14: 0xe0021000008fe2f0 r15: 0x0000000000000001 r16: 0x00000000000125bf
r17: 0xe00210000084e6e8 r18: 0x0000000000000300 r19: 0x0000000000000001
r20: 0x00000000000000e7 r21: 0x0000000000000001 r22: 0xe0000030071e003c
r23: 0x7fffffff00000000 r24: 0x0000000000000000 r25: 0x7fffffff00000000
r26: 0x0000000000000000 r27: 0xe0000030071e7890 r28: 0x0000000000000000
r29: 0x0000000000000000 r30: 0x0000000000000001 r31: 0xe00210000084d500
&regs = e0000030071e7b30
[6]kdb> 0xe00210000084d500
0xe00210000084d500 = 0xe00210000084d500 (runqueues+0x8100)
next reply other threads:[~2003-03-12 10:19 UTC|newest]
Thread overview: 12+ messages / expand[flat|nested] mbox.gz Atom feed top
2003-03-12 10:19 Keith Owens [this message]
2003-03-12 10:35 ` [Linux-ia64] Re: ia64_spinlock_contention and NEW_LOCK Keith Owens
2003-03-12 17:27 ` David Mosberger
2003-03-12 21:18 ` Keith Owens
2003-03-12 21:51 ` Keith Owens
2003-03-13 19:16 ` David Mosberger
2003-03-13 21:59 ` Keith Owens
2003-03-13 22:14 ` David Mosberger
2003-03-13 22:20 ` Keith Owens
2003-03-13 22:26 ` David Mosberger
2003-03-13 22:42 ` Keith Owens
2003-03-13 22:46 ` David Mosberger
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=marc-linux-ia64-105590709806055@msgid-missing \
--to=kaos@sgi.com \
--cc=linux-ia64@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox