public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
* Re:  frlock and barrier discussion
@ 2003-01-30 18:20 Manfred Spraul
  2003-01-30 18:26 ` Andrea Arcangeli
  2003-01-30 22:32 ` Alan Cox
  0 siblings, 2 replies; 11+ messages in thread
From: Manfred Spraul @ 2003-01-30 18:20 UTC (permalink / raw)
  To: Stephen Hemminger; +Cc: Andrea Arcangeli, Richard Henderson, linux-kernel

[-- Attachment #1: Type: text/plain, Size: 675 bytes --]

Stephen wrote:

[snip - memory barrier for fr_write_begin]

>Using mb() is more paranoid than necessary. 


What about the memory barrier in fr_read_begin?
If I understand the Intel documentation correctly, then i386 doesn't need them:
"Writes by a single processor are observed in the same order by all processors"

I think "smp_read_barrier_depends()" (i.e. a nop for i386) is sufficient. Attached is a test app - could someone try it? I don't have access to a SMP system right now.


What about permitting arch overrides for the memory barriers? E.g. ia64 has acquire and release memory barriers - it doesn't map to the Linux wmb()/rmb() scheme.

--
	Manfred 













[-- Attachment #2: frlock.cpp --]
[-- Type: text/plain, Size: 2579 bytes --]

/*
 * frlock: test for Intel memory ordering.
 * Copyright (C) 1999,2003 by Manfred Spraul.
 * 
 * Redistribution of this file is permitted under the terms of the GNU 
 * Public License (GPL)
 * $Header: /pub/home/manfred/cvs-tree/movopt/frlock.cpp,v 1.2 2003/01/26 10:41:39 manfred Exp $
 */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <pthread.h>
#include <assert.h>

static volatile int g_val1;
static volatile int g_val2;
static volatile int g_seq1;
static volatile int g_seq2;

static volatile int start;
#define MB()	__asm__ __volatile__ ("lock;addl $0,(%%esp)\n\t" \
					:/* no output*/ \
					:/* no input*/:"cc","memory")

#define DELAY()		do { int i; for(i=0;i<1000;i++); } while(0)

void* threadfnc(void* param)
{
	while(!start);
	if(1 == (int)param)
		goto cpu1;
	if(2 == (int)param)
		goto cpu2;
	assert(0);
cpu1:
	{	// reader:
		for(;;) {
			int x1,x2,val1,val2;

			x1 = g_seq1;
			val1 = g_val1;
			val2 = g_val2;
			x2 = g_seq2;
			if (x1 == x2) {
				if (val1 != val2) {
					printf("Bad! memory ordering violation with %d/%d: %d/%d.\n", x1, x2, val1, val2);
				}
			}
	    	}
	}
cpu2:
	{	// writer:
	    	int target = 0;
		for (;;) {

			// write 1:
			target++;
			g_seq1 = target;
			g_val1 = target;
			g_val2 = target;
			g_seq2 = target;
			DELAY();

			// write 2:
			target++;
			g_seq1 = target;
			g_val1 = target;
			MB();
			g_val2 = target;
			g_seq2 = target;
			DELAY();

			// write 3:
			target++;
			g_seq1 = target;
			g_val2 = target;
			g_val1 = target;
			g_seq2 = target;
			DELAY();

			// write 4:
			target++;
			g_seq1 = target;
			g_val2 = target;
			MB();
			g_val1 = target;
			g_seq2 = target;
			DELAY();
			


			// write 5:
			target++;
			g_seq1 = target;
			g_val1 = target;
			MB(); MB();
			g_val2 = target;
			g_seq2 = target;
			DELAY();

			// write 6:
			target++;
			g_seq1 = target;
			g_val1 = target;
			MB(); DELAY();
			g_val2 = target;
			g_seq2 = target;
			DELAY();

			// write 7:
			target++;
			g_seq1 = target;
			g_val2 = target;
			MB(); MB();
			g_val1 = target;
			g_seq2 = target;
			DELAY();

			// write 8:
			target++;
			g_seq1 = target;
			g_val2 = target;
			MB(); DELAY();
			g_val1 = target;
			g_seq2 = target;
			DELAY();
		}
	}
}

void start_thread(int id)
{
	pthread_t thread;
	int res;

	res = pthread_create(&thread,NULL,threadfnc,(void*)id);
	if(res != 0)
		assert(false);
}



int main()
{
	printf("movopt:\n");
	start_thread(1);
	start_thread(2);
	printf(" starting, please wait.\n");
	fflush(stdout);
	start = 1;
	for(;;) sleep(1000);
}

^ permalink raw reply	[flat|nested] 11+ messages in thread
* [PATCH] (1/4) 2.5.59 fast reader/writer lock for gettimeofday
@ 2003-01-28 23:42 Stephen Hemminger
  2003-01-29  7:06 ` Richard Henderson
  0 siblings, 1 reply; 11+ messages in thread
From: Stephen Hemminger @ 2003-01-28 23:42 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: Andrea Arcangeli, Andrew Morton, Andi Kleen,
	Linux Kernel Mailing List

[-- Attachment #1: Type: text/plain, Size: 131 bytes --]

This is the generic portion of lockless gettimeofday. It defines frlock
and changes locking of xtime_lock from rwlock to frlock.



[-- Attachment #2: frlock-xtime.patch --]
[-- Type: text/x-patch, Size: 8362 bytes --]

diff -urN -X dontdiff linux-2.5.59/include/linux/frlock.h linux-2.5-frlock/include/linux/frlock.h
--- linux-2.5.59/include/linux/frlock.h	1969-12-31 16:00:00.000000000 -0800
+++ linux-2.5-frlock/include/linux/frlock.h	2003-01-28 14:53:37.000000000 -0800
@@ -0,0 +1,124 @@
+#ifndef __LINUX_FRLOCK_H
+#define __LINUX_FRLOCK_H
+
+/*
+ * Fast read-write spinlocks.
+ *
+ * Fast reader/writer locks without starving writers. This type of
+ * lock for data where the reader wants a consitent set of information
+ * and is willing to retry if the information changes.  Readers never
+ * block but they may have to retry if a writer is in
+ * progress. Writers do not wait for readers. 
+ *
+ * Generalization on sequence variables used for gettimeofday on x86-64 
+ * by Andrea Arcangeli
+ *
+ * This is not as cache friendly as brlock. Also, this will not work
+ * for data that contains pointers, because any writer could
+ * invalidate a pointer that a reader was following.
+ *
+ * Expected reader usage:
+ * 	do {
+ *	    seq = fr_read_begin();
+ * 	...
+ *      } while (seq != fr_read_end());
+ *
+ * On non-SMP the spin locks disappear but the writer still needs
+ * to increment the sequence variables because an interrupt routine could
+ * change the state of the data.
+ */
+
+#include <linux/config.h>
+#include <linux/spinlock.h>
+#include <linux/preempt.h>
+
+typedef struct {
+	unsigned pre_sequence;
+	unsigned post_sequence;
+	spinlock_t lock;
+} frlock_t;
+
+#define FR_LOCK_UNLOCKED   { 0, 0, SPIN_LOCK_UNLOCKED }
+#define frlock_init(x)	   do { *(x) = (frlock_t) FR_LOCK_UNLOCKED; } while (0)
+
+/* Update sequence count only
+ * Assumes caller is doing own mutual exclusion with other lock
+ * or semaphore.
+ */
+static inline void fr_write_begin(frlock_t *rw)
+{
+	preempt_disable();
+	rw->pre_sequence++;
+	wmb();
+}
+
+static inline void fr_write_end(frlock_t *rw)
+{
+	wmb();
+	rw->post_sequence++;
+	BUG_ON(rw->post_sequence != rw->pre_sequence);
+	preempt_enable();
+}
+
+/* Lock out other writers and update the count.
+ * Acts like a normal spin_lock/unlock.
+ */
+static inline void fr_write_lock(frlock_t *rw)
+{
+	spin_lock(&rw->lock);
+	fr_write_begin(rw);
+}	
+
+static inline void fr_write_unlock(frlock_t *rw) 
+{
+	fr_write_end(rw);
+	spin_unlock(&rw->lock);
+}
+
+static inline int fr_write_trylock(frlock_t *rw)
+{
+	int ret = spin_trylock(&rw->lock);
+
+	if (ret) {
+		++rw->pre_sequence;
+		wmb();
+	}
+	return ret;
+}
+
+/* Start of read calculation -- fetch last complete writer token */
+static inline unsigned fr_read_begin(const frlock_t *rw) 
+{
+	unsigned ret;
+
+	ret = rw->post_sequence;
+	rmb();
+	return ret;
+	
+}
+
+/* End of reader calculation -- fetch last writer start token */
+static inline unsigned fr_read_end(const frlock_t *rw)
+{
+	rmb();
+	return rw->pre_sequence;
+}
+
+/*
+ * Possible sw/hw IRQ protected versions of the interfaces.
+ */
+#define fr_write_lock_irqsave(lock, flags) \
+	do { local_irq_save(flags);	 fr_write_lock(lock); } while (0)
+#define fr_write_lock_irq(lock) \
+	do { local_irq_disable();	 fr_write_lock(lock); } while (0)
+#define fr_write_lock_bh(lock) \
+        do { local_bh_disable();	 fr_write_lock(lock); } while (0)
+
+#define fr_write_unlock_irqrestore(lock, flags)	\
+	do { fr_write_unlock(lock); local_irq_restore(flags); } while(0)
+#define fr_write_unlock_irq(lock) \
+	do { fr_write_unlock(lock); local_irq_enable(); } while(0)
+#define fr_write_unlock_bh(lock) \
+	do { fr_write_unlock(lock); local_bh_enable(); } while(0)
+
+#endif /* __LINUX_FRLOCK_H */
diff -urN -X dontdiff linux-2.5.59/include/linux/time.h linux-2.5-frlock/include/linux/time.h
--- linux-2.5.59/include/linux/time.h	2003-01-17 09:43:06.000000000 -0800
+++ linux-2.5-frlock/include/linux/time.h	2003-01-24 14:54:17.000000000 -0800
@@ -25,6 +25,7 @@
 #ifdef __KERNEL__
 
 #include <linux/spinlock.h>
+#include <linux/frlock.h>
 
 /*
  * Change timeval to jiffies, trying to avoid the
@@ -120,7 +121,7 @@
 }
 
 extern struct timespec xtime;
-extern rwlock_t xtime_lock;
+extern frlock_t xtime_lock;
 
 static inline unsigned long get_seconds(void)
 { 
diff -urN -X dontdiff linux-2.5.59/kernel/time.c linux-2.5-frlock/kernel/time.c
--- linux-2.5.59/kernel/time.c	2003-01-17 09:43:08.000000000 -0800
+++ linux-2.5-frlock/kernel/time.c	2003-01-24 15:05:53.000000000 -0800
@@ -27,7 +27,6 @@
 #include <linux/timex.h>
 #include <linux/errno.h>
 #include <linux/smp_lock.h>
-
 #include <asm/uaccess.h>
 
 /* 
@@ -38,7 +37,7 @@
 
 /* The xtime_lock is not only serializing the xtime read/writes but it's also
    serializing all accesses to the global NTP variables now. */
-extern rwlock_t xtime_lock;
+extern frlock_t xtime_lock;
 extern unsigned long last_time_offset;
 
 #if !defined(__alpha__) && !defined(__ia64__)
@@ -80,7 +79,7 @@
 		return -EPERM;
 	if (get_user(value, tptr))
 		return -EFAULT;
-	write_lock_irq(&xtime_lock);
+	fr_write_lock_irq(&xtime_lock);
 	xtime.tv_sec = value;
 	xtime.tv_nsec = 0;
 	last_time_offset = 0;
@@ -88,7 +87,7 @@
 	time_status |= STA_UNSYNC;
 	time_maxerror = NTP_PHASE_LIMIT;
 	time_esterror = NTP_PHASE_LIMIT;
-	write_unlock_irq(&xtime_lock);
+	fr_write_unlock_irq(&xtime_lock);
 	return 0;
 }
 
@@ -96,13 +95,13 @@
 
 asmlinkage long sys_gettimeofday(struct timeval *tv, struct timezone *tz)
 {
-	if (tv) {
+	if (likely(tv != NULL)) {
 		struct timeval ktv;
 		do_gettimeofday(&ktv);
 		if (copy_to_user(tv, &ktv, sizeof(ktv)))
 			return -EFAULT;
 	}
-	if (tz) {
+	if (unlikely(tz != NULL)) {
 		if (copy_to_user(tz, &sys_tz, sizeof(sys_tz)))
 			return -EFAULT;
 	}
@@ -127,10 +126,10 @@
  */
 inline static void warp_clock(void)
 {
-	write_lock_irq(&xtime_lock);
+	fr_write_lock_irq(&xtime_lock);
 	xtime.tv_sec += sys_tz.tz_minuteswest * 60;
 	last_time_offset = 0;
-	write_unlock_irq(&xtime_lock);
+	fr_write_unlock_irq(&xtime_lock);
 }
 
 /*
@@ -235,7 +234,7 @@
 		    txc->tick > 1100000/USER_HZ)
 			return -EINVAL;
 
-	write_lock_irq(&xtime_lock);
+	fr_write_lock_irq(&xtime_lock);
 	result = time_state;	/* mostly `TIME_OK' */
 
 	/* Save for later - semantics of adjtime is to return old value */
@@ -386,7 +385,7 @@
 	txc->errcnt	   = pps_errcnt;
 	txc->stbcnt	   = pps_stbcnt;
 	last_time_offset = 0;
-	write_unlock_irq(&xtime_lock);
+	fr_write_unlock_irq(&xtime_lock);
 	do_gettimeofday(&txc->time);
 	return(result);
 }
@@ -409,9 +408,13 @@
 struct timespec current_kernel_time(void)
 {
         struct timespec now;
-        unsigned long flags;
-        read_lock_irqsave(&xtime_lock,flags);
-	now = xtime;
-        read_unlock_irqrestore(&xtime_lock,flags);
+        unsigned long seq;
+
+	do {
+		seq = fr_read_begin(&xtime_lock);
+		
+		now = xtime;
+	} while (seq != fr_read_end(&xtime_lock));
+
 	return now; 
 }
diff -urN -X dontdiff linux-2.5.59/kernel/timer.c linux-2.5-frlock/kernel/timer.c
--- linux-2.5.59/kernel/timer.c	2003-01-17 09:43:08.000000000 -0800
+++ linux-2.5-frlock/kernel/timer.c	2003-01-21 13:23:05.000000000 -0800
@@ -758,7 +758,7 @@
  * This read-write spinlock protects us from races in SMP while
  * playing with xtime and avenrun.
  */
-rwlock_t xtime_lock __cacheline_aligned_in_smp = RW_LOCK_UNLOCKED;
+frlock_t xtime_lock __cacheline_aligned_in_smp = FR_LOCK_UNLOCKED;
 unsigned long last_time_offset;
 
 /*
@@ -798,8 +798,7 @@
 }
   
 /*
- * The 64-bit jiffies value is not atomic - you MUST NOT read it
- * without holding read_lock_irq(&xtime_lock).
+ * The 64-bit jiffies value is not atomic 
  * jiffies is defined in the linker script...
  */
 
@@ -1087,18 +1086,21 @@
 	struct sysinfo val;
 	unsigned long mem_total, sav_total;
 	unsigned int mem_unit, bitcount;
+	unsigned long seq;
 
 	memset((char *)&val, 0, sizeof(struct sysinfo));
 
-	read_lock_irq(&xtime_lock);
-	val.uptime = jiffies / HZ;
+	do {
+		seq = fr_read_begin(&xtime_lock);
 
-	val.loads[0] = avenrun[0] << (SI_LOAD_SHIFT - FSHIFT);
-	val.loads[1] = avenrun[1] << (SI_LOAD_SHIFT - FSHIFT);
-	val.loads[2] = avenrun[2] << (SI_LOAD_SHIFT - FSHIFT);
+		val.uptime = jiffies / HZ;
 
-	val.procs = nr_threads;
-	read_unlock_irq(&xtime_lock);
+		val.loads[0] = avenrun[0] << (SI_LOAD_SHIFT - FSHIFT);
+		val.loads[1] = avenrun[1] << (SI_LOAD_SHIFT - FSHIFT);
+		val.loads[2] = avenrun[2] << (SI_LOAD_SHIFT - FSHIFT);
+
+		val.procs = nr_threads;
+	} while (seq != fr_read_end(&xtime_lock));
 
 	si_meminfo(&val);
 	si_swapinfo(&val);

^ permalink raw reply	[flat|nested] 11+ messages in thread

end of thread, other threads:[~2003-01-31  0:48 UTC | newest]

Thread overview: 11+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2003-01-30 18:20 frlock and barrier discussion Manfred Spraul
2003-01-30 18:26 ` Andrea Arcangeli
2003-01-30 19:05   ` Manfred Spraul
2003-01-30 19:54   ` Davide Libenzi
2003-01-30 22:32 ` Alan Cox
  -- strict thread matches above, loose matches on Subject: below --
2003-01-28 23:42 [PATCH] (1/4) 2.5.59 fast reader/writer lock for gettimeofday Stephen Hemminger
2003-01-29  7:06 ` Richard Henderson
2003-01-30  1:15   ` frlock and barrier discussion Stephen Hemminger
2003-01-30  1:29     ` Andrea Arcangeli
2003-01-30  1:41     ` Richard Henderson
2003-01-30  1:52       ` Andrea Arcangeli
2003-01-31  0:41         ` Richard Henderson
2003-01-31  0:57           ` Andrea Arcangeli

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox