All of lore.kernel.org
 help / color / mirror / Atom feed
From: Zoltan Menyhart <Zoltan.Menyhart@bull.net>
To: linux-ia64@vger.kernel.org
Subject: Re: Fix ia64 bit ops: Full barriers for bit operations returning
Date: Wed, 05 Apr 2006 15:30:34 +0000	[thread overview]
Message-ID: <4433E29A.5010300@bull.net> (raw)
In-Reply-To: <Pine.LNX.4.64.0604031129510.21064@schroedinger.engr.sgi.com>

[-- Attachment #1: Type: text/plain, Size: 1458 bytes --]

Christoph Lameter wrote:

> Could you come up with a patch? Currently, I do not seem to be able to 
> spend enough time on it.

Please have a look at this patch.

Temporary solution while we are waiting for:

	test_and_set_bit (int nr, volatile void *addr, MODE_BARRIER)

& co.

Changing the temporary variables to be 64 bits wide was not a good idea => alignment faults.
In order to eliminate the extra "zxt4", I changed the type of the return values of my
intrinsic macros to be 32 bits wide. Here is what I get (NOP-s removed):

reserve_bootmem_core+240:  [MMI]       mf;;
reserve_bootmem_core+241:              and r10=31,r18
reserve_bootmem_core+257:              extr r11=r18,5,27;;
reserve_bootmem_core+272:  [MFI]       shladd r16=r11,2,r16
reserve_bootmem_core+274:              shl r17=r19,r10;;
reserve_bootmem_core+288:  [MMI]       ld4.bias.nta r20=[r16];;
reserve_bootmem_core+289:              or r22=r17,r20
reserve_bootmem_core+305:              mov.m ar.ccv=r20;;
reserve_bootmem_core+320:  [MMI]       cmpxchg4.acq.nta r21=[r16],r22,ar.ccv;;
reserve_bootmem_core+322:              cmp4.eq p14,p15=r20,r21
reserve_bootmem_core+336:  [BBB] (p15) br.cond.dptk.few reserve_bootmem_core+288

BTW, why do all the intrinsic macros return 64-bit wide values, regardless of
their actual operand width? E.g.:

#define ia64_cmpxchg4_acq(ptr, new, old)
...
	__u64 ia64_intri_res;

Thanks,

Zoltan

Signed-off-by: Zoltan Menyhart <Zoltan.Menyhart@bull.net>

[-- Attachment #2: diff --]
[-- Type: text/plain, Size: 5780 bytes --]

--- old/include/asm-ia64/bitops.h	2006-04-04 18:19:50.000000000 +0200
+++ linux-2.6.16/include/asm-ia64/bitops.h	2006-04-05 16:49:12.000000000 +0200
@@ -7,6 +7,19 @@
  *
  * 02/06/02 find_next_bit() and find_first_bit() added from Erich Focht's ia64 O(1)
  *	    scheduler patch
+ * 06/04/05 Cache hints added:
+ *	    For loads before the atomic operations:
+ *		"bias" is a hint to acquire exclusive ownership.
+ *		"nta" is a hint to allocate the cache line only in L2
+ *		and to bias it to be replaced.
+ *	    For the atomic operations (as they are handled exclusively by L2):
+ *		"nta" is a hint not to allocate the cache line else than in L2,
+ *		to bias it to be replaced and not to write it back into L3.
+ *	    Added full fencing semantics to the atomic bit operations returning
+ *	    values.
+ *	    Note that it is a temporary solution while we are waiting for explicitly
+ *	    indicated fencing behavior, e.g.:
+ *			test_and_set_bit (int nr, void *addr, MODE_BARRIER)
  */
 
 #include <linux/compiler.h>
@@ -42,9 +55,9 @@ set_bit (int nr, volatile void *addr)
 	bit = 1 << (nr & 31);
 	do {
 		CMPXCHG_BUGCHECK(m);
-		old = *m;
+		old = ia64_ld4_bias_nta(m);
 		new = old | bit;
-	} while (cmpxchg_acq(m, old, new) != old);
+	} while (ia64_cmpxchg4_acq_nta(m, new, old) != old);
 }
 
 /**
@@ -89,9 +102,9 @@ clear_bit (int nr, volatile void *addr)
 	mask = ~(1 << (nr & 31));
 	do {
 		CMPXCHG_BUGCHECK(m);
-		old = *m;
+		old = ia64_ld4_bias_nta(m);
 		new = old & mask;
-	} while (cmpxchg_acq(m, old, new) != old);
+	} while (ia64_cmpxchg4_acq_nta(m, new, old) != old);
 }
 
 /**
@@ -100,14 +113,12 @@ clear_bit (int nr, volatile void *addr)
 static __inline__ void
 __clear_bit (int nr, volatile void *addr)
 {
-	volatile __u32 *p = (__u32 *) addr + (nr >> 5);
-	__u32 m = 1 << (nr & 31);
-	*p &= ~m;
+	*((__u32 *) addr + (nr >> 5)) &= ~(1 << (nr & 31));
 }
 
 /**
  * change_bit - Toggle a bit in memory
- * @nr: Bit to clear
+ * @nr: Bit to change
  * @addr: Address to start counting from
  *
  * change_bit() is atomic and may not be reordered.
@@ -122,17 +133,17 @@ change_bit (int nr, volatile void *addr)
 	CMPXCHG_BUGCHECK_DECL
 
 	m = (volatile __u32 *) addr + (nr >> 5);
-	bit = (1 << (nr & 31));
+	bit = 1 << (nr & 31);
 	do {
 		CMPXCHG_BUGCHECK(m);
-		old = *m;
+		old = ia64_ld4_bias_nta(m);
 		new = old ^ bit;
-	} while (cmpxchg_acq(m, old, new) != old);
+	} while (ia64_cmpxchg4_acq_nta(m, new, old) != old);
 }
 
 /**
  * __change_bit - Toggle a bit in memory
- * @nr: the bit to set
+ * @nr: the bit to change
  * @addr: the address to start counting from
  *
  * Unlike change_bit(), this function is non-atomic and may be reordered.
@@ -160,13 +171,14 @@ test_and_set_bit (int nr, volatile void 
 	volatile __u32 *m;
 	CMPXCHG_BUGCHECK_DECL
 
+	ia64_mf();
 	m = (volatile __u32 *) addr + (nr >> 5);
 	bit = 1 << (nr & 31);
 	do {
 		CMPXCHG_BUGCHECK(m);
-		old = *m;
+		old = ia64_ld4_bias_nta(m);
 		new = old | bit;
-	} while (cmpxchg_acq(m, old, new) != old);
+	} while (ia64_cmpxchg4_acq_nta(m, new, old) != old);
 	return (old & bit) != 0;
 }
 
@@ -192,7 +204,7 @@ __test_and_set_bit (int nr, volatile voi
 
 /**
  * test_and_clear_bit - Clear a bit and return its old value
- * @nr: Bit to set
+ * @nr: Bit to clear
  * @addr: Address to count from
  *
  * This operation is atomic and cannot be reordered.  
@@ -205,19 +217,20 @@ test_and_clear_bit (int nr, volatile voi
 	volatile __u32 *m;
 	CMPXCHG_BUGCHECK_DECL
 
+	ia64_mf();
 	m = (volatile __u32 *) addr + (nr >> 5);
 	mask = ~(1 << (nr & 31));
 	do {
 		CMPXCHG_BUGCHECK(m);
-		old = *m;
+		old = ia64_ld4_bias_nta(m);
 		new = old & mask;
-	} while (cmpxchg_acq(m, old, new) != old);
+	} while (ia64_cmpxchg4_acq_nta(m, new, old) != old);
 	return (old & ~mask) != 0;
 }
 
 /**
  * __test_and_clear_bit - Clear a bit and return its old value
- * @nr: Bit to set
+ * @nr: Bit to clear
  * @addr: Address to count from
  *
  * This operation is non-atomic and can be reordered.  
@@ -237,7 +250,7 @@ __test_and_clear_bit(int nr, volatile vo
 
 /**
  * test_and_change_bit - Change a bit and return its old value
- * @nr: Bit to set
+ * @nr: Bit to change
  * @addr: Address to count from
  *
  * This operation is atomic and cannot be reordered.  
@@ -250,13 +263,14 @@ test_and_change_bit (int nr, volatile vo
 	volatile __u32 *m;
 	CMPXCHG_BUGCHECK_DECL
 
+	ia64_mf();
 	m = (volatile __u32 *) addr + (nr >> 5);
 	bit = (1 << (nr & 31));
 	do {
 		CMPXCHG_BUGCHECK(m);
-		old = *m;
+		old = ia64_ld4_bias_nta(m);
 		new = old ^ bit;
-	} while (cmpxchg_acq(m, old, new) != old);
+	} while (ia64_cmpxchg4_acq_nta(m, new, old) != old);
 	return (old & bit) != 0;
 }
 
--- old/include/asm-ia64/gcc_intrin.h	2006-04-04 18:19:50.000000000 +0200
+++ linux-2.6.16/include/asm-ia64/gcc_intrin.h	2006-04-05 17:07:29.000000000 +0200
@@ -221,6 +221,14 @@ register unsigned long ia64_r13 asm ("r1
 	asm volatile ("stf.spill [%0]=%1" :: "r"(x), "f"(__f__) : "memory");	\
 })
 
+#define ia64_ld4_bias_nta(ptr)							\
+({										\
+	__u32 ia64_intri_res;							\
+	asm volatile ("ld4.bias.nta %0=[%1]":					\
+			      "=r"(ia64_intri_res) : "r"(ptr) : "memory");	\
+	ia64_intri_res;								\
+})
+
 #define ia64_fetchadd4_acq(p, inc)						\
 ({										\
 										\
@@ -350,6 +358,15 @@ register unsigned long ia64_r13 asm ("r1
 	ia64_intri_res;									\
 })
 
+#define ia64_cmpxchg4_acq_nta(ptr, new, old)						\
+({											\
+	__u32 ia64_intri_res;								\
+	asm volatile ("mov ar.ccv=%0;;" :: "rO"(old));					\
+	asm volatile ("cmpxchg4.acq.nta %0=[%1],%2,ar.ccv":				\
+			      "=r"(ia64_intri_res) : "r"(ptr), "r"(new) : "memory");	\
+	ia64_intri_res;									\
+})
+
 #define ia64_cmpxchg8_acq(ptr, new, old)						\
 ({											\
 	__u64 ia64_intri_res;								\

  parent reply	other threads:[~2006-04-05 15:30 UTC|newest]

Thread overview: 11+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2006-04-03 18:32 Fix ia64 bit ops: Full barriers for bit operations returning a value Christoph Lameter
2006-04-03 23:11 ` Chen, Kenneth W
2006-04-04  1:05 ` Fix ia64 bit ops: Full barriers for bit operations returning Nick Piggin
2006-04-04  2:11 ` Fix ia64 bit ops: Full barriers for bit operations returning a Christoph Lameter
2006-04-04 13:30 ` Fix ia64 bit ops: Full barriers for bit operations returning a value Chen, Kenneth W
2006-04-04 14:40 ` Fix ia64 bit ops: Full barriers for bit operations returning a Christoph Lameter
2006-04-04 16:48 ` Fix ia64 bit ops: Full barriers for bit operations returning Zoltan Menyhart
2006-04-05 15:30 ` Zoltan Menyhart [this message]
2006-04-05 16:17 ` Fix ia64 bit ops: Full barriers for bit operations returning a Christoph Lameter
2006-04-05 16:44 ` Fix ia64 bit ops: Full barriers for bit operations returning Zoltan Menyhart
2006-04-05 17:31 ` Fix ia64 bit ops: Full barriers for bit operations returning a Christoph Lameter

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=4433E29A.5010300@bull.net \
    --to=zoltan.menyhart@bull.net \
    --cc=linux-ia64@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.