--- old/include/asm-ia64/bitops.h 2006-04-04 18:19:50.000000000 +0200 +++ linux-2.6.16/include/asm-ia64/bitops.h 2006-04-05 16:49:12.000000000 +0200 @@ -7,6 +7,19 @@ * * 02/06/02 find_next_bit() and find_first_bit() added from Erich Focht's ia64 O(1) * scheduler patch + * 06/04/05 Cache hints added: + * For loads before the atomic operations: + * "bias" is a hint to acquire exclusive ownership. + * "nta" is a hint to allocate the cache line only in L2 + * and to bias it to be replaced. + * For the atomic operations (as they are handled exclusively by L2): + * "nta" is a hint not to allocate the cache line anywhere other than in L2, + * to bias it to be replaced and not to write it back into L3. + * Added full fencing semantics to the atomic bit operations returning + * values. + * Note that it is a temporary solution while we are waiting for explicitly + * indicated fencing behavior, e.g.: + * test_and_set_bit (int nr, void *addr, MODE_BARRIER) */ #include <linux/compiler.h> @@ -42,9 +55,9 @@ set_bit (int nr, volatile void *addr) bit = 1 << (nr & 31); do { CMPXCHG_BUGCHECK(m); - old = *m; + old = ia64_ld4_bias_nta(m); new = old | bit; - } while (cmpxchg_acq(m, old, new) != old); + } while (ia64_cmpxchg4_acq_nta(m, new, old) != old); } /** @@ -89,9 +102,9 @@ clear_bit (int nr, volatile void *addr) mask = ~(1 << (nr & 31)); do { CMPXCHG_BUGCHECK(m); - old = *m; + old = ia64_ld4_bias_nta(m); new = old & mask; - } while (cmpxchg_acq(m, old, new) != old); + } while (ia64_cmpxchg4_acq_nta(m, new, old) != old); } /** @@ -100,14 +113,12 @@ clear_bit (int nr, volatile void *addr) static __inline__ void __clear_bit (int nr, volatile void *addr) { - volatile __u32 *p = (__u32 *) addr + (nr >> 5); - __u32 m = 1 << (nr & 31); - *p &= ~m; + *((__u32 *) addr + (nr >> 5)) &= ~(1 << (nr & 31)); } /** * change_bit - Toggle a bit in memory - @nr: Bit to clear + @nr: Bit to change * @addr: Address to start counting from * * change_bit() is atomic and may not be reordered. 
@@ -122,17 +133,17 @@ change_bit (int nr, volatile void *addr) CMPXCHG_BUGCHECK_DECL m = (volatile __u32 *) addr + (nr >> 5); - bit = (1 << (nr & 31)); + bit = 1 << (nr & 31); do { CMPXCHG_BUGCHECK(m); - old = *m; + old = ia64_ld4_bias_nta(m); new = old ^ bit; - } while (cmpxchg_acq(m, old, new) != old); + } while (ia64_cmpxchg4_acq_nta(m, new, old) != old); } /** * __change_bit - Toggle a bit in memory - * @nr: the bit to set + * @nr: the bit to change * @addr: the address to start counting from * * Unlike change_bit(), this function is non-atomic and may be reordered. @@ -160,13 +171,14 @@ test_and_set_bit (int nr, volatile void volatile __u32 *m; CMPXCHG_BUGCHECK_DECL + ia64_mf(); m = (volatile __u32 *) addr + (nr >> 5); bit = 1 << (nr & 31); do { CMPXCHG_BUGCHECK(m); - old = *m; + old = ia64_ld4_bias_nta(m); new = old | bit; - } while (cmpxchg_acq(m, old, new) != old); + } while (ia64_cmpxchg4_acq_nta(m, new, old) != old); return (old & bit) != 0; } @@ -192,7 +204,7 @@ __test_and_set_bit (int nr, volatile voi /** * test_and_clear_bit - Clear a bit and return its old value - * @nr: Bit to set + * @nr: Bit to clear * @addr: Address to count from * * This operation is atomic and cannot be reordered. @@ -205,19 +217,20 @@ test_and_clear_bit (int nr, volatile voi volatile __u32 *m; CMPXCHG_BUGCHECK_DECL + ia64_mf(); m = (volatile __u32 *) addr + (nr >> 5); mask = ~(1 << (nr & 31)); do { CMPXCHG_BUGCHECK(m); - old = *m; + old = ia64_ld4_bias_nta(m); new = old & mask; - } while (cmpxchg_acq(m, old, new) != old); + } while (ia64_cmpxchg4_acq_nta(m, new, old) != old); return (old & ~mask) != 0; } /** * __test_and_clear_bit - Clear a bit and return its old value - * @nr: Bit to set + * @nr: Bit to clear * @addr: Address to count from * * This operation is non-atomic and can be reordered. 
@@ -237,7 +250,7 @@ __test_and_clear_bit(int nr, volatile vo /** * test_and_change_bit - Change a bit and return its old value - * @nr: Bit to set + * @nr: Bit to change * @addr: Address to count from * * This operation is atomic and cannot be reordered. @@ -250,13 +263,14 @@ test_and_change_bit (int nr, volatile vo volatile __u32 *m; CMPXCHG_BUGCHECK_DECL + ia64_mf(); m = (volatile __u32 *) addr + (nr >> 5); bit = (1 << (nr & 31)); do { CMPXCHG_BUGCHECK(m); - old = *m; + old = ia64_ld4_bias_nta(m); new = old ^ bit; - } while (cmpxchg_acq(m, old, new) != old); + } while (ia64_cmpxchg4_acq_nta(m, new, old) != old); return (old & bit) != 0; } --- old/include/asm-ia64/gcc_intrin.h 2006-04-04 18:19:50.000000000 +0200 +++ linux-2.6.16/include/asm-ia64/gcc_intrin.h 2006-04-05 17:07:29.000000000 +0200 @@ -221,6 +221,14 @@ register unsigned long ia64_r13 asm ("r1 asm volatile ("stf.spill [%0]=%1" :: "r"(x), "f"(__f__) : "memory"); \ }) +#define ia64_ld4_bias_nta(ptr) \ +({ \ + __u32 ia64_intri_res; \ + asm volatile ("ld4.bias.nta %0=[%1]": \ + "=r"(ia64_intri_res) : "r"(ptr) : "memory"); \ + ia64_intri_res; \ +}) + #define ia64_fetchadd4_acq(p, inc) \ ({ \ \ @@ -350,6 +358,15 @@ register unsigned long ia64_r13 asm ("r1 ia64_intri_res; \ }) +#define ia64_cmpxchg4_acq_nta(ptr, new, old) \ +({ \ + __u32 ia64_intri_res; \ + asm volatile ("mov ar.ccv=%0;;" :: "rO"(old)); \ + asm volatile ("cmpxchg4.acq.nta %0=[%1],%2,ar.ccv": \ + "=r"(ia64_intri_res) : "r"(ptr), "r"(new) : "memory"); \ + ia64_intri_res; \ +}) + #define ia64_cmpxchg8_acq(ptr, new, old) \ ({ \ __u64 ia64_intri_res; \