typedef unsigned long word; #define ldq(X) (*(const word *)(X)) #define ldq_u(X) (*(const word *)((X) & -8)) #define cmpbge __builtin_alpha_cmpbge #define extql __builtin_alpha_extql #define extqh __builtin_alpha_extqh #define insbl __builtin_alpha_insbl #define insqh __builtin_alpha_insqh #define zap __builtin_alpha_zap #define unlikely(X) __builtin_expect ((X), 0) #define prefetch(X) __builtin_prefetch ((void *)(X), 0) #define find(X, Y) cmpbge (0, (X) ^ (Y)) void *memchr (const void *xs, int xc, word n) { word s = (word)xs; word c; word current, found; word s_align; if (unlikely (n == 0)) return 0; current = ldq_u (s); /* Replicate low byte of C into all bytes. */ { word t = insbl (xc, 1); /* 000000c0 */ c = (xc & 0xff) | t; /* 000000cc */ c = (c << 16) | c; /* 0000cccc */ c = (c << 32) | c; /* cccccccc */ } if (unlikely (n < 9)) goto only_quad; /* Align the source, and decrement the count by the number of bytes searched in the first word. */ s_align = s & -8; n += (s & 7); n -= 8; /* Deal with misalignment in the first word for the comparison. */ { word mask = insqh (-1, s); found = cmpbge (0, (current ^ c) | mask); if (found) goto found_it; } s_align += 8; /* If the block is sufficiently large, align to cacheline (minimum 64-bytes), prefetch the next line, and read entire cachelines at a time. */ if (unlikely (n >= 256)) { prefetch (s_align + 64); while (s_align & 63) { current = ldq (s_align); found = find (current, c); if (found) goto found_it; s_align += 8; n -= 8; } while (n > 64) { word c0, c1, c2, c3, c4, c5, c6, c7; prefetch (s_align + 64); c0 = ldq (s_align + 0*8); c1 = ldq (s_align + 1*8); c2 = ldq (s_align + 2*8); c3 = ldq (s_align + 3*8); c4 = ldq (s_align + 4*8); c5 = ldq (s_align + 5*8); c6 = ldq (s_align + 6*8); c7 = ldq (s_align + 7*8); found = find (c0, c); if (unlikely (found)) goto found_it; s_align += 8; found = find (c1, c); if (unlikely (found)) goto found_it; s_align += 8; found = find (c2, c); if (unlikely (found)) goto found_it; s_align += 8; found = find (c3, c); if (unlikely (found)) goto found_it; s_align += 8; found = find (c4, c); if (unlikely (found)) goto found_it; s_align += 8; found = find (c5, c); if (unlikely (found)) goto found_it; s_align += 8; found = find (c6, c); if (unlikely (found)) goto found_it; s_align += 8; found = find (c7, c); if (unlikely (found)) goto found_it; s_align += 8; n -= 64; } } /* Quadword aligned loop. */ while (1) { current = ldq (s_align); if (n < 8) goto last_quad; found = find (current, c); if (found) goto found_it; s_align += 8; n -= 8; } only_quad: { word end = zap (n, 0x80) - 1; word last = extqh (ldq_u (s + end), s); word first = extql (current, s); current = first | last; /* We've read one word and aligned it in the register. Thus the bit offset in current is relative to S. */ s_align = s; } last_quad: { word mask = (-1ul >> -n); found = find (current, c) & mask; if (found == 0) return 0; } found_it: { word offset; #ifdef __alpha_cix__ offset = __builtin_alpha_cttz (found); #else /* Extract LSB. */ found &= -found; /* Binary search for the LSB. */ offset = (found & 0x0f ? 0 : 4); offset += (found & 0x33 ? 0 : 2); offset += (found & 0x55 ? 0 : 1); #endif return (void *)(s_align + offset); } }