From: "Bob Pearson" <rpearson@systemfabricworks.com>
To: <linux-kernel@vger.kernel.org>, <linux@horizon.com>,
<joakim.tjernlund@transmode.se>, <fzago@systemfabricworks.com>,
<akpm@linux-foundation.org>
Subject: RE: [patch v3 7/7] crc32: final-cleanup.diff
Date: Tue, 9 Aug 2011 01:21:38 -0500 [thread overview]
Message-ID: <00d401cc565c$98eeab60$cacc0220$@systemfabricworks.com> (raw)
In-Reply-To: <4E40C5C7.2050609@systemfabricworks.com>
Additional comments.
> Some final cleanup changes
>
> - added a comment at the top of crc32.c
> - moved macros ahead of function prototype
> - replaced loops with for (i = 0; i < xxx; i++) which
> requires fewer instructions on x86 since the
> buffer lookups can use i as an index.
>
> -/* implements slicing-by-4 or slicing-by-8 algorithm */
> -static inline u32
> -crc32_body(u32 crc, unsigned char const *buf, size_t len, const u32
> (*tab)[256])
After careful measurements and looking at the asm code, I figured out that there
was no penalty for using a 2D array. That allowed me to go back to the
original form.
> -{
> # ifdef __LITTLE_ENDIAN
> # define DO_CRC(x) crc = tab[0][(crc ^ (x)) & 255] ^ (crc >> 8)
> -# define DO_CRC4 crc = tab[3][(crc) & 255] ^ \
> - tab[2][(crc >> 8) & 255] ^ \
> - tab[1][(crc >> 16) & 255] ^ \
> - tab[0][(crc >> 24) & 255]
> -# define DO_CRC8a (tab[7][(q) & 255] ^ \
> - tab[6][(q >> 8) & 255] ^ \
> - tab[5][(q >> 16) & 255] ^ \
> - tab[4][(q >> 24) & 255])
> -# define DO_CRC8b (tab[3][(q) & 255] ^ \
> +# define DO_CRC4 (tab[3][(q) & 255] ^ \
> tab[2][(q >> 8) & 255] ^ \
> tab[1][(q >> 16) & 255] ^ \
> tab[0][(q >> 24) & 255])
By changing the syntax a little so that the assignment is done down below,
the 4-byte and 8-byte algorithms can share DO_CRC4.
> +# define DO_CRC8 (tab[7][(q) & 255] ^ \
> + tab[6][(q >> 8) & 255] ^ \
> + tab[5][(q >> 16) & 255] ^ \
> + tab[4][(q >> 24) & 255])
> # else
> # define DO_CRC(x) crc = tab[0][((crc >> 24) ^ (x)) & 255] ^ (crc << 8)
> -# define DO_CRC4 crc = tab[0][(crc) & 255] ^ \
> - tab[1][(crc >> 8) & 255] ^ \
> - tab[2][(crc >> 16) & 255] ^ \
> - tab[3][(crc >> 24) & 255]
> -# define DO_CRC8a (tab[4][(q) & 255] ^ \
> - tab[5][(q >> 8) & 255] ^ \
> - tab[6][(q >> 16) & 255] ^ \
> - tab[8][(q >> 24) & 255])
> -# define DO_CRC8b (tab[0][(q) & 255] ^ \
> +# define DO_CRC4 (tab[0][(q) & 255] ^ \
> tab[1][(q >> 8) & 255] ^ \
> tab[2][(q >> 16) & 255] ^ \
> tab[3][(q >> 24) & 255])
> +# define DO_CRC8 (tab[4][(q) & 255] ^ \
> + tab[5][(q >> 8) & 255] ^ \
> + tab[6][(q >> 16) & 255] ^ \
> + tab[7][(q >> 24) & 255])
> # endif
> +
> +/* implements slicing-by-4 or slicing-by-8 algorithm */
> +static inline u32 crc32_body(u32 crc, unsigned char const *buf,
> + size_t len, const u32 (*tab)[256])
> +{
> const u32 *b;
> + u8 *p;
> + u32 q;
> size_t init_len;
> size_t middle_len;
> size_t rem_len;
> + size_t i;
>
> /* break buf into init_len bytes before and
> * rem_len bytes after a middle section with
> @@ -96,37 +96,34 @@ crc32_body(u32 crc, unsigned char const
> rem_len = (len - init_len) & 7;
> # endif
>
> - /* Align it */
> - if (unlikely(init_len)) {
> - do {
> - DO_CRC(*buf++);
> - } while (--init_len);
> - }
> - b = (const u32 *)buf;
> - for (--b; middle_len; --middle_len) {
> + /* process unaligned initial bytes */
> + for (i = 0; i < init_len; i++)
> + DO_CRC(*buf++);
> +
> + /* process aligned words */
> + b = (const u32 *)(buf - 4);
> +
> + for (i = 0; i < middle_len; i++) {
> # if CRC_LE_BITS == 32
> - crc ^= *++b; /* use pre increment for speed */
> - DO_CRC4;
> + /* slicing-by-4 algorithm */
> + q = crc ^ *++b; /* use pre increment for speed */
> + crc = DO_CRC4;
> # else
> - u32 q;
> + /* slicing-by-8 algorithm */
> q = crc ^ *++b;
> - crc = DO_CRC8a;
> + crc = DO_CRC8;
> q = *++b;
> - crc ^= DO_CRC8b;
> + crc ^= DO_CRC4;
> # endif
This really shows how close the two algorithms are.
> }
> - /* And the last few bytes */
> - if (rem_len) {
> - u8 *p = (u8 *)(b + 1) - 1;
> - do {
> - DO_CRC(*++p); /* use pre increment for speed */
> - } while (--rem_len);
> - }
> +
> + /* process unaligned remaining bytes */
> + p = (u8 *)(b + 1) - 1;
> +
> + for (i = 0; i < rem_len; i++)
> + DO_CRC(*++p); /* use pre increment for speed */
> +
> return crc;
> -#undef DO_CRC
> -#undef DO_CRC4
> -#undef DO_CRC8a
> -#undef DO_CRC8b
> }
> #endif
next prev parent reply other threads:[~2011-08-09 6:21 UTC|newest]
Thread overview: 8+ messages / expand[flat|nested] mbox.gz Atom feed top
2011-08-09 5:29 [patch v3 7/7] crc32: final-cleanup.diff Bob Pearson
2011-08-09 6:21 ` Bob Pearson [this message]
2011-08-09 17:39 ` Joakim Tjernlund
2011-08-09 23:05 ` Bob Pearson
2011-08-10 11:40 ` Joakim Tjernlund
2011-08-10 15:13 ` Bob Pearson
2011-08-10 15:48 ` Joakim Tjernlund
2011-08-10 21:57 ` Bob Pearson
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to='00d401cc565c$98eeab60$cacc0220$@systemfabricworks.com' \
--to=rpearson@systemfabricworks.com \
--cc=akpm@linux-foundation.org \
--cc=fzago@systemfabricworks.com \
--cc=joakim.tjernlund@transmode.se \
--cc=linux-kernel@vger.kernel.org \
--cc=linux@horizon.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox