From: Al Viro <viro@ftp.linux.org.uk>
To: Linus Torvalds <torvalds@osdl.org>
Cc: linux-sparse@vger.kernel.org
Subject: [PATCH] switch to hash-based get_one_special()
Date: Sun, 1 Oct 2006 19:55:02 +0100 [thread overview]
Message-ID: <20061001185502.GA29920@ftp.linux.org.uk> (raw)
Subject: [PATCH] switch to hash-based get_one_special()
Weird, but true: the set of C two-character punctuators and
two-symbol prefices of three-character punctuators is
distinguishable by 5-bit hash function (27 out of 32).
Application is obvious - we get much faster get_one_special()
out of that...
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
[and now all patches older than a month are gone]
token.h | 18 +++++++-------
tokenize.c | 78 +++++++++++++++++++++++++++++++++++++++++++++++++++---------
2 files changed, 75 insertions(+), 21 deletions(-)
diff --git a/token.h b/token.h
index 96b0c41..71ef151 100644
--- a/token.h
+++ b/token.h
@@ -88,13 +88,13 @@ #define COMBINATION_STRINGS { \
"*=", \
"/=", \
"%=", \
- "..", "...", \
- "<=", "<<", "<<=", \
- ">=", ">>", ">>=", \
+ "<=", ">=", \
"==", "!=", \
"&&", "&=", \
"||", "|=", \
"^=", "##", \
+ "<<", ">>", "..", \
+ "<<=", ">>=", "..." \
"", \
"<", ">", "<=", ">=" \
}
@@ -111,14 +111,8 @@ enum special_token {
SPECIAL_MUL_ASSIGN,
SPECIAL_DIV_ASSIGN,
SPECIAL_MOD_ASSIGN,
- SPECIAL_DOTDOT,
- SPECIAL_ELLIPSIS,
SPECIAL_LTE,
- SPECIAL_LEFTSHIFT,
- SPECIAL_SHL_ASSIGN,
SPECIAL_GTE,
- SPECIAL_RIGHTSHIFT,
- SPECIAL_SHR_ASSIGN,
SPECIAL_EQUAL,
SPECIAL_NOTEQUAL,
SPECIAL_LOGICAL_AND,
@@ -127,6 +121,12 @@ enum special_token {
SPECIAL_OR_ASSIGN,
SPECIAL_XOR_ASSIGN,
SPECIAL_HASHHASH,
+ SPECIAL_LEFTSHIFT,
+ SPECIAL_RIGHTSHIFT,
+ SPECIAL_DOTDOT,
+ SPECIAL_SHL_ASSIGN,
+ SPECIAL_SHR_ASSIGN,
+ SPECIAL_ELLIPSIS,
SPECIAL_ARG_SEPARATOR,
SPECIAL_UNSIGNED_LT,
SPECIAL_UNSIGNED_GT,
diff --git a/tokenize.c b/tokenize.c
index 497da13..67f5a67 100644
--- a/tokenize.c
+++ b/tokenize.c
@@ -283,7 +283,7 @@ got_eof:
* Slow path (including the logics with line-splicing and EOF sanity
* checks) is in nextchar_slow().
*/
-static int nextchar(stream_t *stream)
+static inline int nextchar(stream_t *stream)
{
int offset = stream->offset;
@@ -615,12 +615,68 @@ unsigned char combinations[][3] = COMBIN
#define NR_COMBINATIONS (SPECIAL_ARG_SEPARATOR - SPECIAL_BASE)
+/* hash function for two-character punctuators - all give unique values */
+#define special_hash(c0, c1) (((c0*8+c1*2)+((c0*8+c1*2)>>5))&31)
+
+/*
+ * note that we won't get false positives - special_hash(0,0) is 0 and
+ * entry 0 is filled (by +=), so all the missing ones are OK.
+ */
+static unsigned char hash_results[32][2] = {
+#define RES(c0, c1) [special_hash(c0, c1)] = {c0, c1}
+ RES('+', '='), /* 00 */
+ RES('/', '='), /* 01 */
+ RES('^', '='), /* 05 */
+ RES('&', '&'), /* 07 */
+ RES('#', '#'), /* 08 */
+ RES('<', '<'), /* 0a */
+ RES('<', '='), /* 0c */
+ RES('!', '='), /* 0e */
+ RES('%', '='), /* 0f */
+ RES('-', '-'), /* 10 */
+ RES('-', '='), /* 11 */
+ RES('-', '>'), /* 13 */
+ RES('=', '='), /* 15 */
+ RES('&', '='), /* 17 */
+ RES('*', '='), /* 18 */
+ RES('.', '.'), /* 1a */
+ RES('+', '+'), /* 1b */
+ RES('|', '='), /* 1c */
+ RES('>', '='), /* 1d */
+ RES('|', '|'), /* 1e */
+ RES('>', '>') /* 1f */
+#undef RES
+};
+static int code[32] = {
+#define CODE(c0, c1, value) [special_hash(c0, c1)] = value
+ CODE('+', '=', SPECIAL_ADD_ASSIGN), /* 00 */
+ CODE('/', '=', SPECIAL_DIV_ASSIGN), /* 01 */
+ CODE('^', '=', SPECIAL_XOR_ASSIGN), /* 05 */
+ CODE('&', '&', SPECIAL_LOGICAL_AND), /* 07 */
+ CODE('#', '#', SPECIAL_HASHHASH), /* 08 */
+ CODE('<', '<', SPECIAL_LEFTSHIFT), /* 0a */
+ CODE('<', '=', SPECIAL_LTE), /* 0c */
+ CODE('!', '=', SPECIAL_NOTEQUAL), /* 0e */
+ CODE('%', '=', SPECIAL_MOD_ASSIGN), /* 0f */
+ CODE('-', '-', SPECIAL_DECREMENT), /* 10 */
+ CODE('-', '=', SPECIAL_SUB_ASSIGN), /* 11 */
+ CODE('-', '>', SPECIAL_DEREFERENCE), /* 13 */
+ CODE('=', '=', SPECIAL_EQUAL), /* 15 */
+ CODE('&', '=', SPECIAL_AND_ASSIGN), /* 17 */
+ CODE('*', '=', SPECIAL_MUL_ASSIGN), /* 18 */
+ CODE('.', '.', SPECIAL_DOTDOT), /* 1a */
+ CODE('+', '+', SPECIAL_INCREMENT), /* 1b */
+ CODE('|', '=', SPECIAL_OR_ASSIGN), /* 1c */
+ CODE('>', '=', SPECIAL_GTE), /* 1d */
+ CODE('|', '|', SPECIAL_LOGICAL_OR), /* 1e */
+ CODE('>', '>', SPECIAL_RIGHTSHIFT) /* 1f */
+#undef CODE
+};
+
static int get_one_special(int c, stream_t *stream)
{
struct token *token;
- unsigned char c1, c2, c3;
int next, value, i;
- unsigned char *comb;
next = nextchar(stream);
@@ -648,17 +704,15 @@ static int get_one_special(int c, stream
*/
value = c;
if (cclass[next + 1] & ValidSecond) {
- comb = combinations[0];
- c1 = c; c2 = next; c3 = 0;
- for (i = 0; i < NR_COMBINATIONS; i++) {
- if (comb[0] == c1 && comb[1] == c2 && comb[2] == c3) {
- value = i + SPECIAL_BASE;
+ i = special_hash(c, next);
+ if (hash_results[i][0] == c && hash_results[i][1] == next) {
+ value = code[i];
+ next = nextchar(stream);
+ if (value >= SPECIAL_LEFTSHIFT &&
+ next == "==."[value - SPECIAL_LEFTSHIFT]) {
+ value += 3;
next = nextchar(stream);
- if (c3)
- break;
- c3 = next;
}
- comb += 3;
}
}
--
1.4.2.GIT
next reply other threads:[~2006-10-01 18:55 UTC|newest]
Thread overview: 2+ messages / expand[flat|nested] mbox.gz Atom feed top
2006-10-01 18:55 Al Viro [this message]
2006-10-24 15:42 ` [PATCH] switch to hash-based get_one_special() Josh Triplett
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20061001185502.GA29920@ftp.linux.org.uk \
--to=viro@ftp.linux.org.uk \
--cc=linux-sparse@vger.kernel.org \
--cc=torvalds@osdl.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.