From: Al Viro <viro@ftp.linux.org.uk>
To: Linus Torvalds <torvalds@osdl.org>
Cc: linux-sparse@vger.kernel.org
Subject: [PATCH] switch to hash-based get_one_special()
Date: Sun, 1 Oct 2006 19:55:02 +0100 [thread overview]
Message-ID: <20061001185502.GA29920@ftp.linux.org.uk> (raw)
Subject: [PATCH] switch to hash-based get_one_special()
Weird, but true: the set of C two-character punctuators and
two-symbol prefices of three-character punctuators is
distinguishable by 5-bit hash function (27 out of 32).
Application is obvious - we get much faster get_one_special()
out of that...
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
[and now all patches older than a month are gone]
token.h | 18 +++++++-------
tokenize.c | 78 +++++++++++++++++++++++++++++++++++++++++++++++++++---------
2 files changed, 75 insertions(+), 21 deletions(-)
diff --git a/token.h b/token.h
index 96b0c41..71ef151 100644
--- a/token.h
+++ b/token.h
@@ -88,13 +88,13 @@ #define COMBINATION_STRINGS { \
"*=", \
"/=", \
"%=", \
- "..", "...", \
- "<=", "<<", "<<=", \
- ">=", ">>", ">>=", \
+ "<=", ">=", \
"==", "!=", \
"&&", "&=", \
"||", "|=", \
"^=", "##", \
+ "<<", ">>", "..", \
+ "<<=", ">>=", "..." \
"", \
"<", ">", "<=", ">=" \
}
@@ -111,14 +111,8 @@ enum special_token {
SPECIAL_MUL_ASSIGN,
SPECIAL_DIV_ASSIGN,
SPECIAL_MOD_ASSIGN,
- SPECIAL_DOTDOT,
- SPECIAL_ELLIPSIS,
SPECIAL_LTE,
- SPECIAL_LEFTSHIFT,
- SPECIAL_SHL_ASSIGN,
SPECIAL_GTE,
- SPECIAL_RIGHTSHIFT,
- SPECIAL_SHR_ASSIGN,
SPECIAL_EQUAL,
SPECIAL_NOTEQUAL,
SPECIAL_LOGICAL_AND,
@@ -127,6 +121,12 @@ enum special_token {
SPECIAL_OR_ASSIGN,
SPECIAL_XOR_ASSIGN,
SPECIAL_HASHHASH,
+ SPECIAL_LEFTSHIFT,
+ SPECIAL_RIGHTSHIFT,
+ SPECIAL_DOTDOT,
+ SPECIAL_SHL_ASSIGN,
+ SPECIAL_SHR_ASSIGN,
+ SPECIAL_ELLIPSIS,
SPECIAL_ARG_SEPARATOR,
SPECIAL_UNSIGNED_LT,
SPECIAL_UNSIGNED_GT,
diff --git a/tokenize.c b/tokenize.c
index 497da13..67f5a67 100644
--- a/tokenize.c
+++ b/tokenize.c
@@ -283,7 +283,7 @@ got_eof:
* Slow path (including the logics with line-splicing and EOF sanity
* checks) is in nextchar_slow().
*/
-static int nextchar(stream_t *stream)
+static inline int nextchar(stream_t *stream)
{
int offset = stream->offset;
@@ -615,12 +615,68 @@ unsigned char combinations[][3] = COMBIN
#define NR_COMBINATIONS (SPECIAL_ARG_SEPARATOR - SPECIAL_BASE)
+/* hash function for two-character punctuators - all give unique values */
+#define special_hash(c0, c1) (((c0*8+c1*2)+((c0*8+c1*2)>>5))&31)
+
+/*
+ * note that we won't get false positives - special_hash(0,0) is 0 and
+ * entry 0 is filled (by +=), so all the missing ones are OK.
+ */
+static unsigned char hash_results[32][2] = {
+#define RES(c0, c1) [special_hash(c0, c1)] = {c0, c1}
+ RES('+', '='), /* 00 */
+ RES('/', '='), /* 01 */
+ RES('^', '='), /* 05 */
+ RES('&', '&'), /* 07 */
+ RES('#', '#'), /* 08 */
+ RES('<', '<'), /* 0a */
+ RES('<', '='), /* 0c */
+ RES('!', '='), /* 0e */
+ RES('%', '='), /* 0f */
+ RES('-', '-'), /* 10 */
+ RES('-', '='), /* 11 */
+ RES('-', '>'), /* 13 */
+ RES('=', '='), /* 15 */
+ RES('&', '='), /* 17 */
+ RES('*', '='), /* 18 */
+ RES('.', '.'), /* 1a */
+ RES('+', '+'), /* 1b */
+ RES('|', '='), /* 1c */
+ RES('>', '='), /* 1d */
+ RES('|', '|'), /* 1e */
+ RES('>', '>') /* 1f */
+#undef RES
+};
+static int code[32] = {
+#define CODE(c0, c1, value) [special_hash(c0, c1)] = value
+ CODE('+', '=', SPECIAL_ADD_ASSIGN), /* 00 */
+ CODE('/', '=', SPECIAL_DIV_ASSIGN), /* 01 */
+ CODE('^', '=', SPECIAL_XOR_ASSIGN), /* 05 */
+ CODE('&', '&', SPECIAL_LOGICAL_AND), /* 07 */
+ CODE('#', '#', SPECIAL_HASHHASH), /* 08 */
+ CODE('<', '<', SPECIAL_LEFTSHIFT), /* 0a */
+ CODE('<', '=', SPECIAL_LTE), /* 0c */
+ CODE('!', '=', SPECIAL_NOTEQUAL), /* 0e */
+ CODE('%', '=', SPECIAL_MOD_ASSIGN), /* 0f */
+ CODE('-', '-', SPECIAL_DECREMENT), /* 10 */
+ CODE('-', '=', SPECIAL_SUB_ASSIGN), /* 11 */
+ CODE('-', '>', SPECIAL_DEREFERENCE), /* 13 */
+ CODE('=', '=', SPECIAL_EQUAL), /* 15 */
+ CODE('&', '=', SPECIAL_AND_ASSIGN), /* 17 */
+ CODE('*', '=', SPECIAL_MUL_ASSIGN), /* 18 */
+ CODE('.', '.', SPECIAL_DOTDOT), /* 1a */
+ CODE('+', '+', SPECIAL_INCREMENT), /* 1b */
+ CODE('|', '=', SPECIAL_OR_ASSIGN), /* 1c */
+ CODE('>', '=', SPECIAL_GTE), /* 1d */
+ CODE('|', '|', SPECIAL_LOGICAL_OR), /* 1e */
+ CODE('>', '>', SPECIAL_RIGHTSHIFT) /* 1f */
+#undef CODE
+};
+
static int get_one_special(int c, stream_t *stream)
{
struct token *token;
- unsigned char c1, c2, c3;
int next, value, i;
- unsigned char *comb;
next = nextchar(stream);
@@ -648,17 +704,15 @@ static int get_one_special(int c, stream
*/
value = c;
if (cclass[next + 1] & ValidSecond) {
- comb = combinations[0];
- c1 = c; c2 = next; c3 = 0;
- for (i = 0; i < NR_COMBINATIONS; i++) {
- if (comb[0] == c1 && comb[1] == c2 && comb[2] == c3) {
- value = i + SPECIAL_BASE;
+ i = special_hash(c, next);
+ if (hash_results[i][0] == c && hash_results[i][1] == next) {
+ value = code[i];
+ next = nextchar(stream);
+ if (value >= SPECIAL_LEFTSHIFT &&
+ next == "==."[value - SPECIAL_LEFTSHIFT]) {
+ value += 3;
next = nextchar(stream);
- if (c3)
- break;
- c3 = next;
}
- comb += 3;
}
}
--
1.4.2.GIT
next reply other threads:[~2006-10-01 18:55 UTC|newest]
Thread overview: 2+ messages / expand[flat|nested] mbox.gz Atom feed top
2006-10-01 18:55 Al Viro [this message]
2006-10-24 15:42 ` [PATCH] switch to hash-based get_one_special() Josh Triplett
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20061001185502.GA29920@ftp.linux.org.uk \
--to=viro@ftp.linux.org.uk \
--cc=linux-sparse@vger.kernel.org \
--cc=torvalds@osdl.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).