From: Al Viro <viro@zeniv.linux.org.uk>
To: linux-sparse@vger.kernel.org
Cc: chriscli@google.com, torvalds@linux-foundation.org,
zxh@xh-zhang.com, ben.dooks@codethink.co.uk,
dan.carpenter@linaro.org, rf@opensource.cirrus.com
Subject: [PATCH 2/6] simplify the inlined side of nextchar()
Date: Tue, 31 Mar 2026 09:07:25 +0100 [thread overview]
Message-ID: <20260331080729.1378613-2-viro@zeniv.linux.org.uk> (raw)
In-Reply-To: <20260331080729.1378613-1-viro@zeniv.linux.org.uk>
* make sure that stream->buffer + stream->size always points to
'\0'. That is enough to send nextchar() towards the slow path without
the need to check the offset for overflow.
* replace stream->offset with stream->current - pointer to current
location in buffer rather than offset in it.
* have the increments of stream->current and stream->pos done before
we check whether we need to call nextchar_slow() (with nextchar_slow()
adjusted to be called with incremented ->current and ->pos).
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
tokenize.c | 71 +++++++++++++++++++++++++++---------------------------
1 file changed, 36 insertions(+), 35 deletions(-)
diff --git a/tokenize.c b/tokenize.c
index c3c6c234..7c12cf6e 100644
--- a/tokenize.c
+++ b/tokenize.c
@@ -47,7 +47,8 @@ unsigned int tabstop = 8;
#define BUFSIZE (8192)
typedef struct {
- int fd, offset, size;
+ unsigned char *current;
+ int fd, size;
int pos, line, nr;
int newline, whitespace;
struct token **tokenlist;
@@ -351,31 +352,34 @@ static struct token * alloc_token(stream_t *stream)
*/
static int nextchar_slow(stream_t *stream)
{
- int offset = stream->offset;
+ unsigned char *p = stream->current; // bumped by fast path
int size = stream->size;
int c;
- int spliced = 0, had_cr, had_backslash;
+ bool spliced = false, had_cr, had_backslash;
restart:
- had_cr = had_backslash = 0;
+ had_cr = had_backslash = false;
repeat:
- if (offset >= size) {
+ if (p > stream->buffer + size) {
if (stream->fd < 0)
goto got_eof;
size = read(stream->fd, stream->buffer, BUFSIZE);
if (size <= 0)
goto got_eof;
+ stream->buffer[size] = '\0'; // sentry
stream->size = size;
- stream->offset = offset = 0;
+ stream->current = stream->buffer;
+ p = stream->buffer + 1;
}
- c = stream->buffer[offset++];
+ c = p[-1];
if (had_cr)
goto check_lf;
if (c == '\r') {
- had_cr = 1;
+ had_cr = true;
+ p++;
goto repeat;
}
@@ -383,6 +387,7 @@ norm:
if (!had_backslash) {
switch (c) {
case '\t':
+ stream->pos--;
stream->pos += tabstop - stream->pos % tabstop;
break;
case '\n':
@@ -391,38 +396,40 @@ norm:
stream->newline = 1;
break;
case '\\':
- had_backslash = 1;
+ had_backslash = true;
stream->pos++;
+ p++;
goto repeat;
- default:
- stream->pos++;
}
} else {
if (c == '\n') {
stream->line++;
- stream->pos = 0;
- spliced = 1;
+ stream->pos = 1;
+ spliced = true;
+ p++;
goto restart;
}
- offset--;
c = '\\';
+ stream->pos--;
+ p--;
}
-out:
- stream->offset = offset;
+ stream->current = p;
return c;
-check_lf:
+check_lf: // CR+LF => LF, solitary CR => LF
if (c != '\n')
- offset--;
+ p--;
c = '\n';
goto norm;
got_eof:
- if (had_backslash) {
- c = '\\';
- goto out;
- }
+ stream->pos--;
+ stream->buffer[0] = '\0'; // sentry
+ stream->current = stream->buffer;
+ stream->size = 0;
+ if (had_backslash)
+ return '\\';
if (stream->pos & Wnewline_eof)
warning(stream_pos(stream), "no newline at end of file");
else if (spliced)
@@ -437,16 +444,10 @@ got_eof:
*/
static inline int nextchar(stream_t *stream)
{
- int offset = stream->offset;
-
- if (offset < stream->size) {
- int c = stream->buffer[offset++];
- if (c >= ' ' && c != '\\') {
- stream->offset = offset;
- stream->pos++;
- return c;
- }
- }
+ int c = *stream->current++;
+ stream->pos++;
+ if (c != '\\' && c >= ' ')
+ return c;
return nextchar_slow(stream);
}
@@ -972,9 +973,8 @@ static struct token *setup_stream(stream_t *stream, int idx, int fd,
stream->token = NULL;
stream->fd = fd;
- stream->offset = 0;
stream->size = buf_size;
- stream->buffer = buf;
+ stream->current = stream->buffer = buf;
begin = alloc_token(stream);
token_type(begin) = TOKEN_STREAMBEGIN;
@@ -1014,7 +1014,7 @@ struct token * tokenize(const struct position *pos, const char *name, int fd, st
{
struct token *begin, *end;
stream_t stream;
- unsigned char buffer[BUFSIZE];
+ unsigned char buffer[BUFSIZE + 1];
int idx;
idx = init_stream(pos, name, fd, next_path);
@@ -1023,6 +1023,7 @@ struct token * tokenize(const struct position *pos, const char *name, int fd, st
return endtoken;
}
+ buffer[0] = '\0';
begin = setup_stream(&stream, idx, fd, buffer, 0);
end = tokenize_stream(&stream);
if (endtoken)
--
2.47.3
next prev parent reply other threads:[~2026-03-31 8:03 UTC|newest]
Thread overview: 53+ messages / expand[flat|nested] mbox.gz Atom feed top
[not found] <cover.1771930766.git.dan.carpenter@linaro.org>
2026-02-24 11:07 ` [PATCH] sparse: add support for __VA_OPT__ Dan Carpenter
2026-02-24 11:16 ` Ben Dooks
2026-02-24 11:56 ` Dan Carpenter
2026-02-24 12:42 ` Richard Fitzgerald
2026-02-24 13:15 ` Ben Dooks
2026-02-25 2:39 ` Chris Li
2026-02-25 3:36 ` Al Viro
2026-02-25 5:29 ` [RFC PATCH] pre-process: add __VA_OPT__ support Eric Zhang
2026-02-25 6:40 ` Al Viro
2026-02-25 7:27 ` Al Viro
2026-02-25 8:14 ` Eric Zhang
2026-02-25 22:18 ` Al Viro
2026-02-26 7:29 ` Al Viro
2026-03-16 6:56 ` Al Viro
2026-03-16 7:03 ` [PATCH 01/21] split copy() into "need to copy" and "can move in place" cases Al Viro
2026-03-16 7:03 ` [PATCH 02/21] expand and simplify the call of dup_token() in copy() Al Viro
2026-03-16 7:03 ` [PATCH 03/21] more dup_token() optimizations Al Viro
2026-03-16 7:03 ` [PATCH 04/21] parsing #define: saner handling of argument count, part 1 Al Viro
2026-03-16 7:03 ` [PATCH 05/21] simplify collect_arguments() and fix error handling there Al Viro
2026-03-16 7:04 ` [PATCH 06/21] try_arg(): don't use arglist for argument name lookups Al Viro
2026-03-16 7:04 ` [PATCH 07/21] make expand_has_...() responsible for expanding its argument Al Viro
2026-03-16 7:04 ` [PATCH 08/21] preparing to change argument number encoding for TOKEN_..._ARGUMENT Al Viro
2026-03-16 7:04 ` [PATCH 09/21] steal 2 bits from argnum for argument kind Al Viro
2026-03-16 7:04 ` [PATCH 10/21] on-demand argument expansion Al Viro
2026-03-16 7:04 ` [PATCH 11/21] kill create_arglist() Al Viro
2026-03-16 7:04 ` [PATCH 12/21] stop mangling arglist, get rid of TOKEN_ARG_COUNT Al Viro
2026-03-16 7:04 ` [PATCH 13/21] deal with ## on arguments separately Al Viro
2026-03-16 7:04 ` [PATCH 14/21] preparations for __VA_OPT__ support: reshuffle argument slot assignments Al Viro
2026-03-16 7:04 ` [PATCH 15/21] pre-process.c: split try_arg() Al Viro
2026-03-16 7:04 ` [PATCH 16/21] __VA_OPT__: parsing Al Viro
2026-03-16 7:04 ` [PATCH 17/21] expansion-time va_opt handling Al Viro
2026-03-16 7:04 ` [PATCH 18/21] merge(): saner handling of ->noexpand Al Viro
2026-03-16 7:04 ` [PATCH 19/21] simplify the calling conventions of collect_arguments() Al Viro
2026-03-16 7:04 ` [PATCH 20/21] make expand_one_symbol() inline Al Viro
2026-03-16 7:04 ` [PATCH 21/21] substitute(): convert switch() into cascade of ifs Al Viro
2026-03-16 16:42 ` [RFC PATCH] pre-process: add __VA_OPT__ support Linus Torvalds
2026-03-19 3:53 ` Al Viro
2026-03-19 4:07 ` Linus Torvalds
2026-03-19 5:34 ` Al Viro
2026-03-17 7:41 ` Chris Li
2026-03-18 6:35 ` Eric Zhang
2026-03-31 8:06 ` Al Viro
2026-03-31 8:07 ` [PATCH 1/6] nextchar(): get rid of special[] Al Viro
2026-03-31 8:07 ` Al Viro [this message]
2026-03-31 8:07 ` [PATCH 3/6] tokenize_stream(): don't bother with isspace() Al Viro
2026-03-31 8:07 ` [PATCH 4/6] TOKEN_DIRECTIVE: recognize directive-introducing # in tokenizer Al Viro
2026-03-31 8:07 ` [PATCH 5/6] saner collect_arg() code generation Al Viro
2026-03-31 8:07 ` [PATCH 6/6] try to get whitespaces right Al Viro
2026-04-01 10:39 ` [RFC PATCH] pre-process: add __VA_OPT__ support Al Viro
2026-04-01 16:18 ` Linus Torvalds
2026-04-01 19:52 ` Al Viro
2026-04-01 20:22 ` Al Viro
2026-02-25 7:05 ` [PATCH] sparse: add support for __VA_OPT__ Chris Li
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20260331080729.1378613-2-viro@zeniv.linux.org.uk \
--to=viro@zeniv.linux.org.uk \
--cc=ben.dooks@codethink.co.uk \
--cc=chriscli@google.com \
--cc=dan.carpenter@linaro.org \
--cc=linux-sparse@vger.kernel.org \
--cc=rf@opensource.cirrus.com \
--cc=torvalds@linux-foundation.org \
--cc=zxh@xh-zhang.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox.