public inbox for linux-sparse@vger.kernel.org
 help / color / mirror / Atom feed
From: Al Viro <viro@zeniv.linux.org.uk>
To: linux-sparse@vger.kernel.org
Cc: chriscli@google.com, torvalds@linux-foundation.org,
	zxh@xh-zhang.com, ben.dooks@codethink.co.uk,
	dan.carpenter@linaro.org, rf@opensource.cirrus.com
Subject: [PATCH 2/6] simplify the inlined side of nextchar()
Date: Tue, 31 Mar 2026 09:07:25 +0100	[thread overview]
Message-ID: <20260331080729.1378613-2-viro@zeniv.linux.org.uk> (raw)
In-Reply-To: <20260331080729.1378613-1-viro@zeniv.linux.org.uk>

* make sure that stream->buffer + stream->size always points to '\0'.
That is enough to send nextchar() towards the slow path without the
need to check the offset for overflow.
* replace stream->offset with stream->current: a pointer to the current
location in the buffer rather than an offset into it.
* have the increments of stream->current and stream->pos done before
we check whether we need to call nextchar_slow() (with nextchar_slow()
adjusted to be called with incremented ->current and ->pos).

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 tokenize.c | 71 +++++++++++++++++++++++++++---------------------------
 1 file changed, 36 insertions(+), 35 deletions(-)

diff --git a/tokenize.c b/tokenize.c
index c3c6c234..7c12cf6e 100644
--- a/tokenize.c
+++ b/tokenize.c
@@ -47,7 +47,8 @@ unsigned int tabstop = 8;
 #define BUFSIZE (8192)
 
 typedef struct {
-	int fd, offset, size;
+	unsigned char *current;
+	int fd, size;
 	int pos, line, nr;
 	int newline, whitespace;
 	struct token **tokenlist;
@@ -351,31 +352,34 @@ static struct token * alloc_token(stream_t *stream)
  */
 static int nextchar_slow(stream_t *stream)
 {
-	int offset = stream->offset;
+	unsigned char *p = stream->current;	// bumped by fast path
 	int size = stream->size;
 	int c;
-	int spliced = 0, had_cr, had_backslash;
+	bool spliced = false, had_cr, had_backslash;
 
 restart:
-	had_cr = had_backslash = 0;
+	had_cr = had_backslash = false;
 
 repeat:
-	if (offset >= size) {
+	if (p > stream->buffer + size) {
 		if (stream->fd < 0)
 			goto got_eof;
 		size = read(stream->fd, stream->buffer, BUFSIZE);
 		if (size <= 0)
 			goto got_eof;
+		stream->buffer[size] = '\0';	// sentry
 		stream->size = size;
-		stream->offset = offset = 0;
+		stream->current = stream->buffer;
+		p = stream->buffer + 1;
 	}
 
-	c = stream->buffer[offset++];
+	c = p[-1];
 	if (had_cr)
 		goto check_lf;
 
 	if (c == '\r') {
-		had_cr = 1;
+		had_cr = true;
+		p++;
 		goto repeat;
 	}
 
@@ -383,6 +387,7 @@ norm:
 	if (!had_backslash) {
 		switch (c) {
 		case '\t':
+			stream->pos--;
 			stream->pos += tabstop - stream->pos % tabstop;
 			break;
 		case '\n':
@@ -391,38 +396,40 @@ norm:
 			stream->newline = 1;
 			break;
 		case '\\':
-			had_backslash = 1;
+			had_backslash = true;
 			stream->pos++;
+			p++;
 			goto repeat;
-		default:
-			stream->pos++;
 		}
 	} else {
 		if (c == '\n') {
 			stream->line++;
-			stream->pos = 0;
-			spliced = 1;
+			stream->pos = 1;
+			spliced = true;
+			p++;
 			goto restart;
 		}
-		offset--;
 		c = '\\';
+		stream->pos--;
+		p--;
 	}
-out:
-	stream->offset = offset;
+	stream->current = p;
 
 	return c;
 
-check_lf:
+check_lf:	// CR+LF => LF, solitary CR => LF
 	if (c != '\n')
-		offset--;
+		p--;
 	c = '\n';
 	goto norm;
 
 got_eof:
-	if (had_backslash) {
-		c = '\\';
-		goto out;
-	}
+	stream->pos--;
+	stream->buffer[0] = '\0';	// sentry
+	stream->current = stream->buffer;
+	stream->size = 0;
+	if (had_backslash)
+		return '\\';
 	if (stream->pos & Wnewline_eof)
 		warning(stream_pos(stream), "no newline at end of file");
 	else if (spliced)
@@ -437,16 +444,10 @@ got_eof:
  */
 static inline int nextchar(stream_t *stream)
 {
-	int offset = stream->offset;
-
-	if (offset < stream->size) {
-		int c = stream->buffer[offset++];
-		if (c >= ' ' && c != '\\') {
-			stream->offset = offset;
-			stream->pos++;
-			return c;
-		}
-	}
+	int c = *stream->current++;
+	stream->pos++;
+	if (c != '\\' && c >= ' ')
+		return c;
 	return nextchar_slow(stream);
 }
 
@@ -972,9 +973,8 @@ static struct token *setup_stream(stream_t *stream, int idx, int fd,
 
 	stream->token = NULL;
 	stream->fd = fd;
-	stream->offset = 0;
 	stream->size = buf_size;
-	stream->buffer = buf;
+	stream->current = stream->buffer = buf;
 
 	begin = alloc_token(stream);
 	token_type(begin) = TOKEN_STREAMBEGIN;
@@ -1014,7 +1014,7 @@ struct token * tokenize(const struct position *pos, const char *name, int fd, st
 {
 	struct token *begin, *end;
 	stream_t stream;
-	unsigned char buffer[BUFSIZE];
+	unsigned char buffer[BUFSIZE + 1];
 	int idx;
 
 	idx = init_stream(pos, name, fd, next_path);
@@ -1023,6 +1023,7 @@ struct token * tokenize(const struct position *pos, const char *name, int fd, st
 		return endtoken;
 	}
 
+	buffer[0] = '\0';
 	begin = setup_stream(&stream, idx, fd, buffer, 0);
 	end = tokenize_stream(&stream);
 	if (endtoken)
-- 
2.47.3


  reply	other threads:[~2026-03-31  8:03 UTC|newest]

Thread overview: 53+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
     [not found] <cover.1771930766.git.dan.carpenter@linaro.org>
2026-02-24 11:07 ` [PATCH] sparse: add support for __VA_OPT__ Dan Carpenter
2026-02-24 11:16   ` Ben Dooks
2026-02-24 11:56     ` Dan Carpenter
2026-02-24 12:42       ` Richard Fitzgerald
2026-02-24 13:15         ` Ben Dooks
2026-02-25  2:39   ` Chris Li
2026-02-25  3:36     ` Al Viro
2026-02-25  5:29       ` [RFC PATCH] pre-process: add __VA_OPT__ support Eric Zhang
2026-02-25  6:40         ` Al Viro
2026-02-25  7:27           ` Al Viro
2026-02-25  8:14             ` Eric Zhang
2026-02-25 22:18               ` Al Viro
2026-02-26  7:29                 ` Al Viro
2026-03-16  6:56                   ` Al Viro
2026-03-16  7:03                     ` [PATCH 01/21] split copy() into "need to copy" and "can move in place" cases Al Viro
2026-03-16  7:03                       ` [PATCH 02/21] expand and simplify the call of dup_token() in copy() Al Viro
2026-03-16  7:03                       ` [PATCH 03/21] more dup_token() optimizations Al Viro
2026-03-16  7:03                       ` [PATCH 04/21] parsing #define: saner handling of argument count, part 1 Al Viro
2026-03-16  7:03                       ` [PATCH 05/21] simplify collect_arguments() and fix error handling there Al Viro
2026-03-16  7:04                       ` [PATCH 06/21] try_arg(): don't use arglist for argument name lookups Al Viro
2026-03-16  7:04                       ` [PATCH 07/21] make expand_has_...() responsible for expanding its argument Al Viro
2026-03-16  7:04                       ` [PATCH 08/21] preparing to change argument number encoding for TOKEN_..._ARGUMENT Al Viro
2026-03-16  7:04                       ` [PATCH 09/21] steal 2 bits from argnum for argument kind Al Viro
2026-03-16  7:04                       ` [PATCH 10/21] on-demand argument expansion Al Viro
2026-03-16  7:04                       ` [PATCH 11/21] kill create_arglist() Al Viro
2026-03-16  7:04                       ` [PATCH 12/21] stop mangling arglist, get rid of TOKEN_ARG_COUNT Al Viro
2026-03-16  7:04                       ` [PATCH 13/21] deal with ## on arguments separately Al Viro
2026-03-16  7:04                       ` [PATCH 14/21] preparations for __VA_OPT__ support: reshuffle argument slot assignments Al Viro
2026-03-16  7:04                       ` [PATCH 15/21] pre-process.c: split try_arg() Al Viro
2026-03-16  7:04                       ` [PATCH 16/21] __VA_OPT__: parsing Al Viro
2026-03-16  7:04                       ` [PATCH 17/21] expansion-time va_opt handling Al Viro
2026-03-16  7:04                       ` [PATCH 18/21] merge(): saner handling of ->noexpand Al Viro
2026-03-16  7:04                       ` [PATCH 19/21] simplify the calling conventions of collect_arguments() Al Viro
2026-03-16  7:04                       ` [PATCH 20/21] make expand_one_symbol() inline Al Viro
2026-03-16  7:04                       ` [PATCH 21/21] substitute(): convert switch() into cascade of ifs Al Viro
2026-03-16 16:42                     ` [RFC PATCH] pre-process: add __VA_OPT__ support Linus Torvalds
2026-03-19  3:53                       ` Al Viro
2026-03-19  4:07                         ` Linus Torvalds
2026-03-19  5:34                           ` Al Viro
2026-03-17  7:41                     ` Chris Li
2026-03-18  6:35                     ` Eric Zhang
2026-03-31  8:06                     ` Al Viro
2026-03-31  8:07                       ` [PATCH 1/6] nextchar(): get rid of special[] Al Viro
2026-03-31  8:07                         ` Al Viro [this message]
2026-03-31  8:07                         ` [PATCH 3/6] tokenize_stream(): don't bother with isspace() Al Viro
2026-03-31  8:07                         ` [PATCH 4/6] TOKEN_DIRECTIVE: recognize directive-introducing # in tokenizer Al Viro
2026-03-31  8:07                         ` [PATCH 5/6] saner collect_arg() code generation Al Viro
2026-03-31  8:07                         ` [PATCH 6/6] try to get whitespaces right Al Viro
2026-04-01 10:39                       ` [RFC PATCH] pre-process: add __VA_OPT__ support Al Viro
2026-04-01 16:18                         ` Linus Torvalds
2026-04-01 19:52                           ` Al Viro
2026-04-01 20:22                             ` Al Viro
2026-02-25  7:05       ` [PATCH] sparse: add support for __VA_OPT__ Chris Li

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20260331080729.1378613-2-viro@zeniv.linux.org.uk \
    --to=viro@zeniv.linux.org.uk \
    --cc=ben.dooks@codethink.co.uk \
    --cc=chriscli@google.com \
    --cc=dan.carpenter@linaro.org \
    --cc=linux-sparse@vger.kernel.org \
    --cc=rf@opensource.cirrus.com \
    --cc=torvalds@linux-foundation.org \
    --cc=zxh@xh-zhang.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link.

Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox