public inbox for linux-sparse@vger.kernel.org
 help / color / mirror / Atom feed
From: Al Viro <viro@zeniv.linux.org.uk>
To: linux-sparse@vger.kernel.org
Cc: chriscli@google.com, torvalds@linux-foundation.org,
	zxh@xh-zhang.com, ben.dooks@codethink.co.uk,
	dan.carpenter@linaro.org, rf@opensource.cirrus.com
Subject: [PATCH 04/21] parsing #define: saner handling of argument count, part 1
Date: Mon, 16 Mar 2026 07:03:58 +0000	[thread overview]
Message-ID: <20260316070415.768839-4-viro@zeniv.linux.org.uk> (raw)
In-Reply-To: <20260316070415.768839-1-viro@zeniv.linux.org.uk>

Mangling arglist the way we do is a bad kludge and it limits what we
can do both at parsing a macro definition and at expansion time.

We use it to
	* store the number of arguments (gets stashed in the cannibalized
token of opening parenthesis)
	* store the number of times each argument is used expanded,
unexpanded and stringified (stashed in cannibalized token of comma or
closing parenthesis that follows an argument)
	* mark the vararg argument (ditto)

The total number of arguments would be better off in struct symbol, next
to arglist.  As a matter of fact, we'd be better off with the number of
non-vararg arguments and "is there a vararg" stored separately.

The number of times each argument occurs expanded, etc. is used to find
out whether a given occurrence of an argument in the body is the last one
of its kind - by counting down as we process the body during expansion,
no less.
Each counter runs down at the same token of the body every time we expand
the macro, and we can just as easily mark those tokens when we parse the
definition.  It is also used to tell whether we need to expand and/or
stringify the argument in the first place.  Again, easily expressed
as marking the tokens and we can easily steal bits for TOKEN_..._ARG
payload - we have a 32bit value that represents the argument's number.

"Is it a vararg argument" flag is used both at definition parsing time
(when we would be better off with "the index of vararg argument or -1
if there's none") and at expansion time, when we collect the arguments.
There we pass those values to collect_argument(), telling it whether it
should stop on (unprotected) commas.  The current logic is seriously
convoluted, especially around the error recovery.  Untangling that ends
up with a variant that wants to know the number of non-vararg arguments
along with "do we have a vararg at all" flag, upfront and not scattered
through the arglist.

As the first step, introduce sym->fixed_args and sym->vararg and have
them calculated when we parse a macro definition; stop storing the number
of arguments in the first token of arglist.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 pre-process.c | 64 +++++++++++++++++++++++++++++++++++----------------
 symbol.h      |  1 +
 2 files changed, 45 insertions(+), 20 deletions(-)

diff --git a/pre-process.c b/pre-process.c
index 320a5247..d591a183 100644
--- a/pre-process.c
+++ b/pre-process.c
@@ -312,9 +312,10 @@ struct arg {
 	int n_str;
 };
 
-static int collect_arguments(struct token *start, struct token *arglist, struct arg *args, struct token *what)
+static int collect_arguments(struct token *start, struct symbol *sym, struct arg *args, struct token *what)
 {
-	int wanted = arglist->count.normal;
+	struct token *arglist = sym->arglist;
+	int wanted = sym->fixed_args + sym->vararg;
 	struct token *next = NULL;
 	int count = 0;
 
@@ -760,7 +761,7 @@ static int expand(struct token **list, struct symbol *sym)
 	struct ident *expanding = token->ident;
 	struct token **tail;
 	struct token *expansion = sym->expansion;
-	int nargs = sym->arglist ? sym->arglist->count.normal : 0;
+	int nargs = sym->fixed_args + sym->vararg;
 	struct arg args[nargs];
 
 	if (expanding->tainted) {
@@ -771,7 +772,7 @@ static int expand(struct token **list, struct symbol *sym)
 	if (sym->arglist) {
 		if (!match_op(scan_next(&token->next), '('))
 			return 1;
-		if (!collect_arguments(token->next, sym->arglist, args, token))
+		if (!collect_arguments(token->next, sym, args, token))
 			return 1;
 		expand_arguments(nargs, args);
 	}
@@ -1087,6 +1088,21 @@ static int token_list_different(struct token *list1, struct token *list2)
 	}
 }
 
+static int macro_nargs = 0;
+static int macro_vararg = -1;
+static bool macro_funclike = false;
+
+static bool macro_add_arg(struct position pos, struct ident *ident)
+{
+	if (macro_nargs == 1024)
+		goto Eargs;
+	macro_nargs++;
+	return true;
+Eargs:
+	sparse_error(pos, "too many arguments in macro definition");
+	return false;
+}
+
 static inline void set_arg_count(struct token *token)
 {
 	token_type(token) = TOKEN_ARG_COUNT;
@@ -1097,7 +1113,6 @@ static inline void set_arg_count(struct token *token)
 static struct token *parse_arguments(struct token *list)
 {
 	struct token *arg = list->next, *next = list;
-	struct argcount *count = &list->count;
 
 	set_arg_count(list);
 
@@ -1110,10 +1125,10 @@ static struct token *parse_arguments(struct token *list)
 	while (token_type(arg) == TOKEN_IDENT) {
 		if (arg->ident == &__VA_ARGS___ident)
 			goto Eva_args;
-		if (!++count->normal)
-			goto Eargs;
-		next = arg->next;
+		if (!macro_add_arg(arg->pos, arg->ident))
+			return NULL;
 
+		next = arg->next;
 		if (match_op(next, ',')) {
 			set_arg_count(next);
 			arg = next->next;
@@ -1132,6 +1147,7 @@ static struct token *parse_arguments(struct token *list)
 		if (match_op(next, SPECIAL_ELLIPSIS)) {
 			if (match_op(next->next, ')')) {
 				set_arg_count(next);
+				macro_vararg = macro_nargs - 1;
 				next->count.vararg = 1;
 				next = next->next;
 				arg->next->next = &eof_token_entry;
@@ -1156,9 +1172,10 @@ static struct token *parse_arguments(struct token *list)
 		arg->ident = &__VA_ARGS___ident;
 		if (!match_op(next, ')'))
 			goto Enotclosed;
-		if (!++count->normal)
-			goto Eargs;
+		if (!macro_add_arg(arg->pos, &__VA_ARGS___ident))
+			return NULL;
 		set_arg_count(next);
+		macro_vararg = macro_nargs - 1;
 		next->count.vararg = 1;
 		next = next->next;
 		arg->next->next = &eof_token_entry;
@@ -1188,9 +1205,6 @@ Enotclosed:
 Eva_args:
 	sparse_error(arg->pos, "__VA_ARGS__ can only appear in the expansion of a C99 variadic macro");
 	return NULL;
-Eargs:
-	sparse_error(arg->pos, "too many arguments in macro definition");
-	return NULL;
 }
 
 static int try_arg(struct token *token, enum token_type type, struct token *arglist)
@@ -1198,7 +1212,7 @@ static int try_arg(struct token *token, enum token_type type, struct token *argl
 	struct ident *ident = token->ident;
 	int nr;
 
-	if (!arglist || token_type(token) != TOKEN_IDENT)
+	if (!macro_funclike || token_type(token) != TOKEN_IDENT)
 		return 0;
 
 	arglist = arglist->next;
@@ -1221,7 +1235,7 @@ static int try_arg(struct token *token, enum token_type type, struct token *argl
 				n = ++count->str;
 			}
 			if (n)
-				return count->vararg ? 2 : 1;
+				return nr == macro_vararg ? 2 : 1;
 			/*
 			 * XXX - need saner handling of that
 			 * (>= 1024 instances of argument)
@@ -1236,7 +1250,7 @@ static int try_arg(struct token *token, enum token_type type, struct token *argl
 static struct token *handle_hash(struct token **p, struct token *arglist)
 {
 	struct token *token = *p;
-	if (arglist) {
+	if (macro_funclike) {
 		struct token *next = token->next;
 		if (!try_arg(next, TOKEN_STR_ARGUMENT, arglist))
 			goto Equote;
@@ -1354,7 +1368,7 @@ static int do_define(struct position pos, struct token *token, struct ident *nam
 
 	expansion = parse_expansion(expansion, arglist, name);
 	if (!expansion)
-		return 1;
+		goto out;
 
 	sym = lookup_symbol(name, NS_MACRO | NS_UNDEF);
 	if (sym) {
@@ -1388,6 +1402,8 @@ static int do_define(struct position pos, struct token *token, struct ident *nam
 	if (!ret) {
 		sym->expansion = expansion;
 		sym->arglist = arglist;
+		sym->vararg = macro_vararg >= 0;
+		sym->fixed_args = macro_nargs - sym->vararg;
 		if (token) /* Free the "define" token, but not the rest of the line */
 			__free_token(token);
 	}
@@ -1396,6 +1412,9 @@ static int do_define(struct position pos, struct token *token, struct ident *nam
 	sym->used_in = NULL;
 	sym->attr = attr;
 out:
+	macro_nargs = 0;
+	macro_vararg = -1;
+	macro_funclike = false;
 	return ret;
 }
 
@@ -1490,8 +1509,12 @@ static int do_handle_define(struct stream *stream, struct token **line, struct t
 		if (match_op(expansion, '(')) {
 			arglist = expansion;
 			expansion = parse_arguments(expansion);
-			if (!expansion)
+			if (!expansion) {
+				macro_nargs = 0;
+				macro_vararg = -1;
 				return 1;
+			}
+			macro_funclike = true;
 		} else if (!eof_token(expansion)) {
 			warning(expansion->pos,
 				"no whitespace before object-like macro body");
@@ -2075,8 +2098,9 @@ static void create_arglist(struct symbol *sym, int count)
 
 	token = __alloc_token(0);
 	token_type(token) = TOKEN_ARG_COUNT;
-	token->count.normal = count;
 	sym->arglist = token;
+	sym->fixed_args = count;
+	sym->vararg = 0;
 	next = &token->next;
 
 	while (count--) {
@@ -2300,7 +2324,7 @@ static int is_VA_ARGS_token(struct token *token)
 
 static void dump_macro(struct symbol *sym)
 {
-	int nargs = sym->arglist ? sym->arglist->count.normal : 0;
+	int nargs = sym->fixed_args + sym->vararg;
 	struct token *args[nargs];
 	struct token *token;
 
diff --git a/symbol.h b/symbol.h
index 3552d439..026dab6f 100644
--- a/symbol.h
+++ b/symbol.h
@@ -168,6 +168,7 @@ struct symbol {
 			struct scope *used_in;
 			void (*expand_simple)(struct token *);
 			bool (*expand)(struct token *, struct arg *args);
+			int fixed_args, vararg;
 		};
 		struct /* NS_PREPROCESSOR */ {
 			int (*handler)(struct stream *, struct token **, struct token *);
-- 
2.47.3


  parent reply	other threads:[~2026-03-16  7:01 UTC|newest]

Thread overview: 42+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
     [not found] <cover.1771930766.git.dan.carpenter@linaro.org>
2026-02-24 11:07 ` [PATCH] sparse: add support for __VA_OPT__ Dan Carpenter
2026-02-24 11:16   ` Ben Dooks
2026-02-24 11:56     ` Dan Carpenter
2026-02-24 12:42       ` Richard Fitzgerald
2026-02-24 13:15         ` Ben Dooks
2026-02-25  2:39   ` Chris Li
2026-02-25  3:36     ` Al Viro
2026-02-25  5:29       ` [RFC PATCH] pre-process: add __VA_OPT__ support Eric Zhang
2026-02-25  6:40         ` Al Viro
2026-02-25  7:27           ` Al Viro
2026-02-25  8:14             ` Eric Zhang
2026-02-25 22:18               ` Al Viro
2026-02-26  7:29                 ` Al Viro
2026-03-16  6:56                   ` Al Viro
2026-03-16  7:03                     ` [PATCH 01/21] split copy() into "need to copy" and "can move in place" cases Al Viro
2026-03-16  7:03                       ` [PATCH 02/21] expand and simplify the call of dup_token() in copy() Al Viro
2026-03-16  7:03                       ` [PATCH 03/21] more dup_token() optimizations Al Viro
2026-03-16  7:03                       ` Al Viro [this message]
2026-03-16  7:03                       ` [PATCH 05/21] simplify collect_arguments() and fix error handling there Al Viro
2026-03-16  7:04                       ` [PATCH 06/21] try_arg(): don't use arglist for argument name lookups Al Viro
2026-03-16  7:04                       ` [PATCH 07/21] make expand_has_...() responsible for expanding its argument Al Viro
2026-03-16  7:04                       ` [PATCH 08/21] preparing to change argument number encoding for TOKEN_..._ARGUMENT Al Viro
2026-03-16  7:04                       ` [PATCH 09/21] steal 2 bits from argnum for argument kind Al Viro
2026-03-16  7:04                       ` [PATCH 10/21] on-demand argument expansion Al Viro
2026-03-16  7:04                       ` [PATCH 11/21] kill create_arglist() Al Viro
2026-03-16  7:04                       ` [PATCH 12/21] stop mangling arglist, get rid of TOKEN_ARG_COUNT Al Viro
2026-03-16  7:04                       ` [PATCH 13/21] deal with ## on arguments separately Al Viro
2026-03-16  7:04                       ` [PATCH 14/21] preparations for __VA_OPT__ support: reshuffle argument slot assignments Al Viro
2026-03-16  7:04                       ` [PATCH 15/21] pre-process.c: split try_arg() Al Viro
2026-03-16  7:04                       ` [PATCH 16/21] __VA_OPT__: parsing Al Viro
2026-03-16  7:04                       ` [PATCH 17/21] expansion-time va_opt handling Al Viro
2026-03-16  7:04                       ` [PATCH 18/21] merge(): saner handling of ->noexpand Al Viro
2026-03-16  7:04                       ` [PATCH 19/21] simplify the calling conventions of collect_arguments() Al Viro
2026-03-16  7:04                       ` [PATCH 20/21] make expand_one_symbol() inline Al Viro
2026-03-16  7:04                       ` [PATCH 21/21] substitute(): convert switch() into cascade of ifs Al Viro
2026-03-16 16:42                     ` [RFC PATCH] pre-process: add __VA_OPT__ support Linus Torvalds
2026-03-19  3:53                       ` Al Viro
2026-03-19  4:07                         ` Linus Torvalds
2026-03-19  5:34                           ` Al Viro
2026-03-17  7:41                     ` Chris Li
2026-03-18  6:35                     ` Eric Zhang
2026-02-25  7:05       ` [PATCH] sparse: add support for __VA_OPT__ Chris Li

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20260316070415.768839-4-viro@zeniv.linux.org.uk \
    --to=viro@zeniv.linux.org.uk \
    --cc=ben.dooks@codethink.co.uk \
    --cc=chriscli@google.com \
    --cc=dan.carpenter@linaro.org \
    --cc=linux-sparse@vger.kernel.org \
    --cc=rf@opensource.cirrus.com \
    --cc=torvalds@linux-foundation.org \
    --cc=zxh@xh-zhang.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox