* [PATCH 02/21] expand and simplify the call of dup_token() in copy()
2026-03-16 7:03 ` [PATCH 01/21] split copy() into "need to copy" and "can move in place" cases Al Viro
@ 2026-03-16 7:03 ` Al Viro
2026-03-16 7:03 ` [PATCH 03/21] more dup_token() optimizations Al Viro
` (18 subsequent siblings)
19 siblings, 0 replies; 42+ messages in thread
From: Al Viro @ 2026-03-16 7:03 UTC (permalink / raw)
To: linux-sparse; +Cc: chriscli, torvalds, zxh, ben.dooks, dan.carpenter, rf
reducing ->pos handling helps there
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
pre-process.c | 13 +++++++++----
1 file changed, 9 insertions(+), 4 deletions(-)
diff --git a/pre-process.c b/pre-process.c
index ae493dc2..e43061b2 100644
--- a/pre-process.c
+++ b/pre-process.c
@@ -613,10 +613,15 @@ static struct token **move_into(struct token **where, struct token *list)
static struct token **copy(struct token **where, struct token *list)
{
while (!eof_token(list)) {
- struct token *token;
- token = dup_token(list, &list->pos);
- if (token_type(token) == TOKEN_IDENT && token->ident->tainted)
- token->pos.noexpand = 1;
+ struct position pos = list->pos;
+ struct token *token = __alloc_token(0);
+
+ token->ident = list->ident;
+ if (pos.type == TOKEN_STRING || pos.type == TOKEN_WIDE_STRING)
+ list->string->immutable = 1;
+ if (pos.type == TOKEN_IDENT && list->ident->tainted)
+ pos.noexpand = 1;
+ token->pos = pos;
*where = token;
where = &token->next;
list = list->next;
--
2.47.3
^ permalink raw reply related [flat|nested] 42+ messages in thread* [PATCH 03/21] more dup_token() optimizations
2026-03-16 7:03 ` [PATCH 01/21] split copy() into "need to copy" and "can move in place" cases Al Viro
2026-03-16 7:03 ` [PATCH 02/21] expand and simplify the call of dup_token() in copy() Al Viro
@ 2026-03-16 7:03 ` Al Viro
2026-03-16 7:03 ` [PATCH 04/21] parsing #define: saner handling of argument count, part 1 Al Viro
` (17 subsequent siblings)
19 siblings, 0 replies; 42+ messages in thread
From: Al Viro @ 2026-03-16 7:03 UTC (permalink / raw)
To: linux-sparse; +Cc: chriscli, torvalds, zxh, ben.dooks, dan.carpenter, rf
pull handling of tainted identifiers into dup_token(), eliminating the reread
and the resulting stall in there.
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
pre-process.c | 20 ++++++++++----------
1 file changed, 10 insertions(+), 10 deletions(-)
diff --git a/pre-process.c b/pre-process.c
index e43061b2..320a5247 100644
--- a/pre-process.c
+++ b/pre-process.c
@@ -583,18 +583,21 @@ static int merge(struct token *left, struct token *right)
return 0;
}
-static struct token *dup_token(const struct token *token, struct position *streampos)
+static inline struct token *dup_token(const struct token *token, struct position *streampos)
{
struct position pos = *streampos;
struct token *alloc = __alloc_token(0);
+ struct position pos2 = token->pos;
- alloc->pos = token->pos;
- alloc->number = token->number;
- alloc->pos.stream = pos.stream;
- alloc->pos.line = pos.line;
- alloc->pos.pos = pos.pos;
- if (token_type(alloc) == TOKEN_STRING || token_type(alloc) == TOKEN_WIDE_STRING)
+ alloc->ident = token->ident;
+ pos2.stream = pos.stream;
+ pos2.line = pos.line;
+ pos2.pos = pos.pos;
+ if (pos2.type == TOKEN_STRING || pos2.type == TOKEN_WIDE_STRING)
token->string->immutable = 1;
+ if (pos2.type == TOKEN_IDENT && token->ident->tainted)
+ pos2.noexpand = 1;
+ alloc->pos = pos2;
return alloc;
}
@@ -728,9 +731,6 @@ static struct token **substitute(struct token **list, const struct token *body,
default:
added = dup_token(body, base_pos);
- if (token_type(body) == TOKEN_IDENT &&
- added->ident->tainted)
- added->pos.noexpand = 1;
tail = &added->next;
break;
}
--
2.47.3
^ permalink raw reply related [flat|nested] 42+ messages in thread* [PATCH 04/21] parsing #define: saner handling of argument count, part 1
2026-03-16 7:03 ` [PATCH 01/21] split copy() into "need to copy" and "can move in place" cases Al Viro
2026-03-16 7:03 ` [PATCH 02/21] expand and simplify the call of dup_token() in copy() Al Viro
2026-03-16 7:03 ` [PATCH 03/21] more dup_token() optimizations Al Viro
@ 2026-03-16 7:03 ` Al Viro
2026-03-16 7:03 ` [PATCH 05/21] simplify collect_arguments() and fix error handling there Al Viro
` (16 subsequent siblings)
19 siblings, 0 replies; 42+ messages in thread
From: Al Viro @ 2026-03-16 7:03 UTC (permalink / raw)
To: linux-sparse; +Cc: chriscli, torvalds, zxh, ben.dooks, dan.carpenter, rf
Mangling arglist the way we do is a bad kludge and it limits what we
can do both at parsing a macro definition and at expansion time.
We use it to
* store the number of arguments (gets stashed in the cannibalized
token of opening parenthesis)
* store the number of times each argument is used expanded,
unexpanded and stringified (stashed in cannibalized token of comma or
closing parenthesis that follows an argument)
* mark the vararg argument (ditto)
Total number of arguments would be better off in struct symbol, next
to arglist. As a matter of fact, we'd be better off with the number of
non-vararg arguments and "is there a vararg" stored separately.
Number of times each argument occurs expanded, etc. is used to find if
given occurrence of argument in the body is the last one of given sort
- by counting down as we process the body during expansion, no less.
Each counter runs down at the same token of the body every time we expand
the macro, and we can just as easily mark those tokens when we parse the
definition. It is also used to tell whether we need to expand and/or
stringify the argument in the first place. Again, easily expressed
as marking the tokens and we can easily steal bits for TOKEN_..._ARG
payload - we have a 32bit value that represents the argument's number.
"Is it a vararg argument" flag is used both at definition parsing time
(when we would be better off with "the index of vararg argument or -1
if there's none") and at expansion time, when we collect the arguments.
There we pass those values to collect_argument(), telling it whether it
should stop on (unprotected) commas. The current logic is seriously
convoluted, especially around the error recovery. Untangling that ends
up with a variant that wants to know the number of non-vararg arguments
along with "do we have a vararg at all" flag, upfront and not scattered
through the arglist.
As the first step, introduce sym->fixed_args and sym->vararg and have
them calculated when we parse a macro definition; stop storing the number
of arguments in the first token of arglist.
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
pre-process.c | 64 +++++++++++++++++++++++++++++++++++----------------
symbol.h | 1 +
2 files changed, 45 insertions(+), 20 deletions(-)
diff --git a/pre-process.c b/pre-process.c
index 320a5247..d591a183 100644
--- a/pre-process.c
+++ b/pre-process.c
@@ -312,9 +312,10 @@ struct arg {
int n_str;
};
-static int collect_arguments(struct token *start, struct token *arglist, struct arg *args, struct token *what)
+static int collect_arguments(struct token *start, struct symbol *sym, struct arg *args, struct token *what)
{
- int wanted = arglist->count.normal;
+ struct token *arglist = sym->arglist;
+ int wanted = sym->fixed_args + sym->vararg;
struct token *next = NULL;
int count = 0;
@@ -760,7 +761,7 @@ static int expand(struct token **list, struct symbol *sym)
struct ident *expanding = token->ident;
struct token **tail;
struct token *expansion = sym->expansion;
- int nargs = sym->arglist ? sym->arglist->count.normal : 0;
+ int nargs = sym->fixed_args + sym->vararg;
struct arg args[nargs];
if (expanding->tainted) {
@@ -771,7 +772,7 @@ static int expand(struct token **list, struct symbol *sym)
if (sym->arglist) {
if (!match_op(scan_next(&token->next), '('))
return 1;
- if (!collect_arguments(token->next, sym->arglist, args, token))
+ if (!collect_arguments(token->next, sym, args, token))
return 1;
expand_arguments(nargs, args);
}
@@ -1087,6 +1088,21 @@ static int token_list_different(struct token *list1, struct token *list2)
}
}
+static int macro_nargs = 0;
+static int macro_vararg = -1;
+static bool macro_funclike = false;
+
+static bool macro_add_arg(struct position pos, struct ident *ident)
+{
+ if (macro_nargs == 1024)
+ goto Eargs;
+ macro_nargs++;
+ return true;
+Eargs:
+ sparse_error(pos, "too many arguments in macro definition");
+ return false;
+}
+
static inline void set_arg_count(struct token *token)
{
token_type(token) = TOKEN_ARG_COUNT;
@@ -1097,7 +1113,6 @@ static inline void set_arg_count(struct token *token)
static struct token *parse_arguments(struct token *list)
{
struct token *arg = list->next, *next = list;
- struct argcount *count = &list->count;
set_arg_count(list);
@@ -1110,10 +1125,10 @@ static struct token *parse_arguments(struct token *list)
while (token_type(arg) == TOKEN_IDENT) {
if (arg->ident == &__VA_ARGS___ident)
goto Eva_args;
- if (!++count->normal)
- goto Eargs;
- next = arg->next;
+ if (!macro_add_arg(arg->pos, arg->ident))
+ return NULL;
+ next = arg->next;
if (match_op(next, ',')) {
set_arg_count(next);
arg = next->next;
@@ -1132,6 +1147,7 @@ static struct token *parse_arguments(struct token *list)
if (match_op(next, SPECIAL_ELLIPSIS)) {
if (match_op(next->next, ')')) {
set_arg_count(next);
+ macro_vararg = macro_nargs - 1;
next->count.vararg = 1;
next = next->next;
arg->next->next = &eof_token_entry;
@@ -1156,9 +1172,10 @@ static struct token *parse_arguments(struct token *list)
arg->ident = &__VA_ARGS___ident;
if (!match_op(next, ')'))
goto Enotclosed;
- if (!++count->normal)
- goto Eargs;
+ if (!macro_add_arg(arg->pos, &__VA_ARGS___ident))
+ return NULL;
set_arg_count(next);
+ macro_vararg = macro_nargs - 1;
next->count.vararg = 1;
next = next->next;
arg->next->next = &eof_token_entry;
@@ -1188,9 +1205,6 @@ Enotclosed:
Eva_args:
sparse_error(arg->pos, "__VA_ARGS__ can only appear in the expansion of a C99 variadic macro");
return NULL;
-Eargs:
- sparse_error(arg->pos, "too many arguments in macro definition");
- return NULL;
}
static int try_arg(struct token *token, enum token_type type, struct token *arglist)
@@ -1198,7 +1212,7 @@ static int try_arg(struct token *token, enum token_type type, struct token *argl
struct ident *ident = token->ident;
int nr;
- if (!arglist || token_type(token) != TOKEN_IDENT)
+ if (!macro_funclike || token_type(token) != TOKEN_IDENT)
return 0;
arglist = arglist->next;
@@ -1221,7 +1235,7 @@ static int try_arg(struct token *token, enum token_type type, struct token *argl
n = ++count->str;
}
if (n)
- return count->vararg ? 2 : 1;
+ return nr == macro_vararg ? 2 : 1;
/*
* XXX - need saner handling of that
* (>= 1024 instances of argument)
@@ -1236,7 +1250,7 @@ static int try_arg(struct token *token, enum token_type type, struct token *argl
static struct token *handle_hash(struct token **p, struct token *arglist)
{
struct token *token = *p;
- if (arglist) {
+ if (macro_funclike) {
struct token *next = token->next;
if (!try_arg(next, TOKEN_STR_ARGUMENT, arglist))
goto Equote;
@@ -1354,7 +1368,7 @@ static int do_define(struct position pos, struct token *token, struct ident *nam
expansion = parse_expansion(expansion, arglist, name);
if (!expansion)
- return 1;
+ goto out;
sym = lookup_symbol(name, NS_MACRO | NS_UNDEF);
if (sym) {
@@ -1388,6 +1402,8 @@ static int do_define(struct position pos, struct token *token, struct ident *nam
if (!ret) {
sym->expansion = expansion;
sym->arglist = arglist;
+ sym->vararg = macro_vararg >= 0;
+ sym->fixed_args = macro_nargs - sym->vararg;
if (token) /* Free the "define" token, but not the rest of the line */
__free_token(token);
}
@@ -1396,6 +1412,9 @@ static int do_define(struct position pos, struct token *token, struct ident *nam
sym->used_in = NULL;
sym->attr = attr;
out:
+ macro_nargs = 0;
+ macro_vararg = -1;
+ macro_funclike = false;
return ret;
}
@@ -1490,8 +1509,12 @@ static int do_handle_define(struct stream *stream, struct token **line, struct t
if (match_op(expansion, '(')) {
arglist = expansion;
expansion = parse_arguments(expansion);
- if (!expansion)
+ if (!expansion) {
+ macro_nargs = 0;
+ macro_vararg = -1;
return 1;
+ }
+ macro_funclike = true;
} else if (!eof_token(expansion)) {
warning(expansion->pos,
"no whitespace before object-like macro body");
@@ -2075,8 +2098,9 @@ static void create_arglist(struct symbol *sym, int count)
token = __alloc_token(0);
token_type(token) = TOKEN_ARG_COUNT;
- token->count.normal = count;
sym->arglist = token;
+ sym->fixed_args = count;
+ sym->vararg = 0;
next = &token->next;
while (count--) {
@@ -2300,7 +2324,7 @@ static int is_VA_ARGS_token(struct token *token)
static void dump_macro(struct symbol *sym)
{
- int nargs = sym->arglist ? sym->arglist->count.normal : 0;
+ int nargs = sym->fixed_args + sym->vararg;
struct token *args[nargs];
struct token *token;
diff --git a/symbol.h b/symbol.h
index 3552d439..026dab6f 100644
--- a/symbol.h
+++ b/symbol.h
@@ -168,6 +168,7 @@ struct symbol {
struct scope *used_in;
void (*expand_simple)(struct token *);
bool (*expand)(struct token *, struct arg *args);
+ int fixed_args, vararg;
};
struct /* NS_PREPROCESSOR */ {
int (*handler)(struct stream *, struct token **, struct token *);
--
2.47.3
^ permalink raw reply related [flat|nested] 42+ messages in thread* [PATCH 05/21] simplify collect_arguments() and fix error handling there
2026-03-16 7:03 ` [PATCH 01/21] split copy() into "need to copy" and "can move in place" cases Al Viro
` (2 preceding siblings ...)
2026-03-16 7:03 ` [PATCH 04/21] parsing #define: saner handling of argument count, part 1 Al Viro
@ 2026-03-16 7:03 ` Al Viro
2026-03-16 7:04 ` [PATCH 06/21] try_arg(): don't use arglist for argument name lookups Al Viro
` (15 subsequent siblings)
19 siblings, 0 replies; 42+ messages in thread
From: Al Viro @ 2026-03-16 7:03 UTC (permalink / raw)
To: linux-sparse; +Cc: chriscli, torvalds, zxh, ben.dooks, dan.carpenter, rf
The current logic is too convoluted. We use collect_arg() to carve
an argument out; it takes a pointer to the previous token (either opening
parenthesis or comma), finds how far the argument extends, cuts the
list at its end and returns the token that follows it (normally either
closing parenthesis or the comma). collect_arg() is told whether we
want a vararg argument or not - the difference is that normal arguments
terminate on commas.
When macro has N non-vararg arguments and V (0 or 1) vararg ones, we want
* N calls of collect_arg() asking for non-vararg arguments;
all but the last one must be followed by commas. The last one may be
followed either by comma or by closing parenthesis.
* If we have seen exactly N commas, call collect_arg() asking
to collect everything until the closing parenthesis. That will get us
to the end of arguments.
The only potential gotcha is that there is a case when "exactly N commas"
for non-vararg macro does _not_ mean excessive arguments - N = V = 0.
Not hard to account for - in that case we must look at the chunk carved
out by the last (and only) call of collect_arg(); that would be everything
between the parentheses. If it's empty, we are fine; otherwise we have
excess arguments.
Rather than trying to fold all of that into a single loop, separate
the handling of non-vararg arguments from the rest; the logic becomes
simpler that way, especially around the error recovery.
As a side benefit the 'vararg' bit in struct argcount becomes unused
and can be removed.
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
pre-process.c | 104 ++++++++++++++++++++++----------------------------
token.h | 1 -
2 files changed, 46 insertions(+), 59 deletions(-)
diff --git a/pre-process.c b/pre-process.c
index d591a183..25990dfa 100644
--- a/pre-process.c
+++ b/pre-process.c
@@ -255,7 +255,7 @@ static void expand_list(struct token **list)
static void preprocessor_line(struct stream *stream, struct token **line);
-static struct token *collect_arg(struct token *prev, int vararg, const struct position *pos)
+static struct token *collect_arg(struct token *prev, bool vararg, const struct position *pos)
{
struct stream *stream = input_streams + prev->pos.stream;
struct token **p = &prev->next;
@@ -314,76 +314,66 @@ struct arg {
static int collect_arguments(struct token *start, struct symbol *sym, struct arg *args, struct token *what)
{
+ int fixed = sym->fixed_args;
+ bool vararg = sym->vararg;
struct token *arglist = sym->arglist;
- int wanted = sym->fixed_args + sym->vararg;
- struct token *next = NULL;
- int count = 0;
+ struct argcount *p;
+ struct token *next = NULL, *v = NULL;
+ const char *err;
+ int commas;
arglist = arglist->next; /* skip counter */
- if (!wanted) {
- next = collect_arg(start, 0, &what->pos);
- if (eof_token(next))
+ for (commas = 0; commas < fixed; commas++) {
+ next = collect_arg(start, false, &what->pos);
+ if (token_type(next) != TOKEN_SPECIAL)
goto Eclosing;
- if (!eof_token(start->next) || !match_op(next, ')')) {
- count++;
- goto Emany;
- }
- } else {
- for (count = 0; count < wanted; count++) {
- struct argcount *p = &arglist->next->count;
- next = collect_arg(start, p->vararg, &what->pos);
- if (eof_token(next))
- goto Eclosing;
- if (p->vararg && wanted == 1 && eof_token(start->next))
- break;
- arglist = arglist->next->next;
- args[count].arg = start->next;
- args[count].n_normal = p->normal;
- args[count].n_quoted = p->quoted;
- args[count].n_str = p->str;
- if (match_op(next, ')')) {
- count++;
- break;
- }
- start = next;
- }
- if (count == wanted && !match_op(next, ')'))
- goto Emany;
- if (count == wanted - 1) {
- struct argcount *p = &arglist->next->count;
- if (!p->vararg)
+ p = &arglist->next->count;
+ arglist = arglist->next->next;
+ args[commas].arg = start->next;
+ args[commas].n_normal = p->normal;
+ args[commas].n_quoted = p->quoted;
+ args[commas].n_str = p->str;
+ if (!match_op(next, ',')) {
+ if (commas < fixed - 1)
goto Efew;
- args[count].arg = NULL;
- args[count].n_normal = p->normal;
- args[count].n_quoted = p->quoted;
- args[count].n_str = p->str;
+ break;
}
- if (count < wanted - 1)
- goto Efew;
+ start = next;
+ }
+ if (commas == fixed) {
+ next = collect_arg(start, true, &what->pos);
+ if (token_type(next) != TOKEN_SPECIAL)
+ goto Eclosing;
+ v = start->next;
+ if (fixed == 0 && eof_token(v))
+ v = NULL;
+ }
+ if (v && !vararg)
+ goto Eexcess;
+ if (vararg) {
+ p = &arglist->next->count;
+ args[fixed].arg = v;
+ args[fixed].n_normal = p->normal;
+ args[fixed].n_quoted = p->quoted;
+ args[fixed].n_str = p->str;
}
what->next = next->next;
return 1;
Efew:
- sparse_error(what->pos, "macro \"%s\" requires %d arguments, but only %d given",
- show_token(what), wanted, count);
+ err = "too few arguments provided to";
+ next = next->next;
goto out;
-Emany:
- while (match_op(next, ',')) {
- next = collect_arg(next, 0, &what->pos);
- count++;
- }
- if (eof_token(next))
- goto Eclosing;
- sparse_error(what->pos, "macro \"%s\" passed %d arguments, but takes just %d",
- show_token(what), count, wanted);
+Eexcess:
+ err = "too many arguments provided to";
+ next = next->next;
goto out;
Eclosing:
- sparse_error(what->pos, "unterminated argument list invoking macro \"%s\"",
- show_token(what));
+ err = "unterminated argument list invoking";
out:
- what->next = next->next;
+ sparse_error(what->pos, "%s macro \"%s\"", err, show_ident(sym->ident));
+ what->next = next;
return 0;
}
@@ -1107,7 +1097,7 @@ static inline void set_arg_count(struct token *token)
{
token_type(token) = TOKEN_ARG_COUNT;
token->count.normal = token->count.quoted =
- token->count.str = token->count.vararg = 0;
+ token->count.str = 0;
}
static struct token *parse_arguments(struct token *list)
@@ -1148,7 +1138,6 @@ static struct token *parse_arguments(struct token *list)
if (match_op(next->next, ')')) {
set_arg_count(next);
macro_vararg = macro_nargs - 1;
- next->count.vararg = 1;
next = next->next;
arg->next->next = &eof_token_entry;
return next->next;
@@ -1176,7 +1165,6 @@ static struct token *parse_arguments(struct token *list)
return NULL;
set_arg_count(next);
macro_vararg = macro_nargs - 1;
- next->count.vararg = 1;
next = next->next;
arg->next->next = &eof_token_entry;
return next;
diff --git a/token.h b/token.h
index 9000e0cb..5dcd8594 100644
--- a/token.h
+++ b/token.h
@@ -175,7 +175,6 @@ struct argcount {
unsigned normal:10;
unsigned quoted:10;
unsigned str:10;
- unsigned vararg:1;
};
/*
--
2.47.3
^ permalink raw reply related [flat|nested] 42+ messages in thread* [PATCH 06/21] try_arg(): don't use arglist for argument name lookups
2026-03-16 7:03 ` [PATCH 01/21] split copy() into "need to copy" and "can move in place" cases Al Viro
` (3 preceding siblings ...)
2026-03-16 7:03 ` [PATCH 05/21] simplify collect_arguments() and fix error handling there Al Viro
@ 2026-03-16 7:04 ` Al Viro
2026-03-16 7:04 ` [PATCH 07/21] make expand_has_...() responsible for expanding its argument Al Viro
` (14 subsequent siblings)
19 siblings, 0 replies; 42+ messages in thread
From: Al Viro @ 2026-03-16 7:04 UTC (permalink / raw)
To: linux-sparse; +Cc: chriscli, torvalds, zxh, ben.dooks, dan.carpenter, rf
Just store them into a global array and search there. That allows us to
get rid of mangling ... in the arglist along with the is_VA_ARGS_token()
kludge. For now we still need to access the arglist in try_arg(),
but that's going away as soon as we get rid of the use counters...
Added a check for duplicate argument names, while we are at it - we
didn't do that.
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
pre-process.c | 90 ++++++++++++++--------------
validation/preprocessor/bad-args.c | 18 ++++++
validation/preprocessor/dump-macro.c | 9 +++
3 files changed, 72 insertions(+), 45 deletions(-)
create mode 100644 validation/preprocessor/bad-args.c
create mode 100644 validation/preprocessor/dump-macro.c
diff --git a/pre-process.c b/pre-process.c
index 25990dfa..17ed7f85 100644
--- a/pre-process.c
+++ b/pre-process.c
@@ -1078,16 +1078,24 @@ static int token_list_different(struct token *list1, struct token *list2)
}
}
+static struct ident *macro_arg_name[1024];
static int macro_nargs = 0;
static int macro_vararg = -1;
static bool macro_funclike = false;
static bool macro_add_arg(struct position pos, struct ident *ident)
{
+ for (int i = 0; i < macro_nargs; i++) {
+ if (ident == macro_arg_name[i])
+ goto Edup_arg;
+ }
if (macro_nargs == 1024)
goto Eargs;
- macro_nargs++;
+ macro_arg_name[macro_nargs++] = ident;
return true;
+Edup_arg:
+ sparse_error(pos, "duplicate macro parameter \"%s\"", show_ident(ident));
+ return false;
Eargs:
sparse_error(pos, "too many arguments in macro definition");
return false;
@@ -1157,8 +1165,6 @@ static struct token *parse_arguments(struct token *list)
if (match_op(arg, SPECIAL_ELLIPSIS)) {
next = arg->next;
- token_type(arg) = TOKEN_IDENT;
- arg->ident = &__VA_ARGS___ident;
if (!match_op(next, ')'))
goto Enotclosed;
if (!macro_add_arg(arg->pos, &__VA_ARGS___ident))
@@ -1198,41 +1204,41 @@ Eva_args:
static int try_arg(struct token *token, enum token_type type, struct token *arglist)
{
struct ident *ident = token->ident;
- int nr;
+ int nr, n;
if (!macro_funclike || token_type(token) != TOKEN_IDENT)
return 0;
- arglist = arglist->next;
+ for (nr = 0; nr < macro_nargs && macro_arg_name[nr] != ident; nr++)
+ ;
- for (nr = 0; !eof_token(arglist); nr++, arglist = arglist->next->next) {
- if (arglist->ident == ident) {
- struct argcount *count = &arglist->next->count;
- int n;
+ if (nr == macro_nargs)
+ return 0;
- token->argnum = nr;
- token_type(token) = type;
- switch (type) {
- case TOKEN_MACRO_ARGUMENT:
- n = ++count->normal;
- break;
- case TOKEN_QUOTED_ARGUMENT:
- n = ++count->quoted;
- break;
- default:
- n = ++count->str;
- }
- if (n)
- return nr == macro_vararg ? 2 : 1;
- /*
- * XXX - need saner handling of that
- * (>= 1024 instances of argument)
- */
- token_type(token) = TOKEN_ERROR;
- return -1;
- }
+ arglist = arglist->next;
+ for (int i = 0; i < nr; i++)
+ arglist = arglist->next->next;
+
+ token->argnum = nr;
+ token_type(token) = type;
+ switch (type) {
+ case TOKEN_MACRO_ARGUMENT:
+ n = ++arglist->next->count.normal;
+ break;
+ case TOKEN_QUOTED_ARGUMENT:
+ n = ++arglist->next->count.quoted;
+ break;
+ default:
+ n = ++arglist->next->count.str;
}
- return 0;
+ if (n)
+ return nr == macro_vararg ? 2 : 1;
+ /*
+ * XXX - need saner handling of that
+ * (>= 1024 instances of argument)
+ */
+ token_type(token) = TOKEN_ERROR;
+ return -1;
}
static struct token *handle_hash(struct token **p, struct token *arglist)
@@ -2304,16 +2310,10 @@ struct token * preprocess(struct token *token)
return token;
}
-static int is_VA_ARGS_token(struct token *token)
-{
- return (token_type(token) == TOKEN_IDENT) &&
- (token->ident == &__VA_ARGS___ident);
-}
-
static void dump_macro(struct symbol *sym)
{
int nargs = sym->fixed_args + sym->vararg;
- struct token *args[nargs];
+ struct ident *args[nargs];
struct token *token;
printf("#define %s", show_ident(sym->ident));
@@ -2325,13 +2325,13 @@ static void dump_macro(struct symbol *sym)
for (; !eof_token(token); token = token->next) {
if (token_type(token) == TOKEN_ARG_COUNT)
continue;
- if (is_VA_ARGS_token(token))
- printf("%s...", sep);
- else
- printf("%s%s", sep, show_token(token));
- args[narg++] = token;
+ printf("%s%s", sep, show_token(token));
+ if (token_type(token) == TOKEN_IDENT)
+ args[narg++] = token->ident;
sep = ",";
}
+ if (narg < nargs)
+ args[narg] = &__VA_ARGS___ident;
putchar(')');
}
@@ -2349,8 +2349,8 @@ static void dump_macro(struct symbol *sym)
/* fall-through */
case TOKEN_QUOTED_ARGUMENT:
case TOKEN_MACRO_ARGUMENT:
- token = args[token->argnum];
- /* fall-through */
+ printf("%s", show_ident(args[token->argnum]));
+ break;
default:
printf("%s", show_token(token));
}
diff --git a/validation/preprocessor/bad-args.c b/validation/preprocessor/bad-args.c
new file mode 100644
index 00000000..3dbb6f92
--- /dev/null
+++ b/validation/preprocessor/bad-args.c
@@ -0,0 +1,18 @@
+#define A(1)
+#define B(__VA_ARGS__)
+#define C(X,Y,X)
+/*
+ * check-name: macro arguments validation
+ * check-command: sparse -E $file
+ *
+ * check-output-start
+
+
+ * check-output-end
+ *
+ * check-error-start
+preprocessor/bad-args.c:1:11: error: "1" may not appear in macro parameter list
+preprocessor/bad-args.c:2:11: error: __VA_ARGS__ can only appear in the expansion of a C99 variadic macro
+preprocessor/bad-args.c:3:15: error: duplicate macro parameter "X"
+ * check-error-end
+ */
diff --git a/validation/preprocessor/dump-macro.c b/validation/preprocessor/dump-macro.c
new file mode 100644
index 00000000..46d70b34
--- /dev/null
+++ b/validation/preprocessor/dump-macro.c
@@ -0,0 +1,9 @@
+#define A(X,Y,...) __VA_ARGS__,Y,X
+/*
+ * check-name: -dM handling of varargs
+ * check-command: sparse -E -dM $file | tail -1
+ *
+ * check-output-start
+#define A(X,Y,...) __VA_ARGS__,Y,X
+ * check-output-end
+ */
--
2.47.3
^ permalink raw reply related [flat|nested] 42+ messages in thread* [PATCH 07/21] make expand_has_...() responsible for expanding its argument
2026-03-16 7:03 ` [PATCH 01/21] split copy() into "need to copy" and "can move in place" cases Al Viro
` (4 preceding siblings ...)
2026-03-16 7:04 ` [PATCH 06/21] try_arg(): don't use arglist for argument name lookups Al Viro
@ 2026-03-16 7:04 ` Al Viro
2026-03-16 7:04 ` [PATCH 08/21] preparing to change argument number encoding for TOKEN_..._ARGUMENT Al Viro
` (13 subsequent siblings)
19 siblings, 0 replies; 42+ messages in thread
From: Al Viro @ 2026-03-16 7:04 UTC (permalink / raw)
To: linux-sparse; +Cc: chriscli, torvalds, zxh, ben.dooks, dan.carpenter, rf
If we want to make expansion of arguments on-demand, we need to adjust
->expand() calling conventions first, passing it unexpanded arguments.
Switch create_arglist() to setting argcounts from normal=1 to quoted=1,
provide a helper (first_arg()) that does actual expansion and make
->expand() instances use it. After that these fake arglists are used
only for two things: they indicate that these macros are function-like
and they short-circuit expand_arguments(). Once we switch to on-demand
argument expansion, the second role will disappear and we can just use
&eof_token_entry as ->arglist for those.
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
pre-process.c | 17 ++++++++++++-----
validation/preprocessor/has-attribute.c | 3 +++
validation/preprocessor/has-builtin.c | 3 +++
3 files changed, 18 insertions(+), 5 deletions(-)
diff --git a/pre-process.c b/pre-process.c
index 17ed7f85..85662365 100644
--- a/pre-process.c
+++ b/pre-process.c
@@ -2000,9 +2000,16 @@ static int handle_nondirective(struct stream *stream, struct token **line, struc
return 1;
}
+static struct token *first_arg(struct arg *args)
+{
+ struct token *arg = args[0].arg;
+ expand_list(&arg);
+ return arg;
+}
+
static bool expand_has_attribute(struct token *token, struct arg *args)
{
- struct token *arg = args[0].expanded;
+ struct token *arg = first_arg(args);
struct symbol *sym;
if (token_type(arg) != TOKEN_IDENT) {
@@ -2017,7 +2024,7 @@ static bool expand_has_attribute(struct token *token, struct arg *args)
static bool expand_has_builtin(struct token *token, struct arg *args)
{
- struct token *arg = args[0].expanded;
+ struct token *arg = first_arg(args);
struct symbol *sym;
if (token_type(arg) != TOKEN_IDENT) {
@@ -2032,7 +2039,7 @@ static bool expand_has_builtin(struct token *token, struct arg *args)
static bool expand_has_extension(struct token *token, struct arg *args)
{
- struct token *arg = args[0].expanded;
+ struct token *arg = first_arg(args);
struct ident *ident;
bool val = false;
@@ -2057,7 +2064,7 @@ static bool expand_has_extension(struct token *token, struct arg *args)
static bool expand_has_feature(struct token *token, struct arg *args)
{
- struct token *arg = args[0].expanded;
+ struct token *arg = first_arg(args);
struct ident *ident;
bool val = false;
@@ -2103,7 +2110,7 @@ static void create_arglist(struct symbol *sym, int count)
token_type(id) = TOKEN_IDENT;
uses = __alloc_token(0);
token_type(uses) = TOKEN_ARG_COUNT;
- uses->count.normal = 1;
+ uses->count.quoted = 1;
*next = id;
id->next = uses;
diff --git a/validation/preprocessor/has-attribute.c b/validation/preprocessor/has-attribute.c
index 3149cbfa..dd0f275e 100644
--- a/validation/preprocessor/has-attribute.c
+++ b/validation/preprocessor/has-attribute.c
@@ -6,6 +6,8 @@ __has_attribute()??? Quesako?
#endif
123 __has_attribute(nothinx) def
+#define A packed
+456 __has_attribute(A)
#if __has_attribute(nothinx)
#error "not a attribute!"
@@ -49,6 +51,7 @@ __has_attribute()??? Quesako?
"has __has_attribute(), yeah!"
123 0 def
+456 1
"ok gcc"
"ok gcc ignore"
"ok sparse specific"
diff --git a/validation/preprocessor/has-builtin.c b/validation/preprocessor/has-builtin.c
index 03272fc9..010d44bd 100644
--- a/validation/preprocessor/has-builtin.c
+++ b/validation/preprocessor/has-builtin.c
@@ -28,6 +28,8 @@ constant_p
#endif
123 __has_builtin(abc) def
+#define A __builtin_constant_p
+456 __has_builtin(A)
/*
* check-name: has-builtin
@@ -39,5 +41,6 @@ constant_p
abs
constant_p
123 0 def
+456 1
* check-output-end
*/
--
2.47.3
^ permalink raw reply related [flat|nested] 42+ messages in thread* [PATCH 08/21] preparing to change argument number encoding for TOKEN_..._ARGUMENT
2026-03-16 7:03 ` [PATCH 01/21] split copy() into "need to copy" and "can move in place" cases Al Viro
` (5 preceding siblings ...)
2026-03-16 7:04 ` [PATCH 07/21] make expand_has_...() responsible for expanding its argument Al Viro
@ 2026-03-16 7:04 ` Al Viro
2026-03-16 7:04 ` [PATCH 09/21] steal 2 bits from argnum for argument kind Al Viro
` (12 subsequent siblings)
19 siblings, 0 replies; 42+ messages in thread
From: Al Viro @ 2026-03-16 7:04 UTC (permalink / raw)
To: linux-sparse; +Cc: chriscli, torvalds, zxh, ben.dooks, dan.carpenter, rf
We want to steal some bits from TOKEN_..._ARGUMENT; it's not a problem,
seeing that payload is a 32bit number and we are *not* going to support
many millions of arguments in macros. For now, wrap the accesses into
an inline helper (argnum(token)), to reduce the amount of noise in
subsequent patches.
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
pre-process.c | 23 ++++++++++++++---------
token.h | 4 ++++
2 files changed, 18 insertions(+), 9 deletions(-)
diff --git a/pre-process.c b/pre-process.c
index 85662365..efb208e7 100644
--- a/pre-process.c
+++ b/pre-process.c
@@ -624,11 +624,16 @@ static struct token **copy(struct token **where, struct token *list)
return where;
}
+static inline int argnum(const struct token *arg)
+{
+ return arg->argnum >> ARGNUM_BITS_STOLEN;
+}
+
static int handle_kludge(const struct token **p, struct arg *args)
{
const struct token *t = (*p)->next->next;
while (1) {
- struct arg *v = &args[t->argnum];
+ struct arg *v = &args[argnum(t)];
if (token_type(t->next) != TOKEN_CONCAT) {
if (v->arg) {
/* ignore the first ## */
@@ -681,13 +686,13 @@ static struct token **substitute(struct token **list, const struct token *body,
break;
case TOKEN_STR_ARGUMENT:
- arg = args[body->argnum].str;
- count = &args[body->argnum].n_str;
+ arg = args[argnum(body)].str;
+ count = &args[argnum(body)].n_str;
goto copy_arg;
case TOKEN_QUOTED_ARGUMENT:
- arg = args[body->argnum].arg;
- count = &args[body->argnum].n_quoted;
+ arg = args[argnum(body)].arg;
+ count = &args[argnum(body)].n_quoted;
if (!arg || eof_token(arg)) {
if (state == Concat)
state = Normal;
@@ -698,8 +703,8 @@ static struct token **substitute(struct token **list, const struct token *body,
goto copy_arg;
case TOKEN_MACRO_ARGUMENT:
- arg = args[body->argnum].expanded;
- count = &args[body->argnum].n_normal;
+ arg = args[argnum(body)].expanded;
+ count = &args[argnum(body)].n_normal;
if (eof_token(arg)) {
state = Normal;
continue;
@@ -1219,7 +1224,7 @@ static int try_arg(struct token *token, enum token_type type, struct token *argl
for (int i = 0; i < nr; i++)
arglist = arglist->next->next;
- token->argnum = nr;
+ token->argnum = nr << ARGNUM_BITS_STOLEN;
token_type(token) = type;
switch (type) {
case TOKEN_MACRO_ARGUMENT:
@@ -2356,7 +2361,7 @@ static void dump_macro(struct symbol *sym)
/* fall-through */
case TOKEN_QUOTED_ARGUMENT:
case TOKEN_MACRO_ARGUMENT:
- printf("%s", show_ident(args[token->argnum]));
+ printf("%s", show_ident(args[argnum(token)]));
break;
default:
printf("%s", show_token(token));
diff --git a/token.h b/token.h
index 5dcd8594..fe7c7fe9 100644
--- a/token.h
+++ b/token.h
@@ -177,6 +177,10 @@ struct argcount {
unsigned str:10;
};
+enum {
+ ARGNUM_BITS_STOLEN
+};
+
/*
* This is a very common data structure, it should be kept
* as small as humanly possible. Big (rare) types go as
--
2.47.3
^ permalink raw reply related [flat|nested] 42+ messages in thread* [PATCH 09/21] steal 2 bits from argnum for argument kind
2026-03-16 7:03 ` [PATCH 01/21] split copy() into "need to copy" and "can move in place" cases Al Viro
` (6 preceding siblings ...)
2026-03-16 7:04 ` [PATCH 08/21] preparing to change argument number encoding for TOKEN_..._ARGUMENT Al Viro
@ 2026-03-16 7:04 ` Al Viro
2026-03-16 7:04 ` [PATCH 10/21] on-demand argument expansion Al Viro
` (11 subsequent siblings)
19 siblings, 0 replies; 42+ messages in thread
From: Al Viro @ 2026-03-16 7:04 UTC (permalink / raw)
To: linux-sparse; +Cc: chriscli, torvalds, zxh, ben.dooks, dan.carpenter, rf
We have 3 separate token types (TOKEN_{MACRO,QUOTED,STR}_ARGUMENT),
with fairly similar handling at expansion time. Let's steal two bits
from ->argnum and use them to represent the kind of occurrence; that
simplifies substitute() and allows for better code generation there.
The object we use to store the argument state at expansion time (struct
arg) is already a structure with 3 pointers to token lists (unexpanded,
expanded and stringified forms of the argument) and 3 integer counters -
the number of remaining occurrences of each kind. Gather those into
3-element arrays indexed by the kind; counts will be gone soon, token
lists will remain.
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
pre-process.c | 106 +++++++++++++++++++++-----------------------------
token.h | 14 +++++--
2 files changed, 55 insertions(+), 65 deletions(-)
diff --git a/pre-process.c b/pre-process.c
index efb208e7..b45688d5 100644
--- a/pre-process.c
+++ b/pre-process.c
@@ -304,12 +304,8 @@ static struct token *collect_arg(struct token *prev, bool vararg, const struct p
*/
struct arg {
- struct token *arg;
- struct token *expanded;
- struct token *str;
- int n_normal;
- int n_quoted;
- int n_str;
+ struct token *arg[3];
+ int count[3];
};
static int collect_arguments(struct token *start, struct symbol *sym, struct arg *args, struct token *what)
@@ -330,10 +326,10 @@ static int collect_arguments(struct token *start, struct symbol *sym, struct arg
goto Eclosing;
p = &arglist->next->count;
arglist = arglist->next->next;
- args[commas].arg = start->next;
- args[commas].n_normal = p->normal;
- args[commas].n_quoted = p->quoted;
- args[commas].n_str = p->str;
+ args[commas].arg[ARG_QUOTED] = start->next;
+ args[commas].count[ARG_NORMAL] = p->normal;
+ args[commas].count[ARG_QUOTED] = p->quoted;
+ args[commas].count[ARG_STR] = p->str;
if (!match_op(next, ',')) {
if (commas < fixed - 1)
goto Efew;
@@ -353,10 +349,10 @@ static int collect_arguments(struct token *start, struct symbol *sym, struct arg
goto Eexcess;
if (vararg) {
p = &arglist->next->count;
- args[fixed].arg = v;
- args[fixed].n_normal = p->normal;
- args[fixed].n_quoted = p->quoted;
- args[fixed].n_str = p->str;
+ args[fixed].arg[ARG_QUOTED] = v;
+ args[fixed].count[ARG_NORMAL] = p->normal;
+ args[fixed].count[ARG_QUOTED] = p->quoted;
+ args[fixed].count[ARG_STR] = p->str;
}
what->next = next->next;
return 1;
@@ -440,21 +436,21 @@ static void expand_arguments(int count, struct arg *args)
{
int i;
for (i = 0; i < count; i++) {
- struct token *arg = args[i].arg;
+ struct token *arg = args[i].arg[ARG_QUOTED];
if (!arg)
arg = &eof_token_entry;
- if (args[i].n_str)
- args[i].str = stringify(arg);
- if (args[i].n_normal) {
- if (!args[i].n_quoted) {
- args[i].expanded = arg;
- args[i].arg = NULL;
+ if (args[i].count[ARG_STR])
+ args[i].arg[ARG_STR] = stringify(arg);
+ if (args[i].count[ARG_NORMAL]) {
+ if (!args[i].count[ARG_QUOTED]) {
+ args[i].arg[ARG_NORMAL] = arg;
+ args[i].arg[ARG_QUOTED] = NULL;
} else if (eof_token(arg)) {
- args[i].expanded = arg;
+ args[i].arg[ARG_NORMAL] = arg;
} else {
- args[i].expanded = dup_list(arg);
+ args[i].arg[ARG_NORMAL] = dup_list(arg);
}
- expand_list(&args[i].expanded);
+ expand_list(&args[i].arg[ARG_NORMAL]);
}
}
}
@@ -629,13 +625,18 @@ static inline int argnum(const struct token *arg)
return arg->argnum >> ARGNUM_BITS_STOLEN;
}
+static inline enum arg_kind argkind(const struct token *arg)
+{
+ return arg->argnum & ARGNUM_KIND_MASK;
+}
+
static int handle_kludge(const struct token **p, struct arg *args)
{
const struct token *t = (*p)->next->next;
while (1) {
- struct arg *v = &args[argnum(t)];
+ struct token *v = args[argnum(t)].arg[ARG_QUOTED];
if (token_type(t->next) != TOKEN_CONCAT) {
- if (v->arg) {
+ if (v) {
/* ignore the first ## */
*p = (*p)->next;
return 0;
@@ -644,7 +645,7 @@ static int handle_kludge(const struct token **p, struct arg *args)
*p = t;
return 1;
}
- if (v->arg && !eof_token(v->arg))
+ if (v && !eof_token(v))
return 0; /* no magic */
t = t->next->next;
}
@@ -685,14 +686,9 @@ static struct token **substitute(struct token **list, const struct token *body,
tail = &added->next;
break;
- case TOKEN_STR_ARGUMENT:
- arg = args[argnum(body)].str;
- count = &args[argnum(body)].n_str;
- goto copy_arg;
-
- case TOKEN_QUOTED_ARGUMENT:
- arg = args[argnum(body)].arg;
- count = &args[argnum(body)].n_quoted;
+ case TOKEN_MACRO_ARGUMENT:
+ arg = args[argnum(body)].arg[argkind(body)];
+ count = &args[argnum(body)].count[argkind(body)];
if (!arg || eof_token(arg)) {
if (state == Concat)
state = Normal;
@@ -700,16 +696,6 @@ static struct token **substitute(struct token **list, const struct token *body,
state = Placeholder;
continue;
}
- goto copy_arg;
-
- case TOKEN_MACRO_ARGUMENT:
- arg = args[argnum(body)].expanded;
- count = &args[argnum(body)].n_normal;
- if (eof_token(arg)) {
- state = Normal;
- continue;
- }
- copy_arg:
if (!--*count)
tail = move_into(&added, arg);
else
@@ -1040,8 +1026,6 @@ static int token_different(struct token *t1, struct token *t2)
different = t1->special != t2->special;
break;
case TOKEN_MACRO_ARGUMENT:
- case TOKEN_QUOTED_ARGUMENT:
- case TOKEN_STR_ARGUMENT:
different = t1->argnum != t2->argnum;
break;
case TOKEN_CHAR_EMBEDDED_0 ... TOKEN_CHAR_EMBEDDED_3:
@@ -1206,7 +1190,7 @@ Eva_args:
return NULL;
}
-static int try_arg(struct token *token, enum token_type type, struct token *arglist)
+static int try_arg(struct token *token, enum arg_kind kind, struct token *arglist)
{
struct ident *ident = token->ident;
int nr, n;
@@ -1224,13 +1208,13 @@ static int try_arg(struct token *token, enum token_type type, struct token *argl
for (int i = 0; i < nr; i++)
arglist = arglist->next->next;
- token->argnum = nr << ARGNUM_BITS_STOLEN;
- token_type(token) = type;
- switch (type) {
- case TOKEN_MACRO_ARGUMENT:
+ token->argnum = (nr << ARGNUM_BITS_STOLEN) | kind;
+ token_type(token) = TOKEN_MACRO_ARGUMENT;
+ switch (kind) {
+ case ARG_NORMAL:
n = ++arglist->next->count.normal;
break;
- case TOKEN_QUOTED_ARGUMENT:
+ case ARG_QUOTED:
n = ++arglist->next->count.quoted;
break;
default:
@@ -1251,7 +1235,7 @@ static struct token *handle_hash(struct token **p, struct token *arglist)
struct token *token = *p;
if (macro_funclike) {
struct token *next = token->next;
- if (!try_arg(next, TOKEN_STR_ARGUMENT, arglist))
+ if (!try_arg(next, ARG_STR, arglist))
goto Equote;
next->pos.whitespace = token->pos.whitespace;
__free_token(token);
@@ -1273,7 +1257,7 @@ static struct token *handle_hashhash(struct token *token, struct token *arglist)
struct token *concat;
int state = match_op(token, ',');
- try_arg(token, TOKEN_QUOTED_ARGUMENT, arglist);
+ try_arg(token, ARG_QUOTED, arglist);
while (1) {
struct token *t;
@@ -1297,7 +1281,7 @@ static struct token *handle_hashhash(struct token *token, struct token *arglist)
return NULL;
}
- is_arg = try_arg(t, TOKEN_QUOTED_ARGUMENT, arglist);
+ is_arg = try_arg(t, ARG_QUOTED, arglist);
if (state == 1 && is_arg) {
state = is_arg;
@@ -1339,7 +1323,7 @@ static struct token *parse_expansion(struct token *expansion, struct token *argl
if (!token)
return NULL;
} else {
- try_arg(token, TOKEN_MACRO_ARGUMENT, arglist);
+ try_arg(token, ARG_NORMAL, arglist);
}
if (token_type(token) == TOKEN_ERROR)
goto Earg;
@@ -2007,7 +1991,7 @@ static int handle_nondirective(struct stream *stream, struct token **line, struc
static struct token *first_arg(struct arg *args)
{
- struct token *arg = args[0].arg;
+ struct token *arg = args[0].arg[ARG_QUOTED];
expand_list(&arg);
return arg;
}
@@ -2356,11 +2340,9 @@ static void dump_macro(struct symbol *sym)
case TOKEN_CONCAT:
printf("##");
break;
- case TOKEN_STR_ARGUMENT:
- printf("#");
- /* fall-through */
- case TOKEN_QUOTED_ARGUMENT:
case TOKEN_MACRO_ARGUMENT:
+ if (argkind(token) == ARG_STR)
+ printf("#");
printf("%s", show_ident(args[argnum(token)]));
break;
default:
diff --git a/token.h b/token.h
index fe7c7fe9..273da39a 100644
--- a/token.h
+++ b/token.h
@@ -100,8 +100,6 @@ enum token_type {
TOKEN_STREAMBEGIN,
TOKEN_STREAMEND,
TOKEN_MACRO_ARGUMENT,
- TOKEN_STR_ARGUMENT,
- TOKEN_QUOTED_ARGUMENT,
TOKEN_CONCAT,
TOKEN_GNU_KLUDGE,
TOKEN_UNTAINT,
@@ -177,8 +175,18 @@ struct argcount {
unsigned str:10;
};
+enum arg_kind {
+ ARG_QUOTED = 0,
+ ARG_NORMAL = 1,
+ ARG_STR = 2,
+};
+
+enum {
+ ARGNUM_BITS_STOLEN = 2
+};
+
enum {
- ARGNUM_BITS_STOLEN
+ ARGNUM_KIND_MASK = 3
};
/*
--
2.47.3
^ permalink raw reply related [flat|nested] 42+ messages in thread* [PATCH 10/21] on-demand argument expansion
2026-03-16 7:03 ` [PATCH 01/21] split copy() into "need to copy" and "can move in place" cases Al Viro
` (7 preceding siblings ...)
2026-03-16 7:04 ` [PATCH 09/21] steal 2 bits from argnum for argument kind Al Viro
@ 2026-03-16 7:04 ` Al Viro
2026-03-16 7:04 ` [PATCH 11/21] kill create_arglist() Al Viro
` (10 subsequent siblings)
19 siblings, 0 replies; 42+ messages in thread
From: Al Viro @ 2026-03-16 7:04 UTC (permalink / raw)
To: linux-sparse; +Cc: chriscli, torvalds, zxh, ben.dooks, dan.carpenter, rf
Instead of calculating expanded and stringified forms of arguments before
we get to interpolating them into the body, do that on demand.
There are several subtle points involved:
* array of arguments ('args' in collect_arguments() and friends)
needs to be explicitly zeroed; then we can easily see whether we'd
already expanded or stringified the sucker. Better done as an explicit
memset(), and it's better off in collect_arguments() than in expand()
itself - that way we don't need to bother with that for macros that
don't have arguments in the first place. And yes, doing that as an
empty initializer in expand() where that VLA is declared does result in
a measurable slowdown...
* anti-recursion rules do not apply to argument expansion;
something like
#define A(x) x + 1
A(A(1))
should result in 1 + 1 + 1, not A(1) + 1. If expansion is done before
we mark 'A' and call substitute() that happens automatically; if we
delay it until substitute() runs into the first normal instance of an
argument, we need to remove the mark for the duration of that expansion.
Not a problem, fortunately, since the set of marked symbols at the time
we get to TOKEN_MACRO_ARGUMENT will be the same as it used to be at the
time we enter substitute() - arguments can't contain any TOKEN_UNTAINT
(collect_arg() would eat all of those) and any taint added during
expand_list() will come with matching TOKEN_UNTAINT inserted into the
list. All such TOKEN_UNTAINT will be consumed before expand_list()
returns, restoring the original conditions. All we need to do is to
remove the taint from macro being substituted just before expanding an
argument and restore it right after that - it will do the right thing.
It makes sense to shift setting the taint from the caller of substitute()
where it's currently done into the very beginning of substitute() itself,
while we are at it.
* instead of using counters to determine whether a given form of an
argument is still needed past this point, just mark the last place where
that form is needed when we are parsing the body - it's easy enough to do.
The only subtlety here is that unexpanded argument is needed to calculate
an expanded form, so in
#define A(x) foo_##x = x
we can't cannibalize the unexpanded form of x until the second instance of
x in the body. Rules for unexpanded form are
1) it's needed for any unexpanded occurrence (obviously)
2) it's needed for the first expanded occurrence
3) it's needed for the first stringified occurrence
Fortunately, we don't need anything non-trivial at #define time - no
separate passes, etc. Note that in real world most of the macros seen
in given compile unit are never expanded in it, so we need to keep the
handling of #define light - trading the overhead at expansion time for
overhead at definition time is a bad idea.
* since the counters are gone, we no longer need to pass arglist
all over the place; parsing side gets an array of struct arg_state,
which is where we keep the information about argument occurrences
through the parsing. Expansion side doesn't need arglist anymore -
the last place that used to need it was collect_arguments(), and only
to copy the counters. No more of that...
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
pre-process.c | 159 ++++++++++++++++++++++++--------------------------
token.h | 8 ++-
2 files changed, 82 insertions(+), 85 deletions(-)
diff --git a/pre-process.c b/pre-process.c
index b45688d5..bd049620 100644
--- a/pre-process.c
+++ b/pre-process.c
@@ -305,31 +305,23 @@ static struct token *collect_arg(struct token *prev, bool vararg, const struct p
struct arg {
struct token *arg[3];
- int count[3];
};
static int collect_arguments(struct token *start, struct symbol *sym, struct arg *args, struct token *what)
{
int fixed = sym->fixed_args;
bool vararg = sym->vararg;
- struct token *arglist = sym->arglist;
- struct argcount *p;
struct token *next = NULL, *v = NULL;
const char *err;
int commas;
- arglist = arglist->next; /* skip counter */
+ memset(args, 0, sizeof(struct arg) * (fixed + 1));
for (commas = 0; commas < fixed; commas++) {
next = collect_arg(start, false, &what->pos);
if (token_type(next) != TOKEN_SPECIAL)
goto Eclosing;
- p = &arglist->next->count;
- arglist = arglist->next->next;
args[commas].arg[ARG_QUOTED] = start->next;
- args[commas].count[ARG_NORMAL] = p->normal;
- args[commas].count[ARG_QUOTED] = p->quoted;
- args[commas].count[ARG_STR] = p->str;
if (!match_op(next, ',')) {
if (commas < fixed - 1)
goto Efew;
@@ -347,13 +339,8 @@ static int collect_arguments(struct token *start, struct symbol *sym, struct arg
}
if (v && !vararg)
goto Eexcess;
- if (vararg) {
- p = &arglist->next->count;
+ if (vararg)
args[fixed].arg[ARG_QUOTED] = v;
- args[fixed].count[ARG_NORMAL] = p->normal;
- args[fixed].count[ARG_QUOTED] = p->quoted;
- args[fixed].count[ARG_STR] = p->str;
- }
what->next = next->next;
return 1;
@@ -432,29 +419,6 @@ static struct token *stringify(struct token *arg)
return token;
}
-static void expand_arguments(int count, struct arg *args)
-{
- int i;
- for (i = 0; i < count; i++) {
- struct token *arg = args[i].arg[ARG_QUOTED];
- if (!arg)
- arg = &eof_token_entry;
- if (args[i].count[ARG_STR])
- args[i].arg[ARG_STR] = stringify(arg);
- if (args[i].count[ARG_NORMAL]) {
- if (!args[i].count[ARG_QUOTED]) {
- args[i].arg[ARG_NORMAL] = arg;
- args[i].arg[ARG_QUOTED] = NULL;
- } else if (eof_token(arg)) {
- args[i].arg[ARG_NORMAL] = arg;
- } else {
- args[i].arg[ARG_NORMAL] = dup_list(arg);
- }
- expand_list(&args[i].arg[ARG_NORMAL]);
- }
- }
-}
-
/*
* Possibly valid combinations:
* - ident + ident -> ident
@@ -651,11 +615,38 @@ static int handle_kludge(const struct token **p, struct arg *args)
}
}
+static struct token *do_argument(const struct token *body,
+ struct arg *args,
+ struct ident *expanding)
+{
+ struct token *arg = args[argnum(body)].arg[argkind(body)];
+ if (arg)
+ return arg;
+ arg = args[argnum(body)].arg[ARG_QUOTED];
+ if (!arg)
+ arg = &eof_token_entry;
+ if (argkind(body) == ARG_NORMAL) {
+ if (!eof_token(arg)) {
+ if (!(body->argnum & (1 << ARGNUM_CONSUME_EXPAND)))
+ arg = dup_list(arg);
+ expanding->tainted = 0;
+ expand_list(&arg);
+ expanding->tainted = 1;
+ }
+ return args[argnum(body)].arg[ARG_NORMAL] = arg;
+ }
+ if (argkind(body) == ARG_STR)
+ return args[argnum(body)].arg[ARG_STR] = stringify(arg);
+ return arg; // ARG_QUOTED
+}
+
static struct token **substitute(struct token **list, const struct token *body, struct arg *args)
{
struct position *base_pos = &(*list)->pos;
- int *count;
enum {Normal, Placeholder, Concat} state = Normal;
+ struct ident *expanding = (*list)->ident;
+
+ expanding->tainted = 1;
for (; !eof_token(body); body = body->next) {
struct token *added, *arg;
@@ -687,8 +678,7 @@ static struct token **substitute(struct token **list, const struct token *body,
break;
case TOKEN_MACRO_ARGUMENT:
- arg = args[argnum(body)].arg[argkind(body)];
- count = &args[argnum(body)].count[argkind(body)];
+ arg = do_argument(body, args, expanding);
if (!arg || eof_token(arg)) {
if (state == Concat)
state = Normal;
@@ -696,7 +686,7 @@ static struct token **substitute(struct token **list, const struct token *body,
state = Placeholder;
continue;
}
- if (!--*count)
+ if (body->argnum & (1 << ARGNUM_CONSUME))
tail = move_into(&added, arg);
else
tail = copy(&added, arg);
@@ -755,14 +745,11 @@ static int expand(struct token **list, struct symbol *sym)
return 1;
if (!collect_arguments(token->next, sym, args, token))
return 1;
- expand_arguments(nargs, args);
}
if (sym->expand)
return sym->expand(token, args) ? 0 : 1;
- expanding->tainted = 1;
-
last = token->next;
tail = substitute(list, expansion, args);
/*
@@ -1093,8 +1080,6 @@ Eargs:
static inline void set_arg_count(struct token *token)
{
token_type(token) = TOKEN_ARG_COUNT;
- token->count.normal = token->count.quoted =
- token->count.str = 0;
}
static struct token *parse_arguments(struct token *list)
@@ -1190,10 +1175,16 @@ Eva_args:
return NULL;
}
-static int try_arg(struct token *token, enum arg_kind kind, struct token *arglist)
+struct arg_state {
+ struct token *needs_raw;
+ struct token *needs_expanded;
+ struct token *needs_str;
+};
+
+static int try_arg(struct token *token, enum arg_kind kind, struct arg_state args[])
{
struct ident *ident = token->ident;
- int nr, n;
+ int nr;
if (!macro_funclike || token_type(token) != TOKEN_IDENT)
return 0;
@@ -1204,38 +1195,31 @@ static int try_arg(struct token *token, enum arg_kind kind, struct token *arglis
if (nr == macro_nargs)
return 0;
- arglist = arglist->next;
- for (int i = 0; i < nr; i++)
- arglist = arglist->next->next;
-
token->argnum = (nr << ARGNUM_BITS_STOLEN) | kind;
token_type(token) = TOKEN_MACRO_ARGUMENT;
switch (kind) {
- case ARG_NORMAL:
- n = ++arglist->next->count.normal;
- break;
case ARG_QUOTED:
- n = ++arglist->next->count.quoted;
+ args[nr].needs_raw = token;
break;
- default:
- n = ++arglist->next->count.str;
+ case ARG_NORMAL:
+ if (!args[nr].needs_expanded)
+ args[nr].needs_raw = token;
+ args[nr].needs_expanded = token;
+ break;
+ default: // ARG_STR
+ if (!args[nr].needs_str)
+ args[nr].needs_raw = token;
+ args[nr].needs_str = token;
}
- if (n)
- return nr == macro_vararg ? 2 : 1;
- /*
- * XXX - need saner handling of that
- * (>= 1024 instances of argument)
- */
- token_type(token) = TOKEN_ERROR;
- return -1;
+ return nr == macro_vararg ? 2 : 1;
}
-static struct token *handle_hash(struct token **p, struct token *arglist)
+static struct token *handle_hash(struct token **p, struct arg_state args[])
{
struct token *token = *p;
if (macro_funclike) {
struct token *next = token->next;
- if (!try_arg(next, ARG_STR, arglist))
+ if (!try_arg(next, ARG_STR, args))
goto Equote;
next->pos.whitespace = token->pos.whitespace;
__free_token(token);
@@ -1251,13 +1235,13 @@ Equote:
}
/* token->next is ## */
-static struct token *handle_hashhash(struct token *token, struct token *arglist)
+static struct token *handle_hashhash(struct token *token, struct arg_state args[])
{
struct token *last = token;
struct token *concat;
int state = match_op(token, ',');
- try_arg(token, ARG_QUOTED, arglist);
+ try_arg(token, ARG_QUOTED, args);
while (1) {
struct token *t;
@@ -1276,12 +1260,12 @@ static struct token *handle_hashhash(struct token *token, struct token *arglist)
goto Econcat;
if (match_op(t, '#')) {
- t = handle_hash(&concat->next, arglist);
+ t = handle_hash(&concat->next, args);
if (!t)
return NULL;
}
- is_arg = try_arg(t, ARG_QUOTED, arglist);
+ is_arg = try_arg(t, ARG_QUOTED, args);
if (state == 1 && is_arg) {
state = is_arg;
@@ -1304,8 +1288,9 @@ Econcat:
return NULL;
}
-static struct token *parse_expansion(struct token *expansion, struct token *arglist, struct ident *name)
+static struct token *parse_expansion(struct token *expansion, struct ident *name)
{
+ struct arg_state args[macro_nargs] = {};
struct token *token = expansion;
struct token **p;
@@ -1314,19 +1299,30 @@ static struct token *parse_expansion(struct token *expansion, struct token *argl
for (p = &expansion; !eof_token(token); p = &token->next, token = *p) {
if (match_op(token, '#')) {
- token = handle_hash(p, arglist);
+ token = handle_hash(p, args);
if (!token)
return NULL;
}
if (match_op(token->next, SPECIAL_HASHHASH)) {
- token = handle_hashhash(token, arglist);
+ token = handle_hashhash(token, args);
if (!token)
return NULL;
} else {
- try_arg(token, ARG_NORMAL, arglist);
+ try_arg(token, ARG_NORMAL, args);
+ }
+ }
+ for (int i = 0; i < macro_nargs; i++) {
+ if (args[i].needs_str)
+ args[i].needs_str->argnum |= 1 << ARGNUM_CONSUME;
+ if (args[i].needs_expanded)
+ args[i].needs_expanded->argnum |= 1 << ARGNUM_CONSUME;
+ if (args[i].needs_raw) {
+ struct token *p = args[i].needs_raw;
+ if (argkind(p) == ARG_QUOTED)
+ p->argnum |= 1 << ARGNUM_CONSUME;
+ else if (argkind(p) == ARG_NORMAL)
+ p->argnum |= 1 << ARGNUM_CONSUME_EXPAND;
}
- if (token_type(token) == TOKEN_ERROR)
- goto Earg;
}
token = alloc_token(&expansion->pos);
token_type(token) = TOKEN_UNTAINT;
@@ -1338,9 +1334,6 @@ static struct token *parse_expansion(struct token *expansion, struct token *argl
Econcat:
sparse_error(token->pos, "'##' cannot appear at the ends of macro expansion");
return NULL;
-Earg:
- sparse_error(token->pos, "too many instances of argument in body");
- return NULL;
}
static int do_define(struct position pos, struct token *token, struct ident *name,
@@ -1349,7 +1342,7 @@ static int do_define(struct position pos, struct token *token, struct ident *nam
struct symbol *sym;
int ret = 1;
- expansion = parse_expansion(expansion, arglist, name);
+ expansion = parse_expansion(expansion, name);
if (!expansion)
goto out;
diff --git a/token.h b/token.h
index 273da39a..b28ac2ca 100644
--- a/token.h
+++ b/token.h
@@ -182,13 +182,17 @@ enum arg_kind {
};
enum {
- ARGNUM_BITS_STOLEN = 2
+ ARGNUM_CONSUME = 2,
+ ARGNUM_CONSUME_EXPAND,
+ ARGNUM_BITS_STOLEN
};
enum {
- ARGNUM_KIND_MASK = 3
+ ARGNUM_KIND_MASK = (1 << ARGNUM_CONSUME) - 1
};
+// _Static_assert(ARGNUM_KIND_MASK >= ARG_STR)
+
/*
* This is a very common data structure, it should be kept
* as small as humanly possible. Big (rare) types go as
--
2.47.3
^ permalink raw reply related [flat|nested] 42+ messages in thread* [PATCH 11/21] kill create_arglist()
2026-03-16 7:03 ` [PATCH 01/21] split copy() into "need to copy" and "can move in place" cases Al Viro
` (8 preceding siblings ...)
2026-03-16 7:04 ` [PATCH 10/21] on-demand argument expansion Al Viro
@ 2026-03-16 7:04 ` Al Viro
2026-03-16 7:04 ` [PATCH 12/21] stop mangling arglist, get rid of TOKEN_ARG_COUNT Al Viro
` (9 subsequent siblings)
19 siblings, 0 replies; 42+ messages in thread
From: Al Viro @ 2026-03-16 7:04 UTC (permalink / raw)
To: linux-sparse; +Cc: chriscli, torvalds, zxh, ben.dooks, dan.carpenter, rf
we don't need the fake arglist for __has_extension() and its ilk anymore;
just set the ->arglist to &eof_token_entry to indicate that arguments
are expected and set ->fixed_args and ->vararg to tell how many.
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
pre-process.c | 37 +++++--------------------------------
1 file changed, 5 insertions(+), 32 deletions(-)
diff --git a/pre-process.c b/pre-process.c
index bd049620..aaf60293 100644
--- a/pre-process.c
+++ b/pre-process.c
@@ -2071,36 +2071,6 @@ static bool expand_has_feature(struct token *token, struct arg *args)
return 1;
}
-static void create_arglist(struct symbol *sym, int count)
-{
- struct token *token;
- struct token **next;
-
- if (!count)
- return;
-
- token = __alloc_token(0);
- token_type(token) = TOKEN_ARG_COUNT;
- sym->arglist = token;
- sym->fixed_args = count;
- sym->vararg = 0;
- next = &token->next;
-
- while (count--) {
- struct token *id, *uses;
- id = __alloc_token(0);
- token_type(id) = TOKEN_IDENT;
- uses = __alloc_token(0);
- token_type(uses) = TOKEN_ARG_COUNT;
- uses->count.quoted = 1;
-
- *next = id;
- id->next = uses;
- next = &uses->next;
- }
- *next = &eof_token_entry;
-}
-
static void init_preprocessor(void)
{
int i;
@@ -2172,8 +2142,11 @@ static void init_preprocessor(void)
struct symbol *sym;
sym = create_symbol(stream, dynamic[i].name, SYM_NODE, NS_MACRO);
sym->expand_simple = dynamic[i].expand_simple;
- if ((sym->expand = dynamic[i].expand) != NULL)
- create_arglist(sym, 1);
+ if ((sym->expand = dynamic[i].expand) != NULL) {
+ sym->fixed_args = 1;
+ sym->vararg = false;
+ sym->arglist = &eof_token_entry;
+ }
}
counter_macro = 0;
--
2.47.3
^ permalink raw reply related [flat|nested] 42+ messages in thread* [PATCH 12/21] stop mangling arglist, get rid of TOKEN_ARG_COUNT
2026-03-16 7:03 ` [PATCH 01/21] split copy() into "need to copy" and "can move in place" cases Al Viro
` (9 preceding siblings ...)
2026-03-16 7:04 ` [PATCH 11/21] kill create_arglist() Al Viro
@ 2026-03-16 7:04 ` Al Viro
2026-03-16 7:04 ` [PATCH 13/21] deal with ## on arguments separately Al Viro
` (8 subsequent siblings)
19 siblings, 0 replies; 42+ messages in thread
From: Al Viro @ 2026-03-16 7:04 UTC (permalink / raw)
To: linux-sparse; +Cc: chriscli, torvalds, zxh, ben.dooks, dan.carpenter, rf
Now it can be done - we no longer store the counters in arglist, so
there's no reason to mangle it. Just have parse_arguments() return the
pointer to the closing ) on success and let the caller split the list at
that point. Simplifies both parse_arguments() and dump_macro() and fixes
a bug in the latter - pre-C99 gcc vararg macros used to lose ... in
-dM output. They did work correctly, but dump_macro() produced
#define A(X,Y) instead of the correct #define A(X,Y...)
Testcase added.
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
pre-process.c | 47 +++++++---------------------
token.h | 9 ------
tokenize.c | 4 ---
validation/preprocessor/dump-macro.c | 4 ++-
4 files changed, 15 insertions(+), 49 deletions(-)
diff --git a/pre-process.c b/pre-process.c
index aaf60293..a60ad687 100644
--- a/pre-process.c
+++ b/pre-process.c
@@ -1000,7 +1000,6 @@ static int token_different(struct token *t1, struct token *t2)
case TOKEN_IDENT:
different = t1->ident != t2->ident;
break;
- case TOKEN_ARG_COUNT:
case TOKEN_UNTAINT:
case TOKEN_CONCAT:
case TOKEN_GNU_KLUDGE:
@@ -1077,22 +1076,12 @@ Eargs:
return false;
}
-static inline void set_arg_count(struct token *token)
-{
- token_type(token) = TOKEN_ARG_COUNT;
-}
-
static struct token *parse_arguments(struct token *list)
{
struct token *arg = list->next, *next = list;
- set_arg_count(list);
-
- if (match_op(arg, ')')) {
- next = arg->next;
- list->next = &eof_token_entry;
- return next;
- }
+ if (match_op(arg, ')'))
+ return arg;
while (token_type(arg) == TOKEN_IDENT) {
if (arg->ident == &__VA_ARGS___ident)
@@ -1102,26 +1091,18 @@ static struct token *parse_arguments(struct token *list)
next = arg->next;
if (match_op(next, ',')) {
- set_arg_count(next);
arg = next->next;
continue;
}
- if (match_op(next, ')')) {
- set_arg_count(next);
- next = next->next;
- arg->next->next = &eof_token_entry;
+ if (match_op(next, ')'))
return next;
- }
/* normal cases are finished here */
if (match_op(next, SPECIAL_ELLIPSIS)) {
if (match_op(next->next, ')')) {
- set_arg_count(next);
macro_vararg = macro_nargs - 1;
- next = next->next;
- arg->next->next = &eof_token_entry;
return next->next;
}
@@ -1143,10 +1124,7 @@ static struct token *parse_arguments(struct token *list)
goto Enotclosed;
if (!macro_add_arg(arg->pos, &__VA_ARGS___ident))
return NULL;
- set_arg_count(next);
macro_vararg = macro_nargs - 1;
- next = next->next;
- arg->next->next = &eof_token_entry;
return next;
}
@@ -1483,14 +1461,19 @@ static int do_handle_define(struct stream *stream, struct token **line, struct t
expansion = left->next;
if (!expansion->pos.whitespace) {
if (match_op(expansion, '(')) {
- arglist = expansion;
- expansion = parse_arguments(expansion);
- if (!expansion) {
+ struct token *last = parse_arguments(expansion);
+ if (!last) {
macro_nargs = 0;
macro_vararg = -1;
return 1;
}
+ // last points to ) at the end of arguments,
+ // expansion starts right after that,
+ // everything up to that point is arglist.
macro_funclike = true;
+ arglist = expansion;
+ expansion = last->next;
+ last->next = &eof_token_entry;
} else if (!eof_token(expansion)) {
warning(expansion->pos,
"no whitespace before object-like macro body");
@@ -2281,20 +2264,14 @@ static void dump_macro(struct symbol *sym)
printf("#define %s", show_ident(sym->ident));
token = sym->arglist;
if (token) {
- const char *sep = "";
int narg = 0;
- putchar('(');
for (; !eof_token(token); token = token->next) {
- if (token_type(token) == TOKEN_ARG_COUNT)
- continue;
- printf("%s%s", sep, show_token(token));
+ printf("%s", show_token(token));
if (token_type(token) == TOKEN_IDENT)
args[narg++] = token->ident;
- sep = ",";
}
if (narg < nargs)
args[narg] = &__VA_ARGS___ident;
- putchar(')');
}
token = sym->expansion;
diff --git a/token.h b/token.h
index b28ac2ca..e469e02d 100644
--- a/token.h
+++ b/token.h
@@ -103,7 +103,6 @@ enum token_type {
TOKEN_CONCAT,
TOKEN_GNU_KLUDGE,
TOKEN_UNTAINT,
- TOKEN_ARG_COUNT,
TOKEN_IF,
TOKEN_SKIP_GROUPS,
TOKEN_ELSE,
@@ -168,13 +167,6 @@ struct string {
char data[];
};
-/* will fit into 32 bits */
-struct argcount {
- unsigned normal:10;
- unsigned quoted:10;
- unsigned str:10;
-};
-
enum arg_kind {
ARG_QUOTED = 0,
ARG_NORMAL = 1,
@@ -207,7 +199,6 @@ struct token {
unsigned int special;
struct string *string;
int argnum;
- struct argcount count;
char embedded[4];
};
};
diff --git a/tokenize.c b/tokenize.c
index 54ea348c..85bc3f49 100644
--- a/tokenize.c
+++ b/tokenize.c
@@ -241,10 +241,6 @@ const char *show_token(const struct token *token)
sprintf(buffer, "<untaint>");
return buffer;
- case TOKEN_ARG_COUNT:
- sprintf(buffer, "<argcnt>");
- return buffer;
-
default:
sprintf(buffer, "unhandled token type '%d' ", token_type(token));
return buffer;
diff --git a/validation/preprocessor/dump-macro.c b/validation/preprocessor/dump-macro.c
index 46d70b34..710c1027 100644
--- a/validation/preprocessor/dump-macro.c
+++ b/validation/preprocessor/dump-macro.c
@@ -1,9 +1,11 @@
#define A(X,Y,...) __VA_ARGS__,Y,X
+#define B(X,Y...) Y
/*
* check-name: -dM handling of varargs
- * check-command: sparse -E -dM $file | tail -1
+ * check-command: sparse -E -dM $file | tail -2
*
* check-output-start
#define A(X,Y,...) __VA_ARGS__,Y,X
+#define B(X,Y...) Y
* check-output-end
*/
--
2.47.3
^ permalink raw reply related [flat|nested] 42+ messages in thread* [PATCH 13/21] deal with ## on arguments separately
2026-03-16 7:03 ` [PATCH 01/21] split copy() into "need to copy" and "can move in place" cases Al Viro
` (10 preceding siblings ...)
2026-03-16 7:04 ` [PATCH 12/21] stop mangling arglist, get rid of TOKEN_ARG_COUNT Al Viro
@ 2026-03-16 7:04 ` Al Viro
2026-03-16 7:04 ` [PATCH 14/21] preparations for __VA_OPT__ support: reshuffle argument slot assignments Al Viro
` (7 subsequent siblings)
19 siblings, 0 replies; 42+ messages in thread
From: Al Viro @ 2026-03-16 7:04 UTC (permalink / raw)
To: linux-sparse; +Cc: chriscli, torvalds, zxh, ben.dooks, dan.carpenter, rf
Adding/concatenating the chunks to the growing expansion is done at the end
of the loop body in substitute(); the preceding switch leaves the data for it in
two variables - 'added' is the first token of the next chunk and 'tail'
points to the forward pointer in the last token of that chunk.
The only case when we might be adding more than one token is macro
argument; forcing it to use the same path as everything else complicates
things for no good reason, especially when it comes to concatenation.
Let the TOKEN_MACRO_ARGUMENT case deal with that stuff on its own.
In case of concatenation let it merge the first token before
copying/inserting the rest; that simplifies the common case and it
simplifies the data flow for everyone since we don't need to bother with
'tail' anymore.
As a side benefit, merge() is no longer inlined, which reduces the spills.
That chunk could go after __VA_OPT__ handling, but having it done first
simplifies the things for __VA_OPT__ (and especially for #__VA_OPT__()),
so let's put that one first.
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
pre-process.c | 38 +++++++++++++++++++++++---------------
1 file changed, 23 insertions(+), 15 deletions(-)
diff --git a/pre-process.c b/pre-process.c
index a60ad687..16cec8e1 100644
--- a/pre-process.c
+++ b/pre-process.c
@@ -650,7 +650,7 @@ static struct token **substitute(struct token **list, const struct token *body,
for (; !eof_token(body); body = body->next) {
struct token *added, *arg;
- struct token **tail;
+ struct token **inserted_at;
const struct token *t;
switch (token_type(body)) {
@@ -674,7 +674,6 @@ static struct token **substitute(struct token **list, const struct token *body,
}
added = dup_token(t, base_pos);
token_type(added) = TOKEN_SPECIAL;
- tail = &added->next;
break;
case TOKEN_MACRO_ARGUMENT:
@@ -686,13 +685,28 @@ static struct token **substitute(struct token **list, const struct token *body,
state = Placeholder;
continue;
}
+ if (state == Concat && merge(containing_token(list), arg)) {
+ arg = arg->next;
+ if (eof_token(arg)) {
+ // merged the sole token in
+ state = Normal;
+ continue;
+ }
+ inserted_at = NULL;
+ } else {
+ inserted_at = list;
+ }
if (body->argnum & (1 << ARGNUM_CONSUME))
- tail = move_into(&added, arg);
+ list = move_into(list, arg);
else
- tail = copy(&added, arg);
- added->pos.newline = body->pos.newline;
- added->pos.whitespace = body->pos.whitespace;
- break;
+ list = copy(list, arg);
+ if (inserted_at) {
+ struct token *p = *inserted_at;
+ p->pos.whitespace = body->pos.whitespace;
+ p->pos.newline = 0;
+ }
+ state = Normal;
+ continue;
case TOKEN_CONCAT:
if (state == Placeholder)
@@ -703,7 +717,6 @@ static struct token **substitute(struct token **list, const struct token *body,
default:
added = dup_token(body, base_pos);
- tail = &added->next;
break;
}
@@ -711,17 +724,12 @@ static struct token **substitute(struct token **list, const struct token *body,
* if we got to doing real concatenation, we already have
* added something into the list, so containing_token() is OK.
*/
- if (state == Concat && merge(containing_token(list), added)) {
- *list = added->next;
- if (tail != &added->next)
- list = tail;
- } else {
+ if (state != Concat || !merge(containing_token(list), added)) {
*list = added;
- list = tail;
+ list = &added->next;
}
state = Normal;
}
- *list = &eof_token_entry;
return list;
}
--
2.47.3
^ permalink raw reply related [flat|nested] 42+ messages in thread* [PATCH 14/21] preparations for __VA_OPT__ support: reshuffle argument slot assignments
2026-03-16 7:03 ` [PATCH 01/21] split copy() into "need to copy" and "can move in place" cases Al Viro
` (11 preceding siblings ...)
2026-03-16 7:04 ` [PATCH 13/21] deal with ## on arguments separately Al Viro
@ 2026-03-16 7:04 ` Al Viro
2026-03-16 7:04 ` [PATCH 15/21] pre-process.c: split try_arg() Al Viro
` (6 subsequent siblings)
19 siblings, 0 replies; 42+ messages in thread
From: Al Viro @ 2026-03-16 7:04 UTC (permalink / raw)
To: linux-sparse; +Cc: chriscli, torvalds, zxh, ben.dooks, dan.carpenter, rf
Move the vararg to slot 0, with the non-vararg arguments in slots
1..fixed_args; for macros without vararg arguments leave slot 0 unused.
Rationale: handling of __VA_OPT__ at expansion time will need to locate
the vararg; having it always in the same slot makes life easier.
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
pre-process.c | 34 ++++++++++++++++++----------------
1 file changed, 18 insertions(+), 16 deletions(-)
diff --git a/pre-process.c b/pre-process.c
index 16cec8e1..fed3dc2a 100644
--- a/pre-process.c
+++ b/pre-process.c
@@ -321,7 +321,7 @@ static int collect_arguments(struct token *start, struct symbol *sym, struct arg
next = collect_arg(start, false, &what->pos);
if (token_type(next) != TOKEN_SPECIAL)
goto Eclosing;
- args[commas].arg[ARG_QUOTED] = start->next;
+ args[commas + 1].arg[ARG_QUOTED] = start->next;
if (!match_op(next, ',')) {
if (commas < fixed - 1)
goto Efew;
@@ -340,7 +340,7 @@ static int collect_arguments(struct token *start, struct symbol *sym, struct arg
if (v && !vararg)
goto Eexcess;
if (vararg)
- args[fixed].arg[ARG_QUOTED] = v;
+ args[0].arg[ARG_QUOTED] = v;
what->next = next->next;
return 1;
@@ -740,8 +740,7 @@ static int expand(struct token **list, struct symbol *sym)
struct ident *expanding = token->ident;
struct token **tail;
struct token *expansion = sym->expansion;
- int nargs = sym->fixed_args + sym->vararg;
- struct arg args[nargs];
+ struct arg args[sym->fixed_args + 1];
if (expanding->tainted) {
token->pos.noexpand = 1;
@@ -1181,6 +1180,7 @@ static int try_arg(struct token *token, enum arg_kind kind, struct arg_state arg
if (nr == macro_nargs)
return 0;
+ nr = nr == macro_vararg ? 0 : nr + 1;
token->argnum = (nr << ARGNUM_BITS_STOLEN) | kind;
token_type(token) = TOKEN_MACRO_ARGUMENT;
switch (kind) {
@@ -1197,7 +1197,7 @@ static int try_arg(struct token *token, enum arg_kind kind, struct arg_state arg
args[nr].needs_raw = token;
args[nr].needs_str = token;
}
- return nr == macro_vararg ? 2 : 1;
+ return nr == 0 ? 2 : 1;
}
static struct token *handle_hash(struct token **p, struct arg_state args[])
@@ -1276,7 +1276,8 @@ Econcat:
static struct token *parse_expansion(struct token *expansion, struct ident *name)
{
- struct arg_state args[macro_nargs] = {};
+ int slots = macro_nargs + (macro_vararg < 0);
+ struct arg_state args[slots] = {};
struct token *token = expansion;
struct token **p;
@@ -1297,7 +1298,7 @@ static struct token *parse_expansion(struct token *expansion, struct ident *name
try_arg(token, ARG_NORMAL, args);
}
}
- for (int i = 0; i < macro_nargs; i++) {
+ for (int i = 0; i < slots; i++) {
if (args[i].needs_str)
args[i].needs_str->argnum |= 1 << ARGNUM_CONSUME;
if (args[i].needs_expanded)
@@ -1975,7 +1976,7 @@ static int handle_nondirective(struct stream *stream, struct token **line, struc
static struct token *first_arg(struct arg *args)
{
- struct token *arg = args[0].arg[ARG_QUOTED];
+ struct token *arg = args[1].arg[ARG_QUOTED];
expand_list(&arg);
return arg;
}
@@ -2265,21 +2266,22 @@ struct token * preprocess(struct token *token)
static void dump_macro(struct symbol *sym)
{
- int nargs = sym->fixed_args + sym->vararg;
- struct ident *args[nargs];
+ int fixed_args = sym->fixed_args;
+ struct ident *args[fixed_args + 1];
struct token *token;
printf("#define %s", show_ident(sym->ident));
token = sym->arglist;
if (token) {
- int narg = 0;
- for (; !eof_token(token); token = token->next) {
+ args[0] = &__VA_ARGS___ident;
+ for (int n = 1; !eof_token(token); token = token->next) {
printf("%s", show_token(token));
- if (token_type(token) == TOKEN_IDENT)
- args[narg++] = token->ident;
+ if (token_type(token) == TOKEN_IDENT) {
+ args[n] = token->ident;
+ if (n++ == fixed_args)
+ n = 0;
+ }
}
- if (narg < nargs)
- args[narg] = &__VA_ARGS___ident;
}
token = sym->expansion;
--
2.47.3
^ permalink raw reply related [flat|nested] 42+ messages in thread* [PATCH 15/21] pre-process.c: split try_arg()
2026-03-16 7:03 ` [PATCH 01/21] split copy() into "need to copy" and "can move in place" cases Al Viro
` (12 preceding siblings ...)
2026-03-16 7:04 ` [PATCH 14/21] preparations for __VA_OPT__ support: reshuffle argument slot assignments Al Viro
@ 2026-03-16 7:04 ` Al Viro
2026-03-16 7:04 ` [PATCH 16/21] __VA_OPT__: parsing Al Viro
` (5 subsequent siblings)
19 siblings, 0 replies; 42+ messages in thread
From: Al Viro @ 2026-03-16 7:04 UTC (permalink / raw)
To: linux-sparse; +Cc: chriscli, torvalds, zxh, ben.dooks, dan.carpenter, rf
more __VA_OPT__ preparations - we want to split the "parse the possible
variable" from the parts that are sensitive to the kind of variable
access (in particular, to subsequent ## being or not being there).
With __VA_OPT__ we'll have a possibility of relevant ## being a lot
further ahead than the next token and we won't find it until we'd parsed
the entire __VA_OPT__(.....).
We could check for __VA_OPT__ _before_ checking for arguments, but that
ends up screwing code generation a lot, slowing down the normal case
where we've not a single __VA_OPT__ in the input.
Replace try_arg() with two new primitives:
* check_arg() - returns 0 if the next token is not an argument;
if the token is an argument, it gets converted to TOKEN_MACRO_ARGUMENT and
slot number + 1 is returned. That function gets only token and arg_state
array - 'kind' is not known yet. At the moment 'args' is not needed,
but it will be needed for __VA_OPT__ handling, so that argument stays.
Note that unlike try_arg() we don't need a special return value to tell
vararg from non-vararg argument - the slot number is sufficient now.
It's a vararg if and only if it occupies slot 0, i.e. if check_arg()
has returned 1.
* seen_arg() - gets called only for TOKEN_MACRO_ARGUMENT token,
does the rest of what try_arg() used to do. Returns void.
Calls of try_arg() are replaced with combinations of these two; the
first try_arg() in handle_hashhash() is lifted into the only caller of
handle_hashhash(), and its check_arg() is folded with the one we do for
the non-## case there.
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
pre-process.c | 37 ++++++++++++++++++++++++++-----------
1 file changed, 26 insertions(+), 11 deletions(-)
diff --git a/pre-process.c b/pre-process.c
index fed3dc2a..51ad916c 100644
--- a/pre-process.c
+++ b/pre-process.c
@@ -1166,7 +1166,7 @@ struct arg_state {
struct token *needs_str;
};
-static int try_arg(struct token *token, enum arg_kind kind, struct arg_state args[])
+static int check_arg(struct token *token, struct arg_state args[])
{
struct ident *ident = token->ident;
int nr;
@@ -1181,8 +1181,14 @@ static int try_arg(struct token *token, enum arg_kind kind, struct arg_state arg
return 0;
nr = nr == macro_vararg ? 0 : nr + 1;
- token->argnum = (nr << ARGNUM_BITS_STOLEN) | kind;
+ token->argnum = nr << ARGNUM_BITS_STOLEN;
token_type(token) = TOKEN_MACRO_ARGUMENT;
+ return nr + 1;
+}
+
+static void seen_arg(struct token *token, enum arg_kind kind, struct arg_state args[], int nr)
+{
+ token->argnum |= kind;
switch (kind) {
case ARG_QUOTED:
args[nr].needs_raw = token;
@@ -1197,7 +1203,6 @@ static int try_arg(struct token *token, enum arg_kind kind, struct arg_state arg
args[nr].needs_raw = token;
args[nr].needs_str = token;
}
- return nr == 0 ? 2 : 1;
}
static struct token *handle_hash(struct token **p, struct arg_state args[])
@@ -1205,8 +1210,12 @@ static struct token *handle_hash(struct token **p, struct arg_state args[])
struct token *token = *p;
if (macro_funclike) {
struct token *next = token->next;
- if (!try_arg(next, ARG_STR, args))
+ int nr = check_arg(next, args);
+
+ if (!nr)
goto Equote;
+
+ seen_arg(next, ARG_STR, args, nr - 1);
next->pos.whitespace = token->pos.whitespace;
__free_token(token);
token = *p = next;
@@ -1226,12 +1235,10 @@ static struct token *handle_hashhash(struct token *token, struct arg_state args[
struct token *last = token;
struct token *concat;
int state = match_op(token, ',');
-
- try_arg(token, ARG_QUOTED, args);
+ int nr;
while (1) {
struct token *t;
- int is_arg;
/* eat duplicate ## */
concat = token->next;
@@ -1251,10 +1258,13 @@ static struct token *handle_hashhash(struct token *token, struct arg_state args[
return NULL;
}
- is_arg = try_arg(t, ARG_QUOTED, args);
+ nr = check_arg(t, args);
+ if (nr > 0)
+ seen_arg(t, ARG_QUOTED, args, nr - 1);
- if (state == 1 && is_arg) {
- state = is_arg;
+ if (state == 1 && nr > 0) {
+ if (nr == 1)
+ state = 2;
} else {
last = t;
state = match_op(t, ',');
@@ -1280,6 +1290,7 @@ static struct token *parse_expansion(struct token *expansion, struct ident *name
struct arg_state args[slots] = {};
struct token *token = expansion;
struct token **p;
+ int nr;
if (match_op(token, SPECIAL_HASHHASH))
goto Econcat;
@@ -1290,12 +1301,16 @@ static struct token *parse_expansion(struct token *expansion, struct ident *name
if (!token)
return NULL;
}
+ nr = check_arg(token, args);
if (match_op(token->next, SPECIAL_HASHHASH)) {
+ if (nr > 0)
+ seen_arg(token, ARG_QUOTED, args, nr - 1);
token = handle_hashhash(token, args);
if (!token)
return NULL;
} else {
- try_arg(token, ARG_NORMAL, args);
+ if (nr > 0)
+ seen_arg(token, ARG_NORMAL, args, nr - 1);
}
}
for (int i = 0; i < slots; i++) {
--
2.47.3
^ permalink raw reply related [flat|nested] 42+ messages in thread* [PATCH 16/21] __VA_OPT__: parsing
2026-03-16 7:03 ` [PATCH 01/21] split copy() into "need to copy" and "can move in place" cases Al Viro
` (13 preceding siblings ...)
2026-03-16 7:04 ` [PATCH 15/21] pre-process.c: split try_arg() Al Viro
@ 2026-03-16 7:04 ` Al Viro
2026-03-16 7:04 ` [PATCH 17/21] expansion-time va_opt handling Al Viro
` (4 subsequent siblings)
19 siblings, 0 replies; 42+ messages in thread
From: Al Viro @ 2026-03-16 7:04 UTC (permalink / raw)
To: linux-sparse; +Cc: chriscli, torvalds, zxh, ben.dooks, dan.carpenter, rf
va-opt-replacement can occur in any place where a macro argument of a
vararg macro might. It consists of identifier __VA_OPT__, followed by
'(', a sequence of pp-tokens with balanced parentheses (body of that
va-opt-replacement) and finally a ')'.
Body of va-opt-replacement may not contain __VA_OPT__ and may not begin
or end with a ##.
At the expansion time va-opt-replacement is handled at the same stage as
argument substitution. What happens depends upon the value of __VA_ARGS__
- if it would expand to an empty token sequence, each va-opt-replacement
in the body is treated the same way as an occurrence of an empty argument
(replaced by an empty string literal if preceded by a # operator and by
placemarker token otherwise).
If __VA_ARGS__ does *not* expand to an empty token sequence, the body of
va-opt-replacement is subjected to argument substitution and # processing,
as if it had been the entire macro body. Leading and trailing whitespace
is stripped from the result. If va-opt-replacement is not preceded by
a # operator, the resulting list is substituted in its place. If it
*is* preceded by a # operator, the resulting list is subjected to ##
processing/placemarker removal and converted into a string literal token.
That token is substituted in place of # va-opt-replacement combination.
All of that is followed by usual processing of remaining ## operators and
placemarker removal (we are, of course, allowed to calculate the individual
token concatenations earlier, provided that end result is the same).
For non-stringified instances it's _almost_ the same as if all
va-opt-replacements had been replaced with their bodies in case when
__VA_ARGS__ expands to non-empty sequence of tokens; the only difference
is that ## next to va-opt-replacement does not suppress expansion of
arguments inside; for example
#define FOO BAR
#define A(X) X ## 1 // X is not expanded
#define B(X,...) __VA_OPT__(X) ## 1 // X is expanded
A(FOO)
B(FOO,_)
B(FOO)
yields
FOO1
BAR1
1
Any ## inside the va-opt-replacement still has the usual effect on the
adjacent macro arguments.
In other words, for non-stringified __VA_OPT__ we can simply
* parse its body as if it had been an entire macro (with the
usual handling of arguments)
* when substitute() gets to va-opt-replacement, check if expansion
of __VA_ARGS__ is empty
* if it is, just do what we do when seeing an empty argument,
otherwise switch to taking tokens to interpret from the body of that
va-opt-replacement until we reach its end, then proceed to interpret
the rest of the body of our macro.
For stringified __VA_OPT__ we need to save the state of interpreter (body,
list, state), switch to (body of va-opt-replacement, private list, Normal)
and once we are done stringify the private list, restore the saved state
and add the string token we've got to the main list, same as usual.
Note on whitespace handling: whitespace in front of the first token
coming from va-opt-replacement is _not_ affected by whatever whitespace
we might have between __VA_OPT__ and '(' or '(' and the body; only
the whitespace preceding the __VA_OPT__ itself matters.
Representation:
* new token types: TOKEN_VA_OPT and TOKEN_VA_OPT_STR; va-opt-replacement
and # va-opt-replacement resp. get converted to that, with the body +
surrounding parentheses stripped from the list and reference to the
opening parenthesis stored into ->va_opt_linkage of the converted __VA_OPT__
token.
Closing parenthesis is converted to TOKEN_VA_OPT; its ->next points to
eof_token_entry to make it distinguishable from the normal TOKEN_VA_OPT
and its ->va_opt_linkage points back to the originating TOKEN_VA_OPT or
TOKEN_VA_OPT_STR - basically, that will serve as return instruction.
We could add a separate token type for that, but that would only make
things more inconvenient at expansion time.
Note that in all cases ->va_opt_linkage points to the token immediately
preceding the ones we should proceed to; that will simplify life at
expansion time.
This commit contains the parser side of the things. Substitution side
is done in the next one.
* check_arg() taught to recognize and parse __VA_OPT__(...); returns -1
on failure and 0 (not an argument of macro) on success. Callers updated.
* dump_macro() and token_list_different() taught to handle those.
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
ident-list.h | 1 +
pre-process.c | 233 ++++++++++++++++++-----
token.h | 3 +
validation/preprocessor/dump-macro.c | 4 +-
validation/preprocessor/va_opt_compare.c | 28 +++
validation/preprocessor/va_opt_parse.c | 37 ++++
6 files changed, 258 insertions(+), 48 deletions(-)
create mode 100644 validation/preprocessor/va_opt_compare.c
create mode 100644 validation/preprocessor/va_opt_parse.c
diff --git a/ident-list.h b/ident-list.h
index 3c08e8ca..556d4050 100644
--- a/ident-list.h
+++ b/ident-list.h
@@ -65,6 +65,7 @@ IDENT(c_generic_selections);
IDENT(c_static_assert);
__IDENT(pragma_ident, "__pragma__", 0);
__IDENT(__VA_ARGS___ident, "__VA_ARGS__", 0);
+__IDENT(__VA_OPT___ident, "__VA_OPT__", 0);
__IDENT(__func___ident, "__func__", 0);
__IDENT(__FUNCTION___ident, "__FUNCTION__", 0);
__IDENT(__PRETTY_FUNCTION___ident, "__PRETTY_FUNCTION__", 0);
diff --git a/pre-process.c b/pre-process.c
index 51ad916c..0f0dbc56 100644
--- a/pre-process.c
+++ b/pre-process.c
@@ -640,6 +640,11 @@ static struct token *do_argument(const struct token *body,
return arg; // ARG_QUOTED
}
+static bool is_end_va_opt(const struct token *token)
+{
+ return eof_token(token->next);
+}
+
static struct token **substitute(struct token **list, const struct token *body, struct arg *args)
{
struct position *base_pos = &(*list)->pos;
@@ -996,6 +1001,8 @@ static int handle_argv_include(struct stream *stream, struct token **list, struc
return handle_include_path(stream, list, token, 2);
}
+static int token_list_different(struct token *, struct token *);
+
static int token_different(struct token *t1, struct token *t2)
{
int different;
@@ -1039,6 +1046,29 @@ static int token_different(struct token *t1, struct token *t2)
different = memcmp(s1->data, s2->data, s1->length);
break;
}
+ case TOKEN_VA_OPT:
+ if (is_end_va_opt(t1)) {
+ /*
+ * t1 is a return (at the end of __VA_OPT__ body);
+ * the same should be true for t2 and that's it.
+ */
+ different = !is_end_va_opt(t2);
+ break;
+ }
+ /*
+ * t1 is a real __VA_OPT__; the same should be true for
+ * t2...
+ */
+ if (is_end_va_opt(t2)) {
+ different = 1;
+ break;
+ }
+ /* ... and their bodies should not be different */
+ /* fall-through */
+ case TOKEN_VA_OPT_STR:
+ different = token_list_different(t1->va_opt_linkage,
+ t2->va_opt_linkage);
+ break;
default:
different = 1;
break;
@@ -1083,6 +1113,13 @@ Eargs:
return false;
}
+static void misplaced_va_xxx(struct token *arg)
+{
+ sparse_error(arg->pos,
+ "%s can only appear in the expansion of a C99 variadic macro",
+ show_token(arg));
+}
+
static struct token *parse_arguments(struct token *list)
{
struct token *arg = list->next, *next = list;
@@ -1091,7 +1128,8 @@ static struct token *parse_arguments(struct token *list)
return arg;
while (token_type(arg) == TOKEN_IDENT) {
- if (arg->ident == &__VA_ARGS___ident)
+ if (arg->ident == &__VA_ARGS___ident ||
+ arg->ident == &__VA_OPT___ident)
goto Eva_args;
if (!macro_add_arg(arg->pos, arg->ident))
return NULL;
@@ -1156,7 +1194,7 @@ Enotclosed:
sparse_error(arg->pos, "missing ')' in macro parameter list");
return NULL;
Eva_args:
- sparse_error(arg->pos, "__VA_ARGS__ can only appear in the expansion of a C99 variadic macro");
+ misplaced_va_xxx(arg);
return NULL;
}
@@ -1166,24 +1204,84 @@ struct arg_state {
struct token *needs_str;
};
+static bool in_va_opt;
+
+static struct token **parse_body(struct token **list, struct arg_state args[]);
+
+static int parse_va_opt(struct token *token, struct arg_state args[])
+{
+ struct token **p = &token->next;
+ struct token *next = *p;
+ int nesting = 0;
+
+ if (macro_vararg < 0)
+ goto Evararg;
+ if (in_va_opt)
+ goto Enested;
+
+ if (!match_op(next, '('))
+ goto Eunterminated;
+ token_type(token) = TOKEN_VA_OPT;
+ token->va_opt_linkage = next;
+ next->next->pos.whitespace = token->pos.whitespace;
+ for (; !eof_token(next); p = &next->next, next = *p) {
+ if (token_type(next) != TOKEN_SPECIAL)
+ continue;
+ if (next->special == ')') {
+ if (!--nesting) {
+ *p = &eof_token_entry; // cut prior to that ')'
+ in_va_opt = true;
+ p = parse_body(&token->va_opt_linkage->next, args);
+ in_va_opt = false;
+ if (!p)
+ return -1;
+ // strip everything up to ')' from the list
+ token->next = next->next;
+ // convert the ')' into return
+ token_type(next) = TOKEN_VA_OPT;
+ next->va_opt_linkage = token;
+ next->next = &eof_token_entry;
+ // and reattach it to the end of body
+ *p = next;
+ return 0;
+ }
+ } else if (next->special == '(')
+ nesting++;
+ }
+Eunterminated:
+ sparse_error(token->pos, "unterminated __VA_OPT__");
+ return -1;
+
+Enested:
+ sparse_error(token->pos, "__VA_OPT__ may not appear in a __VA_OPT__");
+ return -1;
+Evararg:
+ misplaced_va_xxx(token);
+ return -1;
+}
+
static int check_arg(struct token *token, struct arg_state args[])
{
- struct ident *ident = token->ident;
+ struct ident *ident;
int nr;
- if (!macro_funclike || token_type(token) != TOKEN_IDENT)
+ if (!macro_nargs || token_type(token) != TOKEN_IDENT)
return 0;
+ ident = token->ident;
for (nr = 0; nr < macro_nargs && macro_arg_name[nr] != ident; nr++)
;
- if (nr == macro_nargs)
- return 0;
+ if (nr < macro_nargs) {
+ nr = nr == macro_vararg ? 0 : nr + 1;
+ token->argnum = nr << ARGNUM_BITS_STOLEN;
+ token_type(token) = TOKEN_MACRO_ARGUMENT;
+ return nr + 1;
+ }
- nr = nr == macro_vararg ? 0 : nr + 1;
- token->argnum = nr << ARGNUM_BITS_STOLEN;
- token_type(token) = TOKEN_MACRO_ARGUMENT;
- return nr + 1;
+ if (ident != &__VA_OPT___ident)
+ return 0;
+ return parse_va_opt(token, args);
}
static void seen_arg(struct token *token, enum arg_kind kind, struct arg_state args[], int nr)
@@ -1210,13 +1308,19 @@ static struct token *handle_hash(struct token **p, struct arg_state args[])
struct token *token = *p;
if (macro_funclike) {
struct token *next = token->next;
- int nr = check_arg(next, args);
-
- if (!nr)
- goto Equote;
+ int nr;
- seen_arg(next, ARG_STR, args, nr - 1);
next->pos.whitespace = token->pos.whitespace;
+
+ nr = check_arg(next, args);
+ if (nr < 0)
+ return NULL;
+ if (token_type(next) == TOKEN_MACRO_ARGUMENT)
+ seen_arg(next, ARG_STR, args, nr - 1);
+ else if (token_type(next) == TOKEN_VA_OPT)
+ token_type(next) = TOKEN_VA_OPT_STR;
+ else
+ goto Equote;
__free_token(token);
token = *p = next;
} else {
@@ -1259,6 +1363,8 @@ static struct token *handle_hashhash(struct token *token, struct arg_state args[
}
nr = check_arg(t, args);
+ if (nr < 0)
+ return NULL;
if (nr > 0)
seen_arg(t, ARG_QUOTED, args, nr - 1);
@@ -1284,24 +1390,24 @@ Econcat:
return NULL;
}
-static struct token *parse_expansion(struct token *expansion, struct ident *name)
+static struct token **parse_body(struct token **list, struct arg_state args[])
{
- int slots = macro_nargs + (macro_vararg < 0);
- struct arg_state args[slots] = {};
- struct token *token = expansion;
- struct token **p;
- int nr;
+ struct token *token = *list;
if (match_op(token, SPECIAL_HASHHASH))
goto Econcat;
- for (p = &expansion; !eof_token(token); p = &token->next, token = *p) {
+ while (!eof_token(token)) {
+ int nr;
+
if (match_op(token, '#')) {
- token = handle_hash(p, args);
+ token = handle_hash(list, args);
if (!token)
return NULL;
}
nr = check_arg(token, args);
+ if (nr < 0)
+ return NULL;
if (match_op(token->next, SPECIAL_HASHHASH)) {
if (nr > 0)
seen_arg(token, ARG_QUOTED, args, nr - 1);
@@ -1312,7 +1418,26 @@ static struct token *parse_expansion(struct token *expansion, struct ident *name
if (nr > 0)
seen_arg(token, ARG_NORMAL, args, nr - 1);
}
+ list = &token->next;
+ token = *list;
}
+ return list;
+
+Econcat:
+ sparse_error(token->pos, "'##' cannot appear at the ends of macro expansion");
+ return NULL;
+}
+
+static struct token *parse_expansion(struct token *expansion, struct ident *name)
+{
+ int slots = macro_nargs + (macro_vararg < 0);
+ struct arg_state args[slots] = {};
+ struct token **tail;
+ struct token *token;
+
+ tail = parse_body(&expansion, args);
+ if (!tail)
+ return NULL;
for (int i = 0; i < slots; i++) {
if (args[i].needs_str)
args[i].needs_str->argnum |= 1 << ARGNUM_CONSUME;
@@ -1329,13 +1454,9 @@ static struct token *parse_expansion(struct token *expansion, struct ident *name
token = alloc_token(&expansion->pos);
token_type(token) = TOKEN_UNTAINT;
token->ident = name;
- token->next = *p;
- *p = token;
+ token->next = &eof_token_entry;
+ *tail = token;
return expansion;
-
-Econcat:
- sparse_error(token->pos, "'##' cannot appear at the ends of macro expansion");
- return NULL;
}
static int do_define(struct position pos, struct token *token, struct ident *name,
@@ -2279,6 +2400,40 @@ struct token * preprocess(struct token *token)
return token;
}
+static void dump_body(struct token *token, struct ident *args[])
+{
+ bool first = true;
+ while (!eof_token(token) && token_type(token) != TOKEN_UNTAINT) {
+ struct token *next = token->next;
+ if (!first && token->pos.whitespace)
+ putchar(' ');
+ first = false;
+ switch (token_type(token)) {
+ case TOKEN_CONCAT:
+ printf("##");
+ break;
+ case TOKEN_MACRO_ARGUMENT:
+ if (argkind(token) == ARG_STR)
+ printf("#");
+ printf("%s", show_ident(args[argnum(token)]));
+ break;
+ default:
+ printf("%s", show_token(token));
+ break;
+ case TOKEN_VA_OPT_STR:
+ printf("#");
+ /* fall-through */
+ case TOKEN_VA_OPT:
+ if (is_end_va_opt(token))
+ break;
+ printf("__VA_OPT__(");
+ dump_body(token->va_opt_linkage->next, args);
+ printf(")");
+ }
+ token = next;
+ }
+}
+
static void dump_macro(struct symbol *sym)
{
int fixed_args = sym->fixed_args;
@@ -2298,26 +2453,10 @@ static void dump_macro(struct symbol *sym)
}
}
}
+ putchar(' ');
token = sym->expansion;
- while (token_type(token) != TOKEN_UNTAINT) {
- struct token *next = token->next;
- if (token->pos.whitespace)
- putchar(' ');
- switch (token_type(token)) {
- case TOKEN_CONCAT:
- printf("##");
- break;
- case TOKEN_MACRO_ARGUMENT:
- if (argkind(token) == ARG_STR)
- printf("#");
- printf("%s", show_ident(args[argnum(token)]));
- break;
- default:
- printf("%s", show_token(token));
- }
- token = next;
- }
+ dump_body(token, args);
putchar('\n');
}
diff --git a/token.h b/token.h
index e469e02d..3edf4ce1 100644
--- a/token.h
+++ b/token.h
@@ -102,6 +102,8 @@ enum token_type {
TOKEN_MACRO_ARGUMENT,
TOKEN_CONCAT,
TOKEN_GNU_KLUDGE,
+ TOKEN_VA_OPT,
+ TOKEN_VA_OPT_STR,
TOKEN_UNTAINT,
TOKEN_IF,
TOKEN_SKIP_GROUPS,
@@ -199,6 +201,7 @@ struct token {
unsigned int special;
struct string *string;
int argnum;
+ struct token *va_opt_linkage;
char embedded[4];
};
};
diff --git a/validation/preprocessor/dump-macro.c b/validation/preprocessor/dump-macro.c
index 710c1027..b0085840 100644
--- a/validation/preprocessor/dump-macro.c
+++ b/validation/preprocessor/dump-macro.c
@@ -1,11 +1,13 @@
#define A(X,Y,...) __VA_ARGS__,Y,X
#define B(X,Y...) Y
+#define C(...) __VA_OPT__(1 #__VA_ARGS__) #__VA_OPT__(1 __VA_ARGS__)
/*
* check-name: -dM handling of varargs
- * check-command: sparse -E -dM $file | tail -2
+ * check-command: sparse -E -dM $file | tail -3
*
* check-output-start
#define A(X,Y,...) __VA_ARGS__,Y,X
#define B(X,Y...) Y
+#define C(...) __VA_OPT__(1 #__VA_ARGS__) #__VA_OPT__(1 __VA_ARGS__)
* check-output-end
*/
diff --git a/validation/preprocessor/va_opt_compare.c b/validation/preprocessor/va_opt_compare.c
new file mode 100644
index 00000000..ad15cabe
--- /dev/null
+++ b/validation/preprocessor/va_opt_compare.c
@@ -0,0 +1,28 @@
+#define OK1(X,...) __VA_OPT__(X =)
+#define OK1(X,...) __VA_OPT__(X =)
+#define OK2(X,...) #__VA_OPT__(X =)
+#define OK2(X,...) #__VA_OPT__(X =)
+#define BAD1(X,...) __VA_OPT__(X)
+#define BAD1(X,...) __VA_OPT__(_)
+#define BAD2(X,...) __VA_OPT__(,)
+#define BAD2(X,...) ,
+#define BAD3(X,...) __VA_OPT__(,)
+#define BAD3(X,...) #__VA_OPT__(,)
+/*
+ * check-name: __VA_OPT__ comparison
+ * check-command: sparse -E $file
+ *
+ * check-output-start
+
+
+ * check-output-end
+ *
+ * check-error-start
+preprocessor/va_opt_compare.c:6:9: warning: preprocessor token BAD1 redefined
+preprocessor/va_opt_compare.c:5:9: this was the original definition
+preprocessor/va_opt_compare.c:8:9: warning: preprocessor token BAD2 redefined
+preprocessor/va_opt_compare.c:7:9: this was the original definition
+preprocessor/va_opt_compare.c:10:9: warning: preprocessor token BAD3 redefined
+preprocessor/va_opt_compare.c:9:9: this was the original definition
+ * check-error-end
+ */
diff --git a/validation/preprocessor/va_opt_parse.c b/validation/preprocessor/va_opt_parse.c
new file mode 100644
index 00000000..4eb8675d
--- /dev/null
+++ b/validation/preprocessor/va_opt_parse.c
@@ -0,0 +1,37 @@
+#define A(__VA_OPT__)
+#define B(X) __VA_OPT__(_)
+#define C(X,...) __VA_OPT__(__VA_OPT__(_))
+#define D(X,...) __VA_OPT__
+#define E(X,...) __VA_OPT__(_
+#define OK(X,...) __VA_OPT__()
+#define OK2(X,...) __VA_OPT__(,(,,),)
+#define F(X,...) __VA_OPT__(,(,,,)
+#define OK3(X,...) __VA_OPT__(,(,,),))
+#define G1(...) __VA_OPT__(##)
+#define G2(...) __VA_OPT__(##,)
+#define G3(...) __VA_OPT__(,##)
+#define H(...) __VA_OPT__(#1)
+#define OK4(X,...) __VA_OPT__(__VA_ARGS__,#X)
+#define OK5(X,...) #__VA_OPT__(__VA_ARGS__,#X)
+/*
+ * check-name: __VA_OPT__ parsing
+ * check-command: sparse -E $file
+ *
+ * check-output-start
+
+
+ * check-output-end
+ *
+ * check-error-start
+preprocessor/va_opt_parse.c:1:11: error: __VA_OPT__ can only appear in the expansion of a C99 variadic macro
+preprocessor/va_opt_parse.c:2:14: error: __VA_OPT__ can only appear in the expansion of a C99 variadic macro
+preprocessor/va_opt_parse.c:3:29: error: __VA_OPT__ may not appear in a __VA_OPT__
+preprocessor/va_opt_parse.c:4:18: error: unterminated __VA_OPT__
+preprocessor/va_opt_parse.c:5:18: error: unterminated __VA_OPT__
+preprocessor/va_opt_parse.c:8:18: error: unterminated __VA_OPT__
+preprocessor/va_opt_parse.c:10:28: error: '##' cannot appear at the ends of macro expansion
+preprocessor/va_opt_parse.c:11:28: error: '##' cannot appear at the ends of macro expansion
+preprocessor/va_opt_parse.c:12:29: error: '##' cannot appear at the ends of macro expansion
+preprocessor/va_opt_parse.c:13:27: error: '#' is not followed by a macro parameter
+ * check-error-end
+ */
--
2.47.3
^ permalink raw reply related [flat|nested] 42+ messages in thread* [PATCH 17/21] expansion-time va_opt handling
2026-03-16 7:03 ` [PATCH 01/21] split copy() into "need to copy" and "can move in place" cases Al Viro
` (14 preceding siblings ...)
2026-03-16 7:04 ` [PATCH 16/21] __VA_OPT__: parsing Al Viro
@ 2026-03-16 7:04 ` Al Viro
2026-03-16 7:04 ` [PATCH 18/21] merge(): saner handling of ->noexpand Al Viro
` (3 subsequent siblings)
19 siblings, 0 replies; 42+ messages in thread
From: Al Viro @ 2026-03-16 7:04 UTC (permalink / raw)
To: linux-sparse; +Cc: chriscli, torvalds, zxh, ben.dooks, dan.carpenter, rf
Teach the interpreter (== substitute()) to handle TOKEN_VA_OPT and
TOKEN_VA_OPT_STR.
Two tricky parts, both related to calculating when an argument can be
consumed. One is that in situation like
#define A(x,...) __VA_OPT__(x) foo_##x x
we might end up doing expansion of x either at the 1st occurrence (inside
__VA_OPT__) or at the 1st one _not_ inside __VA_OPT__ (the 3rd one in
this example). So at parsing time we need to keep track of whether
we'd already seen an unconditional use of expanded form and similarly
for stringified one.
Another is that getting to the first __VA_OPT__ means that
we need to find out whether the expanded form of __VA_ARGS__ is empty.
If there'd been a prior expanding occurrence of __VA_ARGS__, we are
fine; if there hadn't, we need to make sure that unexpanded form
survives at least until that point.
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
pre-process.c | 106 +++++++++++++++++++-
validation/preprocessor/va_opt.c | 54 ++++++++++
validation/preprocessor/va_opt2.c | 34 +++++++
validation/preprocessor/va_opt_whitespace.c | 14 +++
4 files changed, 204 insertions(+), 4 deletions(-)
create mode 100644 validation/preprocessor/va_opt.c
create mode 100644 validation/preprocessor/va_opt2.c
create mode 100644 validation/preprocessor/va_opt_whitespace.c
diff --git a/pre-process.c b/pre-process.c
index 0f0dbc56..eec0569c 100644
--- a/pre-process.c
+++ b/pre-process.c
@@ -419,6 +419,18 @@ static struct token *stringify(struct token *arg)
return token;
}
+static struct token *empty_string(const struct position *pos)
+{
+ struct token *token = __alloc_token(0);
+ static struct string empty = {.immutable = 1, .length = 1, .data = ""};
+
+ token->pos = *pos;
+ token_type(token) = TOKEN_STRING;
+ token->string = &empty;
+ token->next = &eof_token_entry;
+ return token;
+}
+
/*
* Possibly valid combinations:
* - ident + ident -> ident
@@ -645,11 +657,28 @@ static bool is_end_va_opt(const struct token *token)
return eof_token(token->next);
}
+static bool skip_va_opt(struct arg *args, struct ident *expanding)
+{
+ struct token *arg = args[0].arg[ARG_NORMAL];
+ if (arg)
+ return eof_token(arg);
+ arg = args[0].arg[ARG_QUOTED];
+ if (!arg || eof_token(arg))
+ return true;
+ arg = dup_list(arg);
+ expanding->tainted = 0;
+ expand_list(&arg);
+ expanding->tainted = 1;
+ args[0].arg[ARG_NORMAL] = arg;
+ return eof_token(arg);
+}
+
static struct token **substitute(struct token **list, const struct token *body, struct arg *args)
{
struct position *base_pos = &(*list)->pos;
- enum {Normal, Placeholder, Concat} state = Normal;
+ enum {Normal, Placeholder, Concat} state = Normal, saved_state = Normal;
struct ident *expanding = (*list)->ident;
+ struct token **saved_list = NULL, *va_opt_list;
expanding->tainted = 1;
@@ -720,6 +749,47 @@ static struct token **substitute(struct token **list, const struct token *body,
state = Concat;
continue;
+ case TOKEN_VA_OPT:
+ // entering va_opt?
+ if (!is_end_va_opt(body)) {
+ if (skip_va_opt(args, expanding)) {
+ if (state == Concat)
+ state = Normal;
+ else
+ state = Placeholder;
+ continue;
+ }
+ body = body->va_opt_linkage;
+ continue;
+ }
+ body = body->va_opt_linkage;
+ // leaving va_opt?
+ if (token_type(body) == TOKEN_VA_OPT)
+ continue;
+ // leaving #va_opt
+ if (list == &va_opt_list) {
+ added = empty_string(base_pos);
+ } else {
+ *list = &eof_token_entry;
+ added = stringify(va_opt_list);
+ }
+ list = saved_list;
+ state = saved_state;
+ break;
+
+ case TOKEN_VA_OPT_STR:
+ // entering #va_opt
+ if (!skip_va_opt(args, expanding)) {
+ saved_state = state;
+ state = Normal;
+ saved_list = list;
+ list = &va_opt_list;
+ body = body->va_opt_linkage;
+ continue;
+ }
+ added = empty_string(base_pos);
+ break;
+
default:
added = dup_token(body, base_pos);
break;
@@ -1202,9 +1272,11 @@ struct arg_state {
struct token *needs_raw;
struct token *needs_expanded;
struct token *needs_str;
+ bool seen_uncond_expand;
+ bool seen_uncond_str;
};
-static bool in_va_opt;
+static bool in_va_opt, seen_va_opt;
static struct token **parse_body(struct token **list, struct arg_state args[]);
@@ -1221,6 +1293,23 @@ static int parse_va_opt(struct token *token, struct arg_state args[])
if (!match_op(next, '('))
goto Eunterminated;
+ if (!seen_va_opt) {
+ /*
+ * The first __VA_OPT__() will need an expanded __VA_ARGS__.
+ * If we had no prior expanded occurrences of __VA_ARGS__,
+ * we'll need its unexpanded form to survive until that point.
+ * Only the cannibalization of unexpanded form needs to be
+ * prevented; cannibalization of expanded form doesn't matter.
+ * We only want to know if it's an empty list, i.e. equal to
+ * &eof_token_entry, and the pointer stored in struct args
+ * ->arg[ARG_NORMAL] doesn't change when we get to the last
+ * expanded occurrence of __VA_ARGS__ and consume the list
+ * it's pointing to.
+ */
+ if (!args[0].needs_expanded)
+ args[0].needs_raw = token;
+ seen_va_opt = true;
+ }
token_type(token) = TOKEN_VA_OPT;
token->va_opt_linkage = next;
next->next->pos.whitespace = token->pos.whitespace;
@@ -1292,13 +1381,19 @@ static void seen_arg(struct token *token, enum arg_kind kind, struct arg_state a
args[nr].needs_raw = token;
break;
case ARG_NORMAL:
- if (!args[nr].needs_expanded)
+ if (!args[nr].seen_uncond_expand &&
+ (!in_va_opt || !args[nr].needs_expanded)) {
+ args[nr].seen_uncond_expand = !in_va_opt;
args[nr].needs_raw = token;
+ }
args[nr].needs_expanded = token;
break;
default: // ARG_STR
- if (!args[nr].needs_str)
+ if (!args[nr].seen_uncond_str &&
+ (!in_va_opt || !args[nr].needs_str)) {
+ args[nr].seen_uncond_str = !in_va_opt;
args[nr].needs_raw = token;
+ }
args[nr].needs_str = token;
}
}
@@ -1436,6 +1531,7 @@ static struct token *parse_expansion(struct token *expansion, struct ident *name
struct token *token;
tail = parse_body(&expansion, args);
+ seen_va_opt = false;
if (!tail)
return NULL;
for (int i = 0; i < slots; i++) {
@@ -1445,6 +1541,8 @@ static struct token *parse_expansion(struct token *expansion, struct ident *name
args[i].needs_expanded->argnum |= 1 << ARGNUM_CONSUME;
if (args[i].needs_raw) {
struct token *p = args[i].needs_raw;
+ if (token_type(p) != TOKEN_MACRO_ARGUMENT)
+ continue;
if (argkind(p) == ARG_QUOTED)
p->argnum |= 1 << ARGNUM_CONSUME;
else if (argkind(p) == ARG_NORMAL)
diff --git a/validation/preprocessor/va_opt.c b/validation/preprocessor/va_opt.c
new file mode 100644
index 00000000..4fa38794
--- /dev/null
+++ b/validation/preprocessor/va_opt.c
@@ -0,0 +1,54 @@
+#define LPAREN() (
+#define G(Q) 42
+#define F(R, X, ...) __VA_OPT__(G R X) )
+int x = F(LPAREN(), 0, <:-); // replaced by int x = 42;
+#undef F
+#undef G
+#define F(...) f(0 __VA_OPT__(,) __VA_ARGS__)
+#define G(X, ...) f(0, X __VA_OPT__(,) __VA_ARGS__)
+#define SDEF(sname, ...) S sname __VA_OPT__(= { __VA_ARGS__ })
+#define EMP
+F(a, b, c) // replaced by f(0, a, b, c)
+F() // replaced by f(0)
+F(EMP) // replaced by f(0)
+G(a, b, c) // replaced by f(0, a, b, c)
+G(a, ) // replaced by f(0, a)
+G(a) // replaced by f(0, a)
+SDEF(foo); // replaced by S foo;
+SDEF(bar, 1, 2); // replaced by S bar = { 1, 2 };
+// may not appear at the beginning of a replacement
+// list (6.10.5.3)
+#define H2(X, Y, ...) __VA_OPT__(X ## Y,) __VA_ARGS__
+H2(a, b, c, d) // replaced by ab, c, d
+#define H3(X, ...) #__VA_OPT__(X##X X##X)
+H3(, 0) // replaced by ""
+#define H4(X, ...) __VA_OPT__(a X ## X) ## b
+H4(, 1) // replaced by a b
+#define H5A(...) __VA_OPT__()/**/__VA_OPT__()
+#define H5B(X) a ## X ## b
+#define H5C(X) H5B(X)
+H5C(H5A()) // replaced by ab
+/*
+ * check-name: __VA_OPT__ expansion (examples from C23)
+ * check-command: sparse -E $file
+ *
+ * check-output-start
+
+int x = 42;
+f(0 , a, b, c)
+f(0)
+f(0)
+f(0, a , b, c)
+f(0, a)
+f(0, a)
+S foo;
+S bar = { 1, 2 };
+ab, c, d
+""
+a b
+ab
+ * check-output-end
+ *
+ * check-error-start
+ * check-error-end
+ */
diff --git a/validation/preprocessor/va_opt2.c b/validation/preprocessor/va_opt2.c
new file mode 100644
index 00000000..5523301e
--- /dev/null
+++ b/validation/preprocessor/va_opt2.c
@@ -0,0 +1,34 @@
+#define B(X) 1
+// don't screw unexpanded __VA_ARGS__ on prior __VA_OPT__
+#define A(...) __VA_OPT__(1) A##__VA_ARGS__
+A(B(_))
+// tests for skipping __VA_OPT__ don't care if expanded __VA_ARGS__
+// has been already consumed
+#define C(...) [__VA_ARGS__ __VA_OPT__(1)]
+C(_)
+C()
+// don't cannibalize unexpanded __VA_ARGS__ too early
+#define E(X)
+#define D(...) A##__VA_ARGS__ R __VA_OPT__(1)
+D(E(_))
+// check that parser clears seen_va_opt on failure exit
+#define BAD(...) __VA_OPT__(,) #1
+#define F(...) A##__VA_ARGS__ R __VA_OPT__(1)
+F(E(_))
+/*
+ * check-name: __VA_ARGS__ cannibalization with __VA_OPT__
+ * check-command: sparse -E $file
+ *
+ * check-output-start
+
+1 AB(_)
+[_ 1]
+[]
+AE(_) R
+AE(_) R
+ * check-output-end
+ *
+ * check-error-start
+preprocessor/va_opt2.c:15:32: error: '#' is not followed by a macro parameter
+ * check-error-end
+ */
diff --git a/validation/preprocessor/va_opt_whitespace.c b/validation/preprocessor/va_opt_whitespace.c
new file mode 100644
index 00000000..727327f0
--- /dev/null
+++ b/validation/preprocessor/va_opt_whitespace.c
@@ -0,0 +1,14 @@
+#define A(X,...) [__VA_OPT__( X)][ __VA_OPT__(X)]
+A(1,_)
+/*
+ * check-name: __VA_OPT__ whitespace
+ * check-command: sparse -E $file
+ *
+ * check-output-start
+
+[1][ 1]
+ * check-output-end
+ *
+ * check-error-start
+ * check-error-end
+ */
--
2.47.3
^ permalink raw reply related [flat|nested] 42+ messages in thread* [PATCH 18/21] merge(): saner handling of ->noexpand
2026-03-16 7:03 ` [PATCH 01/21] split copy() into "need to copy" and "can move in place" cases Al Viro
` (15 preceding siblings ...)
2026-03-16 7:04 ` [PATCH 17/21] expansion-time va_opt handling Al Viro
@ 2026-03-16 7:04 ` Al Viro
2026-03-16 7:04 ` [PATCH 19/21] simplify the calling conventions of collect_arguments() Al Viro
` (2 subsequent siblings)
19 siblings, 0 replies; 42+ messages in thread
From: Al Viro @ 2026-03-16 7:04 UTC (permalink / raw)
To: linux-sparse; +Cc: chriscli, torvalds, zxh, ben.dooks, dan.carpenter, rf
We only care about noexpand for identifiers and solitary #; the latter
can't occur in merge(), the former should just get ->noexpand set
according to ->ident->tainted. That eliminates the last remaining
possibility of having expand() run into a token that has tainted
identifier - the regular noexpand check in the caller is sufficient now.
Should've done that all the way back in 2004...
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
pre-process.c | 10 +---------
1 file changed, 1 insertion(+), 9 deletions(-)
diff --git a/pre-process.c b/pre-process.c
index eec0569c..352f02df 100644
--- a/pre-process.c
+++ b/pre-process.c
@@ -507,7 +507,7 @@ static int merge(struct token *left, struct token *right)
switch (res) {
case TOKEN_IDENT:
left->ident = built_in_ident(buffer);
- left->pos.noexpand = 0;
+ left->pos.noexpand = left->ident->tainted;
return 1;
case TOKEN_NUMBER:
@@ -529,13 +529,11 @@ static int merge(struct token *left, struct token *right)
case TOKEN_WIDE_CHAR:
case TOKEN_WIDE_STRING:
token_type(left) = res;
- left->pos.noexpand = 0;
left->string = right->string;
return 1;
case TOKEN_WIDE_CHAR_EMBEDDED_0 ... TOKEN_WIDE_CHAR_EMBEDDED_3:
token_type(left) = res;
- left->pos.noexpand = 0;
memcpy(left->embedded, right->embedded, 4);
return 1;
@@ -812,16 +810,10 @@ static int expand(struct token **list, struct symbol *sym)
{
struct token *last;
struct token *token = *list;
- struct ident *expanding = token->ident;
struct token **tail;
struct token *expansion = sym->expansion;
struct arg args[sym->fixed_args + 1];
- if (expanding->tainted) {
- token->pos.noexpand = 1;
- return 1;
- }
-
if (sym->arglist) {
if (!match_op(scan_next(&token->next), '('))
return 1;
--
2.47.3
^ permalink raw reply related [flat|nested] 42+ messages in thread* [PATCH 19/21] simplify the calling conventions of collect_arguments()
2026-03-16 7:03 ` [PATCH 01/21] split copy() into "need to copy" and "can move in place" cases Al Viro
` (16 preceding siblings ...)
2026-03-16 7:04 ` [PATCH 18/21] merge(): saner handling of ->noexpand Al Viro
@ 2026-03-16 7:04 ` Al Viro
2026-03-16 7:04 ` [PATCH 20/21] make expand_one_symbol() inline Al Viro
2026-03-16 7:04 ` [PATCH 21/21] substitute(): convert switch() into cascade of ifs Al Viro
19 siblings, 0 replies; 42+ messages in thread
From: Al Viro @ 2026-03-16 7:04 UTC (permalink / raw)
To: linux-sparse; +Cc: chriscli, torvalds, zxh, ben.dooks, dan.carpenter, rf
Currently we call that only after having verified that macro name is
followed by the (, with those two tokens passed as separate arguments.
What's more, collect_arguments() already can tell the caller "don't
expand that" if the arguments are malformed, so there's no reason not to
move the check for opening parenthesis into collect_arguments() - that
makes the calling conventions simpler and it does not incur any cost -
collect_arguments() is going to be inlined into its sole caller anyway.
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
pre-process.c | 24 +++++++++++-------------
1 file changed, 11 insertions(+), 13 deletions(-)
diff --git a/pre-process.c b/pre-process.c
index 352f02df..73f4d615 100644
--- a/pre-process.c
+++ b/pre-process.c
@@ -307,16 +307,17 @@ struct arg {
struct token *arg[3];
};
-static int collect_arguments(struct token *start, struct symbol *sym, struct arg *args, struct token *what)
+static int collect_arguments(struct token *what, int fixed, bool vararg, struct arg *args)
{
- int fixed = sym->fixed_args;
- bool vararg = sym->vararg;
+ struct token *start = scan_next(&what->next);
struct token *next = NULL, *v = NULL;
const char *err;
int commas;
memset(args, 0, sizeof(struct arg) * (fixed + 1));
+ if (!match_op(start, '('))
+ return 0;
for (commas = 0; commas < fixed; commas++) {
next = collect_arg(start, false, &what->pos);
if (token_type(next) != TOKEN_SPECIAL)
@@ -355,7 +356,7 @@ Eexcess:
Eclosing:
err = "unterminated argument list invoking";
out:
- sparse_error(what->pos, "%s macro \"%s\"", err, show_ident(sym->ident));
+ sparse_error(what->pos, "%s macro \"%s\"", err, show_ident(what->ident));
what->next = next;
return 0;
}
@@ -808,23 +809,20 @@ static struct token **substitute(struct token **list, const struct token *body,
static int expand(struct token **list, struct symbol *sym)
{
- struct token *last;
+ struct token *next;
struct token *token = *list;
struct token **tail;
struct token *expansion = sym->expansion;
struct arg args[sym->fixed_args + 1];
- if (sym->arglist) {
- if (!match_op(scan_next(&token->next), '('))
- return 1;
- if (!collect_arguments(token->next, sym, args, token))
- return 1;
- }
+ if (sym->arglist &&
+ !collect_arguments(token, sym->fixed_args, sym->vararg, args))
+ return 1;
if (sym->expand)
return sym->expand(token, args) ? 0 : 1;
- last = token->next;
+ next = token->next;
tail = substitute(list, expansion, args);
/*
* Note that it won't be eof - at least TOKEN_UNTAINT will be there.
@@ -834,7 +832,7 @@ static int expand(struct token **list, struct symbol *sym)
*/
(*list)->pos.newline = token->pos.newline;
(*list)->pos.whitespace = token->pos.whitespace;
- *tail = last;
+ *tail = next;
return 0;
}
--
2.47.3
^ permalink raw reply related [flat|nested] 42+ messages in thread* [PATCH 20/21] make expand_one_symbol() inline
2026-03-16 7:03 ` [PATCH 01/21] split copy() into "need to copy" and "can move in place" cases Al Viro
` (17 preceding siblings ...)
2026-03-16 7:04 ` [PATCH 19/21] simplify the calling conventions of collect_arguments() Al Viro
@ 2026-03-16 7:04 ` Al Viro
2026-03-16 7:04 ` [PATCH 21/21] substitute(): convert switch() into cascade of ifs Al Viro
19 siblings, 0 replies; 42+ messages in thread
From: Al Viro @ 2026-03-16 7:04 UTC (permalink / raw)
To: linux-sparse; +Cc: chriscli, torvalds, zxh, ben.dooks, dan.carpenter, rf
better code generation that way...
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
pre-process.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/pre-process.c b/pre-process.c
index 73f4d615..728feeb3 100644
--- a/pre-process.c
+++ b/pre-process.c
@@ -206,7 +206,7 @@ static void expand_include_level(struct token *token)
replace_with_integer(token, include_level - 1);
}
-static int expand_one_symbol(struct token **list)
+static inline int expand_one_symbol(struct token **list)
{
struct token *token = *list;
struct symbol *sym;
--
2.47.3
^ permalink raw reply related [flat|nested] 42+ messages in thread* [PATCH 21/21] substitute(): convert switch() into cascade of ifs
2026-03-16 7:03 ` [PATCH 01/21] split copy() into "need to copy" and "can move in place" cases Al Viro
` (18 preceding siblings ...)
2026-03-16 7:04 ` [PATCH 20/21] make expand_one_symbol() inline Al Viro
@ 2026-03-16 7:04 ` Al Viro
19 siblings, 0 replies; 42+ messages in thread
From: Al Viro @ 2026-03-16 7:04 UTC (permalink / raw)
To: linux-sparse; +Cc: chriscli, torvalds, zxh, ben.dooks, dan.carpenter, rf
Again, better code generation that way (and I'd like to use likely()
here); it *is* in a very hot loop.
Reorder the TOKEN_... a bit (move TOKEN_UNTAINT up, so that it's less than
TOKEN_MACRO_ARGUMENT) to get the default (and by far the most common case)
via single comparison.
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
pre-process.c | 68 +++++++++++++++++++++++----------------------------
token.h | 3 ++-
2 files changed, 33 insertions(+), 38 deletions(-)
diff --git a/pre-process.c b/pre-process.c
index 728feeb3..ea199a9a 100644
--- a/pre-process.c
+++ b/pre-process.c
@@ -682,34 +682,14 @@ static struct token **substitute(struct token **list, const struct token *body,
expanding->tainted = 1;
for (; !eof_token(body); body = body->next) {
- struct token *added, *arg;
- struct token **inserted_at;
- const struct token *t;
+ struct token *added;
- switch (token_type(body)) {
- case TOKEN_GNU_KLUDGE:
- /*
- * GNU kludge: if we had <comma>##<vararg>, behaviour
- * depends on whether we had enough arguments to have
- * a vararg. If we did, ## is just ignored. Otherwise
- * both , and ## are ignored. Worse, there can be
- * an arbitrary number of ##<arg> in between; if all of
- * those are empty, we act as if they hadn't been there,
- * otherwise we act as if the kludge didn't exist.
- */
- t = body;
- if (handle_kludge(&body, args)) {
- if (state == Concat)
- state = Normal;
- else
- state = Placeholder;
- continue;
- }
- added = dup_token(t, base_pos);
- token_type(added) = TOKEN_SPECIAL;
- break;
+ if (token_type(body) <= TOKEN_LAST_NORMAL) {
+ added = dup_token(body, base_pos);
+ } else if (token_type(body) == TOKEN_MACRO_ARGUMENT) {
+ struct token **inserted_at;
+ struct token *arg;
- case TOKEN_MACRO_ARGUMENT:
arg = do_argument(body, args, expanding);
if (!arg || eof_token(arg)) {
if (state == Concat)
@@ -740,15 +720,33 @@ static struct token **substitute(struct token **list, const struct token *body,
}
state = Normal;
continue;
-
- case TOKEN_CONCAT:
+ } else if (token_type(body) == TOKEN_CONCAT) {
if (state == Placeholder)
state = Normal;
else
state = Concat;
continue;
-
- case TOKEN_VA_OPT:
+ } else if (token_type(body) == TOKEN_GNU_KLUDGE) {
+ const struct token *t = body;
+ /*
+ * GNU kludge: if we had <comma>##<vararg>, behaviour
+ * depends on whether we had enough arguments to have
+ * a vararg. If we did, ## is just ignored. Otherwise
+ * both , and ## are ignored. Worse, there can be
+ * an arbitrary number of ##<arg> in between; if all of
+ * those are empty, we act as if they hadn't been there,
+ * otherwise we act as if the kludge didn't exist.
+ */
+ if (handle_kludge(&body, args)) {
+ if (state == Concat)
+ state = Normal;
+ else
+ state = Placeholder;
+ continue;
+ }
+ added = dup_token(t, base_pos);
+ token_type(added) = TOKEN_SPECIAL;
+ } else if (token_type(body) == TOKEN_VA_OPT) {
// entering va_opt?
if (!is_end_va_opt(body)) {
if (skip_va_opt(args, expanding)) {
@@ -774,9 +772,7 @@ static struct token **substitute(struct token **list, const struct token *body,
}
list = saved_list;
state = saved_state;
- break;
-
- case TOKEN_VA_OPT_STR:
+ } else if (token_type(body) == TOKEN_VA_OPT_STR) {
// entering #va_opt
if (!skip_va_opt(args, expanding)) {
saved_state = state;
@@ -787,10 +783,8 @@ static struct token **substitute(struct token **list, const struct token *body,
continue;
}
added = empty_string(base_pos);
- break;
-
- default:
- added = dup_token(body, base_pos);
+ } else {
+ sparse_error(body->pos, "bad token type(%d)", token_type(body));
break;
}
diff --git a/token.h b/token.h
index 3edf4ce1..5915d6a4 100644
--- a/token.h
+++ b/token.h
@@ -99,12 +99,13 @@ enum token_type {
TOKEN_SPECIAL,
TOKEN_STREAMBEGIN,
TOKEN_STREAMEND,
+ TOKEN_UNTAINT,
+ TOKEN_LAST_NORMAL = TOKEN_UNTAINT,
TOKEN_MACRO_ARGUMENT,
TOKEN_CONCAT,
TOKEN_GNU_KLUDGE,
TOKEN_VA_OPT,
TOKEN_VA_OPT_STR,
- TOKEN_UNTAINT,
TOKEN_IF,
TOKEN_SKIP_GROUPS,
TOKEN_ELSE,
--
2.47.3
^ permalink raw reply related [flat|nested] 42+ messages in thread