public inbox for dtrace@lists.linux.dev
 help / color / mirror / Atom feed
From: Nick Alcock <nick.alcock@oracle.com>
To: dtrace@lists.linux.dev, dtrace-devel@oss.oracle.com
Subject: [PATCH 2/4] lexer: the things inside an enum { ... } declaration are identifiers
Date: Thu, 18 Sep 2025 19:03:36 +0100	[thread overview]
Message-ID: <20250918180338.197827-2-nick.alcock@oracle.com> (raw)
In-Reply-To: <20250918180338.197827-1-nick.alcock@oracle.com>

The code in dt_lex.c:id_or_type() tries to figure out if something is an
identifier or not: if it cannot prove it is an identifier, it concludes
it must be a type name, which later triggers a search for said type (or
identifier) which eventually reaches (expensively) across the entire
kernel CTF.  Usually this is what we want, since we use type names that
actually reside in the kernel extensively and do not expect to have to
decorate all of them with a backtick (`). But if this misfires, bad things can happen.

In the case of enums, existing code in dt_decl.c checks for duplicate
identifiers, and carefully avoids considering code outside the C and D
dicts to be duplicates: but if id_or_type() concludes this enumerator is
probably a type name, we'll import the thing we find even if it's an
identifier, and then conflict. Enumerators cannot be type names, so this
must always be wrong (if we actually do put a type name in there,
dt_parser.c will correctly reject it no matter what the lexer says).

So add yet another piece of parser context identifying when we are
inside the { } in an enum (we set it to 1 when the enum is seen, then
bump it when the braces are seen, so if it's 2 we are in the relevant
context; it is reset to 0 on every ;, ( or }), then use that to forcibly
declare everything seen inside enums an identifier without trying to
chase it down.

Signed-off-by: Nick Alcock <nick.alcock@oracle.com>
---
 libdtrace/dt_lex.l | 17 +++++++++++++++--
 libdtrace/dt_pcb.h |  1 +
 2 files changed, 16 insertions(+), 2 deletions(-)

diff --git a/libdtrace/dt_lex.l b/libdtrace/dt_lex.l
index 9f12f5c7ca289..fd70aa0aa5803 100644
--- a/libdtrace/dt_lex.l
+++ b/libdtrace/dt_lex.l
@@ -88,7 +88,7 @@ if (yypcb->pcb_token != 0) {
 <S0>do		return DT_KEY_DO;
 <S0>double	return DT_KEY_DOUBLE;
 <S0>else	return DT_KEY_ELSE;
-<S0>enum	{ yypcb->pcb_sou_type = 1; return DT_KEY_ENUM; }
+<S0>enum	{ yypcb->pcb_sou_type = 1; yypcb->pcb_enum_decl = 1; return DT_KEY_ENUM; }
 <S0>extern	return DT_KEY_EXTERN;
 <S0>float	return DT_KEY_FLOAT;
 <S0>for		return DT_KEY_FOR;
@@ -128,6 +128,7 @@ if (yypcb->pcb_token != 0) {
 <S2>counter	{ yybegin(YYS_DEFINE);	return DT_KEY_COUNTER; }
 <S2>double	{ yybegin(YYS_EXPR);	return DT_KEY_DOUBLE; }
 <S2>enum	{ yybegin(YYS_EXPR);	yypcb->pcb_sou_type = 1;
+					yypcb->pcb_enum_decl = 1;
 					return DT_KEY_ENUM; }
 <S2>extern	{ yybegin(YYS_EXPR);	return DT_KEY_EXTERN; }
 <S2>float	{ yybegin(YYS_EXPR);	return DT_KEY_FLOAT; }
@@ -463,6 +464,7 @@ if (yypcb->pcb_token != 0) {
 <S0>"("		{
 			yypcb->pcb_parens++;
 			yypcb->pcb_sou_type = 0;
+			yypcb->pcb_enum_decl = 0;
 			return DT_TOK_LPAR;
 		}
 
@@ -488,10 +490,13 @@ if (yypcb->pcb_token != 0) {
 <S2>"{"		{
 			yypcb->pcb_braces++;
 			yypcb->pcb_sou_type = 0;
+			if (yypcb->pcb_enum_decl)
+				yypcb->pcb_enum_decl++;
 			return '{';
 		}
 
 <S0>"}"		{
+			yypcb->pcb_enum_decl = 0;
 			if (--yypcb->pcb_braces < 0)
 				yyerror("extra } in input stream\n");
 			return '}';
@@ -536,7 +541,7 @@ if (yypcb->pcb_token != 0) {
 <S0>"--"	return DT_TOK_SUBSUB;
 <S0>"..."	return DT_TOK_ELLIPSIS;
 <S0>","		return DT_TOK_COMMA;
-<S0>";"		return ';';
+<S0>";"		yypcb->pcb_enum_decl = 0; return ';';
 <S0>{RGX_WS}	; /* discard */
 <S0>"\\"\n	; /* discard */
 <S0>.		{
@@ -769,6 +774,14 @@ id_or_type(const char *s)
 		return DT_TOK_IDENT;
 	}
 
+	/*
+	 * Inside the { } region of an enum declaration: must be an ident.
+	 * Checking for conflicts is handled by dt_decl_enumerator().  No
+	 * need to look anything up here.
+	 */
+	if (yypcb->pcb_enum_decl == 2)
+	    return DT_TOK_IDENT;
+
 	/*
 	 * If the lexeme is a global variable or likely identifier, then it is
 	 * an identifier token.
diff --git a/libdtrace/dt_pcb.h b/libdtrace/dt_pcb.h
index 7c57f83220b28..b6a7620f4920c 100644
--- a/libdtrace/dt_pcb.h
+++ b/libdtrace/dt_pcb.h
@@ -71,6 +71,7 @@ typedef struct dt_pcb {
 	int pcb_parens;		/* number of open parentheses in lexer */
 	int pcb_sou_type;	/* lexer in struct/union type name */
 	int pcb_sou_deref;	/* lexer in struct/union dereference */
+	int pcb_enum_decl;	/* lexer in enum declaration: 2 for inside { }. */
 	int pcb_xlator_input;	/* in translator input type */
 	int pcb_array_dimens;	/* in array dimensions */
 	int pcb_alloca_taints;	/* number of alloca taint changes */
-- 
2.48.1.283.g18c60a128c


  reply	other threads:[~2025-09-18 18:04 UTC|newest]

Thread overview: 11+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2025-09-18 18:03 [PATCH 1/4] parser: fix parser debugging yet again Nick Alcock
2025-09-18 18:03 ` Nick Alcock [this message]
2025-10-03  0:00   ` [DTrace-devel] [PATCH 2/4] lexer: the things inside an enum { ... } declaration are identifiers Kris Van Hees
2025-09-18 18:03 ` [PATCH 3/4] test: enum tests Nick Alcock
2025-10-03  0:00   ` Kris Van Hees
2025-09-18 18:03 ` [PATCH 4/4] test: fix test failure when no /usr/sbin/dtrace exists Nick Alcock
2025-09-18 19:18   ` Eugene Loh
2025-09-23 15:15     ` Nick Alcock
2025-09-23 15:31     ` [PATCH v2 " Nick Alcock
2025-10-03  0:01   ` [DTrace-devel] [PATCH " Kris Van Hees
2025-10-03  0:00 ` [PATCH 1/4] parser: fix parser debugging yet again Kris Van Hees

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20250918180338.197827-2-nick.alcock@oracle.com \
    --to=nick.alcock@oracle.com \
    --cc=dtrace-devel@oss.oracle.com \
    --cc=dtrace@lists.linux.dev \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox