[PATCH 1/7] Fix handling of ident-less declarations

linux-sparse.vger.kernel.org archive mirror
 help / color / mirror / Atom feed

* [PATCH 1/7] Fix handling of ident-less declarations
@ 2009-02-14 12:25 Al Viro
  2009-02-14 18:45 ` Christopher Li
  0 siblings, 1 reply; 6+ messages in thread
From: Al Viro @ 2009-02-14 12:25 UTC (permalink / raw)
  To: linux-sparse


The rule for ident-less declaration is
	declaration -> declaration-specifiers ;
not
	declaration -> declaration-specifiers abstract-declarator;

IOW, struct foo; is OK and so's struct foo {int x; int y;} (and even
simply int; is allowed by syntax - it's rejected by constraints, but
that's a separate story), but not struct foo (void); and its ilk.

See C99 6.7p1 for syntax; C90 is the same in that area and gcc also
behaves the same way.  Unlike gcc I've made it a warning (gcc produces
a hard error for those).

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 parse.c                    |   10 +++++++++-
 validation/missing-ident.c |   18 ++++++++++++++++++
 2 files changed, 27 insertions(+), 1 deletions(-)
 create mode 100644 validation/missing-ident.c

diff --git a/parse.c b/parse.c
index 73e7b65..6100fc2 100644
--- a/parse.c
+++ b/parse.c
@@ -2265,14 +2265,22 @@ struct token *external_declaration(struct token *token, struct symbol_list **lis
 	token = declaration_specifiers(token, &ctype, 0);
 	decl = alloc_symbol(token->pos, SYM_NODE);
 	decl->ctype = ctype;
+	/* Just a type declaration? */
+	if (match_op(token, ';')) {
+		apply_modifiers(token->pos, &decl->ctype);
+		return token->next;
+	}
+
 	token = declarator(token, decl, &ident, 0);
 	apply_modifiers(token->pos, &decl->ctype);
 
 	decl->endpos = token->pos;
 
 	/* Just a type declaration? */
-	if (!ident)
+	if (!ident) {
+		warning(token->pos, "missing identifier in declaration");
 		return expect(token, ';', "end of type declaration");
+	}
 
 	/* type define declaration? */
 	is_typedef = (ctype.modifiers & MOD_TYPEDEF) != 0;
diff --git a/validation/missing-ident.c b/validation/missing-ident.c
new file mode 100644
index 0000000..ce73983
--- /dev/null
+++ b/validation/missing-ident.c
@@ -0,0 +1,18 @@
+int [2];
+int *;
+int (*);
+int ();
+int;
+struct foo;
+union bar {int x; int y;};
+struct baz {int x, :3, y:2;};
+/*
+ * check-name: handling of identifier-less declarations
+ *
+ * check-error-start
+missing-ident.c:1:8: warning: missing identifier in declaration
+missing-ident.c:2:6: warning: missing identifier in declaration
+missing-ident.c:3:8: warning: missing identifier in declaration
+missing-ident.c:4:7: warning: missing identifier in declaration
+ * check-error-end
+ */
-- 
1.5.6.6



^ permalink raw reply related	[flat|nested] 6+ messages in thread

* Re: [PATCH 1/7] Fix handling of ident-less declarations
  2009-02-14 12:25 [PATCH 1/7] Fix handling of ident-less declarations Al Viro
@ 2009-02-14 18:45 ` Christopher Li
  2009-02-14 19:08   ` Al Viro
  0 siblings, 1 reply; 6+ messages in thread
From: Christopher Li @ 2009-02-14 18:45 UTC (permalink / raw)
  To: Al Viro; +Cc: linux-sparse

On Sat, Feb 14, 2009 at 4:25 AM, Al Viro <viro@ftp.linux.org.uk> wrote:
>
> The rule for ident-less declaration is

wow, great.

The series applies to the tree fine. First impression of the series
looks fine. I am still working on it.

Thanks for the patches. BTW, this is not the "lazy type evaluation"
you are talking about right?

Chris

>        declaration -> declaration-specifiers ;
> not
>        declaration -> declaration-specifiers abstract-declarator;
>
> IOW, struct foo; is OK and so's struct foo {int x; int y;} (and even
> simply int; is allowed by syntax - it's rejected by constraints, but
> that's a separate story), but not struct foo (void); and its ilk.
>
> See C99 6.7p1 for syntax; C90 is the same in that area and gcc also
> behaves the same way.  Unlike gcc I've made it a warning (gcc produces
> a hard error for those).
>
> Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
> ---
>  parse.c                    |   10 +++++++++-
>  validation/missing-ident.c |   18 ++++++++++++++++++
>  2 files changed, 27 insertions(+), 1 deletions(-)
>  create mode 100644 validation/missing-ident.c
>
> diff --git a/parse.c b/parse.c
> index 73e7b65..6100fc2 100644
> --- a/parse.c
> +++ b/parse.c
> @@ -2265,14 +2265,22 @@ struct token *external_declaration(struct token *token, struct symbol_list **lis
>        token = declaration_specifiers(token, &ctype, 0);
>        decl = alloc_symbol(token->pos, SYM_NODE);
>        decl->ctype = ctype;
> +       /* Just a type declaration? */
> +       if (match_op(token, ';')) {
> +               apply_modifiers(token->pos, &decl->ctype);
> +               return token->next;
> +       }
> +
>        token = declarator(token, decl, &ident, 0);
>        apply_modifiers(token->pos, &decl->ctype);
>
>        decl->endpos = token->pos;
>
>        /* Just a type declaration? */
> -       if (!ident)
> +       if (!ident) {
> +               warning(token->pos, "missing identifier in declaration");
>                return expect(token, ';', "end of type declaration");
> +       }
>
>        /* type define declaration? */
>        is_typedef = (ctype.modifiers & MOD_TYPEDEF) != 0;
> diff --git a/validation/missing-ident.c b/validation/missing-ident.c
> new file mode 100644
> index 0000000..ce73983
> --- /dev/null
> +++ b/validation/missing-ident.c
> @@ -0,0 +1,18 @@
> +int [2];
> +int *;
> +int (*);
> +int ();
> +int;
> +struct foo;
> +union bar {int x; int y;};
> +struct baz {int x, :3, y:2;};
> +/*
> + * check-name: handling of identifier-less declarations
> + *
> + * check-error-start
> +missing-ident.c:1:8: warning: missing identifier in declaration
> +missing-ident.c:2:6: warning: missing identifier in declaration
> +missing-ident.c:3:8: warning: missing identifier in declaration
> +missing-ident.c:4:7: warning: missing identifier in declaration
> + * check-error-end
> + */
> --
> 1.5.6.6
>
>
> --
> To unsubscribe from this list: send the line "unsubscribe linux-sparse" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
>

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH 1/7] Fix handling of ident-less declarations
  2009-02-14 18:45 ` Christopher Li
@ 2009-02-14 19:08   ` Al Viro
  2009-02-14 20:31     ` Al Viro
       [not found]     ` <70318cbf0902151633w77dc151ek7a6205430d2a19b8@mail.gmail.com>
  0 siblings, 2 replies; 6+ messages in thread
From: Al Viro @ 2009-02-14 19:08 UTC (permalink / raw)
  To: Christopher Li; +Cc: Al Viro, linux-sparse

On Sat, Feb 14, 2009 at 10:45:41AM -0800, Christopher Li wrote:
> On Sat, Feb 14, 2009 at 4:25 AM, Al Viro <viro@ftp.linux.org.uk> wrote:
> >
> > The rule for ident-less declaration is
> 
> wow, great.
> 
> The series applies to the tree fine. First impression of the series
> looks fine. I am still working on it.
> 
> Thanks for the patches. BTW, this is not the "lazy type evaluation"
> you are talking about right?

No, just the first batch of declaration parser fixes...  The next group is
about separating ctype-as-part-of-symbol from ctype-as-parser-state uses.
After that - getting declaration_specifiers() to sane shape, which, BTW,
will relieve the situation with mode bits.  Then -  cleaning up the type
handling...

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH 1/7] Fix handling of ident-less declarations
  2009-02-14 19:08   ` Al Viro
@ 2009-02-14 20:31     ` Al Viro
  2009-02-14 21:30       ` Al Viro
       [not found]     ` <70318cbf0902151633w77dc151ek7a6205430d2a19b8@mail.gmail.com>
  1 sibling, 1 reply; 6+ messages in thread
From: Al Viro @ 2009-02-14 20:31 UTC (permalink / raw)
  To: Christopher Li; +Cc: Al Viro, linux-sparse

On Sat, Feb 14, 2009 at 07:08:36PM +0000, Al Viro wrote:
> On Sat, Feb 14, 2009 at 10:45:41AM -0800, Christopher Li wrote:
> > On Sat, Feb 14, 2009 at 4:25 AM, Al Viro <viro@ftp.linux.org.uk> wrote:
> > >
> > > The rule for ident-less declaration is
> > 
> > wow, great.
> > 
> > The series applies to the tree fine. First impression of the series
> > looks fine. I am still working on it.
> > 
> > Thanks for the patches. BTW, this is not the "lazy type evaluation"
> > you are talking about right?
> 
> No, just the first batch of declaration parser fixes...  The next group is
> about separating ctype-as-part-of-symbol from ctype-as-parser-state uses.
> After that - getting declaration_specifiers() to sane shape, which, BTW,
> will relieve the situation with mode bits.  Then -  cleaning up the type
> handling...

BTW, I wonder whether if it would be better to just scan for the end
of attributes after ( when we are deciding whether it's a function or
nested declarator, leaving the actual handling of these guys to after
the decision.  The thing is, unlike gcc we have the token list anyway,
and it's easier to not bother with passing that crap to parameter_type_list(),
etc.

I'll try to do that and see what falls out; potentially that replaces
7/7 in this series.

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH 1/7] Fix handling of ident-less declarations
  2009-02-14 20:31     ` Al Viro
@ 2009-02-14 21:30       ` Al Viro
  0 siblings, 0 replies; 6+ messages in thread
From: Al Viro @ 2009-02-14 21:30 UTC (permalink / raw)
  To: Christopher Li; +Cc: Al Viro, linux-sparse

On Sat, Feb 14, 2009 at 08:31:54PM +0000, Al Viro wrote:
> On Sat, Feb 14, 2009 at 07:08:36PM +0000, Al Viro wrote:
> > On Sat, Feb 14, 2009 at 10:45:41AM -0800, Christopher Li wrote:
> > > On Sat, Feb 14, 2009 at 4:25 AM, Al Viro <viro@ftp.linux.org.uk> wrote:
> > > >
> > > > The rule for ident-less declaration is
> > > 
> > > wow, great.
> > > 
> > > The series applies to the tree fine. First impression of the series
> > > looks fine. I am still working on it.
> > > 
> > > Thanks for the patches. BTW, this is not the "lazy type evaluation"
> > > you are talking about right?
> > 
> > No, just the first batch of declaration parser fixes...  The next group is
> > about separating ctype-as-part-of-symbol from ctype-as-parser-state uses.
> > After that - getting declaration_specifiers() to sane shape, which, BTW,
> > will relieve the situation with mode bits.  Then -  cleaning up the type
> > handling...
> 
> BTW, I wonder whether if it would be better to just scan for the end
> of attributes after ( when we are deciding whether it's a function or
> nested declarator, leaving the actual handling of these guys to after
> the decision.  The thing is, unlike gcc we have the token list anyway,
> and it's easier to not bother with passing that crap to parameter_type_list(),
> etc.
> 
> I'll try to do that and see what falls out; potentially that replaces
> 7/7 in this series.

... no, it doesn't ;-/  Too much PITA with error recovery, so the original
version still stands.  Alas.

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH 1/7] Fix handling of ident-less declarations
       [not found]       ` <20090216014234.GS28946@ZenIV.linux.org.uk>
@ 2009-02-16 11:14         ` Christopher Li
  0 siblings, 0 replies; 6+ messages in thread
From: Christopher Li @ 2009-02-16 11:14 UTC (permalink / raw)
  To: Al Viro; +Cc: Linux-Sparse

[-- Attachment #1: Type: text/plain, Size: 698 bytes --]

On Sun, Feb 15, 2009 at 5:42 PM, Al Viro <viro@zeniv.linux.org.uk> wrote:
> On Sun, Feb 15, 2009 at 04:33:16PM -0800, Christopher Li wrote:
>> All applied and pushed. I update some of the validations error because
>> of the tab width change.
>
> BTW, I'm not sure that this messing with tabstops is a good idea.
> Note that tokenizer is _still_ the hottest part of the entire
> thing, so we need to be damn careful around it.  You are getting the
> slow path of nextchar() on \t now, which can add up to a lot of
> extra overhead...

How about the attached patch?

I move white space handling out side of the nextchar_slow().
Let nextchar_slow() only handle two case: possible EOF and '\\'.

Chris

[-- Attachment #2: hot-nextchar --]
[-- Type: application/octet-stream, Size: 5408 bytes --]



Index: sparse.chrisl/tokenize.c
===================================================================
--- sparse.chrisl.orig/tokenize.c
+++ sparse.chrisl/tokenize.c
@@ -38,6 +38,8 @@ typedef struct {
 	unsigned char *buffer;
 } stream_t;
 
+static inline int drop_whitespace(stream_t *stream, int c);
+
 const char *stream_name(int stream)
 {
 	if (stream < 0 || stream > input_stream_nr)
@@ -69,6 +71,13 @@ const char *show_special(int val)
 	return buffer;
 }
 
+static inline void stream_newline(stream_t *stream)
+{
+	stream->newline = 1;
+	stream->pos = 0;
+	stream->line++;
+}
+
 const char *show_ident(const struct ident *ident)
 {
 	static char buffer[256];
@@ -198,19 +207,15 @@ static struct token * alloc_token(stream
 	return token;
 }
 
-/*
- *  Argh...  That was surprisingly messy - handling '\r' complicates the
- *  things a _lot_.
- */
 static int nextchar_slow(stream_t *stream)
 {
 	int offset = stream->offset;
 	int size = stream->size;
 	int c;
-	int spliced = 0, had_cr, had_backslash, complain;
+	int spliced = 0, had_backslash;
 
 restart:
-	had_cr = had_backslash = complain = 0;
+	had_backslash = 0;
 
 repeat:
 	if (offset >= size) {
@@ -224,33 +229,15 @@ repeat:
 	}
 
 	c = stream->buffer[offset++];
-
-	if (had_cr && c != '\n')
-		complain = 1;
-
-	if (c == '\r') {
-		had_cr = 1;
-		goto repeat;
-	}
-
-	stream->pos += (c == '\t') ? (tabstop - stream->pos % tabstop) : 1;
-
-	if (c == '\n') {
-		stream->line++;
-		stream->pos = 0;
-	}
-
 	if (!had_backslash) {
 		if (c == '\\') {
 			had_backslash = 1;
 			goto repeat;
 		}
-		if (c == '\n')
-			stream->newline = 1;
 	} else {
 		if (c == '\n') {
-			if (complain)
-				warning(stream_pos(stream), "non-ASCII data stream");
+			stream->line++;
+			stream->pos = 0;
 			spliced = 1;
 			goto restart;
 		}
@@ -261,9 +248,6 @@ repeat:
 
 out:
 	stream->offset = offset;
-	if (complain)
-		warning(stream_pos(stream), "non-ASCII data stream");
-
 	return c;
 
 got_eof:
@@ -273,8 +257,6 @@ got_eof:
 	}
 	if (stream->pos)
 		warning(stream_pos(stream), "no newline at end of file");
-	else if (had_cr)
-		warning(stream_pos(stream), "non-ASCII data stream");
 	else if (spliced)
 		warning(stream_pos(stream), "backslash-newline at end of file");
 	return EOF;
@@ -291,10 +273,7 @@ static inline int nextchar(stream_t *str
 
 	if (offset < stream->size) {
 		int c = stream->buffer[offset++];
-		static const char special[256] = {
-			['\t'] = 1, ['\r'] = 1, ['\n'] = 1, ['\\'] = 1
-		};
-		if (!special[c]) {
+		if (c != '\\') {
 			stream->offset = offset;
 			stream->pos++;
 			return c;
@@ -346,9 +325,14 @@ enum {
 	Exp = 8,
 	Dot = 16,
 	ValidSecond = 32,
+	Space = 64,
 };
 
 static const long cclass[257] = {
+	[' ' + 1] = Space,
+	['\r' + 1] = Space,
+	['\n' + 1] = Space,
+	['\t' + 1] = Space,
 	['0' + 1 ... '9' + 1] = Digit | Hex,
 	['A' + 1 ... 'D' + 1] = Letter | Hex,
 	['E' + 1] = Letter | Hex | Exp,
@@ -374,6 +358,42 @@ static const long cclass[257] = {
 	['#' + 1] = ValidSecond,
 };
 
+static inline int drop_whitespace(stream_t *stream, int c)
+{
+	int complain = 0;
+	for (;;) {
+repeat:
+		switch (c) {
+		case ' ':
+			break;
+		case '\n':
+			stream_newline(stream);
+			break;
+		case '\t': {
+			int remain = stream->pos % tabstop;
+			if (remain)
+				stream->pos += tabstop - remain;
+			break;
+		}
+		case '\r': {
+			c = nextchar(stream);
+			if (c == '\n')
+				break;
+
+			if (!complain++)
+				warning(stream_pos(stream),
+					"non-ASCII data stream");
+			stream->whitespace = 1;
+			goto repeat;
+		}
+		default:
+			return c;
+		}
+		c = nextchar(stream);
+	}
+}
+
+
 /*
  * pp-number:
  *	digit
@@ -438,8 +458,10 @@ static int escapechar(int first, int typ
 	next = nextchar(stream);
 	value = first;
 
-	if (first == '\n')
+	if (first == '\n') {
+		stream_newline(stream);
 		warning(stream_pos(stream), "Newline in string or character constant");
+	}
 
 	if (first == '\\' && next != EOF) {
 		value = next;
@@ -479,6 +501,7 @@ static int escapechar(int first, int typ
 			case '"':
 				break;
 			case '\n':
+				stream_newline(stream);
 				warning(stream_pos(stream), "Newline in string or character constant");
 				break;
 			case '0'...'7': {
@@ -586,6 +609,7 @@ static int drop_stream_eoln(stream_t *st
 		case EOF:
 			return EOF;
 		case '\n':
+			stream_newline(stream);
 			return nextchar(stream);
 		}
 	}
@@ -598,15 +622,16 @@ static int drop_stream_comment(stream_t 
 	drop_token(stream);
 	newline = stream->newline;
 
-	next = nextchar(stream);
+	next = drop_whitespace(stream, nextchar(stream));
 	for (;;) {
 		int curr = next;
 		if (curr == EOF) {
 			warning(stream_pos(stream), "End of file in the middle of a comment");
 			return curr;
 		}
-		next = nextchar(stream);
-		if (curr == '*' && next == '/')
+		stream->whitespace = 0;
+		next = drop_whitespace(stream, nextchar(stream));
+		if (curr == '*' && !stream->whitespace && next == '/')
 			break;
 	}
 	stream->newline = newline;
@@ -916,7 +941,7 @@ static struct token *tokenize_stream(str
 {
 	int c = nextchar(stream);
 	while (c != EOF) {
-		if (!isspace(c)) {
+		if (!(cclass[c+1] & Space)) {
 			struct token *token = alloc_token(stream);
 			stream->token = token;
 			stream->newline = 0;
@@ -925,7 +950,7 @@ static struct token *tokenize_stream(str
 			continue;
 		}
 		stream->whitespace = 1;
-		c = nextchar(stream);
+		c = drop_whitespace(stream, c);
 	}
 	return mark_eof(stream);
 }

^ permalink raw reply	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2009-02-16 11:14 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2009-02-14 12:25 [PATCH 1/7] Fix handling of ident-less declarations Al Viro
2009-02-14 18:45 ` Christopher Li
2009-02-14 19:08   ` Al Viro
2009-02-14 20:31     ` Al Viro
2009-02-14 21:30       ` Al Viro
     [not found]     ` <70318cbf0902151633w77dc151ek7a6205430d2a19b8@mail.gmail.com>
     [not found]       ` <20090216014234.GS28946@ZenIV.linux.org.uk>
2009-02-16 11:14         ` Christopher Li

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).