Re: L'\0' handling - Christopher Li

linux-sparse.vger.kernel.org archive mirror
 help / color / mirror / Atom feed

From: Christopher Li <sparse@chrisli.org>
To: Michael Stefaniuc <mstefani@redhat.com>
Cc: Linux-Sparse <linux-sparse@vger.kernel.org>
Subject: Re: L'\0' handling
Date: Thu, 17 Jun 2010 17:30:40 -0700	[thread overview]
Message-ID: <AANLkTikeLwjc-OKk-tVGqRoz2uaf0MVzWJJSFWMinETM@mail.gmail.com> (raw)
In-Reply-To: <4BBF8E07.5030306@redhat.com>

[-- Attachment #1: Type: text/plain, Size: 704 bytes --]

On Fri, Apr 9, 2010 at 1:28 PM, Michael Stefaniuc <mstefani@redhat.com> wrote:
>> Ah, silly me. I did not realized the nature of this change is to support
>> wide
>> char literals. Just look up what wide char string literals is, now I
>> have a better
>> idea. You are right. We should support both. My previous patch is wrong
>> to set the type of wide char string as "long" type.
>>
>> So L"hello word\n" pointer are incompatible with char * pointer right?
>
> Yes, they are incompatible.

A blast from the past. I found this patch while cleaning up my
tree; I had totally forgotten about it.

At least it should parse L"hello world" now.
Making the base type correct is more work, though.

Chris

[-- Attachment #2: 0001-Parsing-wide-char-string.patch --]
[-- Type: application/octet-stream, Size: 5578 bytes --]

From 49adf11b99cfce04ddcae7be0a272cc2df31436d Mon Sep 17 00:00:00 2001
From: Christopher Li <sparse@chrisli.org>
Date: Thu, 17 Jun 2010 17:08:09 -0700
Subject: [PATCH 1/4] Parsing wide char string

A follow-up change to parse wide char strings.
It currently only parses and stores them like normal strings.
More changes are needed to reflect the base type, size, etc.

Signed-off-by: Christopher Li <sparse@chrisli.org>
---
 expression.c  |   13 ++++++++-----
 expression.h  |    5 ++++-
 pre-process.c |    5 +++--
 token.h       |    3 ++-
 tokenize.c    |   17 +++++++++++------
 5 files changed, 28 insertions(+), 15 deletions(-)

diff --git a/expression.c b/expression.c
index 67e05e7..7e06e60 100644
--- a/expression.c
+++ b/expression.c
@@ -224,17 +224,18 @@ static struct token *string_expression(struct token *token, struct expression *e
 {
 	struct string *string = token->string;
 	struct token *next = token->next;
+	int stringtype = token_type(token);
 
 	convert_function(token);
 
-	if (token_type(next) == TOKEN_STRING) {
+	if (token_type(next) == stringtype) {
 		int totlen = string->length-1;
 		char *data;
 
 		do {
 			totlen += next->string->length-1;
 			next = next->next;
-		} while (token_type(next) == TOKEN_STRING);
+		} while (token_type(next) == stringtype);
 
 		if (totlen > MAX_STRING) {
 			warning(token->pos, "trying to concatenate %d-character string (%d bytes max)", totlen, MAX_STRING);
@@ -256,7 +257,7 @@ static struct token *string_expression(struct token *token, struct expression *e
 			next = next->next;
 			memcpy(data, s->data, len);
 			data += len;
-		} while (token_type(next) == TOKEN_STRING);
+		} while (token_type(next) == stringtype);
 		*data = '\0';
 	}
 	expr->string = string;
@@ -397,7 +398,7 @@ struct token *primary_expression(struct token *token, struct expression **tree)
 
 	switch (token_type(token)) {
 	case TOKEN_CHAR:
-	case TOKEN_LONG_CHAR:
+	case TOKEN_WIDE_CHAR:
 		expr = alloc_expression(token->pos, EXPR_VALUE);   
 		expr->flags = Int_const_expr;
 		expr->ctype = token_type(token) == TOKEN_CHAR ? &int_ctype : &long_ctype;
@@ -464,9 +465,11 @@ struct token *primary_expression(struct token *token, struct expression **tree)
 		break;
 	}
 
-	case TOKEN_STRING: {
+	case TOKEN_STRING:
+	case TOKEN_WIDE_STRING: {
 	handle_string:
 		expr = alloc_expression(token->pos, EXPR_STRING);
+		expr->wide = token_type(token) == TOKEN_WIDE_STRING;
 		token = string_expression(token, expr);
 		break;
 	}
diff --git a/expression.h b/expression.h
index 631224f..9778de8 100644
--- a/expression.h
+++ b/expression.h
@@ -76,7 +76,10 @@ struct expression {
 		long double fvalue;
 
 		// EXPR_STRING
-		struct string *string;
+		struct {
+			int wide;
+			struct string *string;
+		};
 
 		// EXPR_UNOP, EXPR_PREOP and EXPR_POSTOP
 		struct /* unop */ {
diff --git a/pre-process.c b/pre-process.c
index 058f24b..656acaa 100644
--- a/pre-process.c
+++ b/pre-process.c
@@ -864,10 +864,11 @@ static int token_different(struct token *t1, struct token *t2)
 		different = t1->argnum != t2->argnum;
 		break;
 	case TOKEN_CHAR:
-	case TOKEN_LONG_CHAR:
+	case TOKEN_WIDE_CHAR:
 		different = t1->character != t2->character;
 		break;
-	case TOKEN_STRING: {
+	case TOKEN_STRING:
+	case TOKEN_WIDE_STRING: {
 		struct string *s1, *s2;
 
 		s1 = t1->string;
diff --git a/token.h b/token.h
index c527e78..a7ec77e 100644
--- a/token.h
+++ b/token.h
@@ -67,8 +67,9 @@ enum token_type {
 	TOKEN_ZERO_IDENT,
 	TOKEN_NUMBER,
 	TOKEN_CHAR,
-	TOKEN_LONG_CHAR,
+	TOKEN_WIDE_CHAR,
 	TOKEN_STRING,
+	TOKEN_WIDE_STRING,
 	TOKEN_SPECIAL,
 	TOKEN_STREAMBEGIN,
 	TOKEN_STREAMEND,
diff --git a/tokenize.c b/tokenize.c
index cf05826..4c97517 100644
--- a/tokenize.c
+++ b/tokenize.c
@@ -137,6 +137,7 @@ const char *show_token(const struct token *token)
 		return show_ident(token->ident);
 
 	case TOKEN_STRING:
+	case TOKEN_WIDE_STRING:
 		return show_string(token->string);
 
 	case TOKEN_NUMBER:
@@ -146,7 +147,7 @@ const char *show_token(const struct token *token)
 		return show_special(token->special);
 
 	case TOKEN_CHAR: 
-	case TOKEN_LONG_CHAR: {
+	case TOKEN_WIDE_CHAR: {
 		char *ptr = buffer;
 		int c = token->character;
 		*ptr++ = '\'';
@@ -548,7 +549,7 @@ static int get_char_token(int next, stream_t *stream, enum token_type type)
 	return nextchar(stream);
 }
 
-static int get_string_token(int next, stream_t *stream)
+static int get_string_token(int next, stream_t *stream, enum token_type type)
 {
 	static char buffer[MAX_STRING];
 	struct string *string;
@@ -581,7 +582,7 @@ static int get_string_token(int next, stream_t *stream)
 
 	/* Pass it on.. */
 	token = stream->token;
-	token_type(token) = TOKEN_STRING;
+	token_type(token) = type;
 	token->string = string;
 	add_token(stream);
 	
@@ -701,7 +702,7 @@ static int get_one_special(int c, stream_t *stream)
 			return get_one_number(c, next, stream);
 		break;
 	case '"':
-		return get_string_token(next, stream);
+		return get_string_token(next, stream, TOKEN_STRING);
 	case '\'':
 		return get_char_token(next, stream, TOKEN_CHAR);
 	case '/':
@@ -881,8 +882,12 @@ static int get_one_identifier(int c, stream_t *stream)
 
 	ident = create_hashed_ident(buf, len, hash);
 
-	if (ident == &L_ident && next == '\'')
-		return get_char_token(nextchar(stream), stream, TOKEN_LONG_CHAR);
+	if (ident == &L_ident) {
+		if (next == '\'')
+			return get_char_token(nextchar(stream), stream, TOKEN_WIDE_CHAR);
+		if (next == '\"')
+			return get_string_token(nextchar(stream), stream, TOKEN_WIDE_STRING);
+	}
 
 	/* Pass it on.. */
 	token = stream->token;
-- 
1.6.6.1

     prev parent reply	other threads:[~2010-06-18  0:30 UTC|newest]

Thread overview: 12+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2010-04-08 14:59 L'\0' handling Yura Pakhuchiy
2010-04-08 15:22 ` Michael Stefaniuc
2010-04-08 15:39   ` Yura Pakhuchiy
2010-04-08 15:54     ` Michael Stefaniuc
2010-04-08 20:19       ` Christopher Li
     [not found]         ` <1270758815.2167.13.camel@yura-tl>
2010-04-08 20:46           ` Christopher Li
2010-04-08 20:58             ` Michael Stefaniuc
2010-04-08 23:18               ` Christopher Li
2010-04-09  8:57                 ` Michael Stefaniuc
2010-04-09 20:07                   ` Christopher Li
2010-04-09 20:28                     ` Michael Stefaniuc
2010-06-18  0:30                       ` Christopher Li [this message]

find likely ancestor, descendant, or conflicting patches for this message:
( dfblob:67e05e7 dfblob:7e06e60 dfblob:631224f dfblob:9778de8
dfblob:058f24b dfblob:656acaa dfblob:c527e78 dfblob:a7ec77e
dfblob:cf05826 dfblob:4c97517 )
 OR (
bs:"Parsing wide char string" )
	(help)

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=AANLkTikeLwjc-OKk-tVGqRoz2uaf0MVzWJJSFWMinETM@mail.gmail.com \
    --to=sparse@chrisli.org \
    --cc=linux-sparse@vger.kernel.org \
    --cc=mstefani@redhat.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).