[PATCH 4/4] DTC: Begin the path to sane literals and expressions.

linuxppc-dev.lists.ozlabs.org archive mirror
 help / color / mirror / Atom feed

* [PATCH 4/4] DTC: Begin the path to sane literals and expressions.
@ 2007-10-19 17:43 Jon Loeliger
  2007-10-20  8:47 ` David Gibson
  2007-10-21  5:30 ` Segher Boessenkool
  0 siblings, 2 replies; 13+ messages in thread
From: Jon Loeliger @ 2007-10-19 17:43 UTC (permalink / raw)
  To: linuxppc-dev


Add support for the "/dts-version/ <number>;" statment.
Make legacy DTS files version 0 whether explicitly stated
or implied by a lack of /dts-version/ statement.

Begin supporting a new /dts-version/ 1 that changes the
format of the literals in a DTS source file.  In the new
format, hex constants are prefixed with 0x or 0X, and
bare numbers are decimal or octal according to standard
conventions.

Property names have been limited to start with
characters from the set [a-zA-Z,._#?].  That is, the
digits and the expression symbols have been removed.

Use of "d#', "o#", "h#" and "b#" are gone in version 1.

Several warnings are introduced for debatable constructs.

- Only /dts-version/ 0 and 1 are supported yet.

- A missing /dts-version/ statement garners a
  suggestion to add one, and defaults to verion 0.

- The /memreserve/ construct using "-" for ranges looks
  suspiciously like the subtraction of two expressions,
  so its syntax has been changed to use ".." as the range
  indicator.

Signed-off-by: Jon Loeliger <jdl@freescale.com>
---
 dtc-lexer.l  |  180 +++++++++++++++++++++++++++++++++++++++++++++------------
 dtc-parser.y |  143 +++++++++++++++++++++++++++++++---------------
 dtc.c        |   26 ++++++++
 dtc.h        |   23 ++++++--
 srcpos.h     |    1 +
 treesource.c |   24 +++++++-
 6 files changed, 303 insertions(+), 94 deletions(-)

diff --git a/dtc-lexer.l b/dtc-lexer.l
index 278a96e..a1c52c4 100644
--- a/dtc-lexer.l
+++ b/dtc-lexer.l
@@ -22,12 +22,19 @@
 
 %x INCLUDE
 %x CELLDATA
+%x CELLDATA_LEGACY
 %x BYTESTRING
+%x BYTESTRING_LEGACY
 %x MEMRESERVE
+%x MEMRESERVE_LEGACY
 
+DIGIT		[0-9]
+HEXDIGIT	[0-9a-fA-F]
+FIRSTPROPCHAR	[a-zA-Z,._#?]
 PROPCHAR	[a-zA-Z0-9,._+*#?-]
 UNITCHAR	[0-9a-f,]
 WS		[[:space:]]
+DOT		[.]
 
 REFCHAR		({PROPCHAR}|{UNITCHAR}|[/@])
 
@@ -46,11 +53,20 @@ REFCHAR		({PROPCHAR}|{UNITCHAR}|[/@])
 #endif
 
 
+unsigned int in_lexer_use_base = 16;	/* Feedback from parser.  Ugh. */
+
 
 %}
 
 %%
 
+"/dts-version/"	{
+			yylloc.filenum = srcpos_filenum;
+			yylloc.first_line = yylineno;
+			DPRINT("Keyword: /dts-version/\n");
+			return DT_DTS_VERSION;
+		}
+
 "/include/"		BEGIN(INCLUDE);
 
 <INCLUDE>\"[^"\n]*\"	{
@@ -83,24 +99,15 @@ REFCHAR		({PROPCHAR}|{UNITCHAR}|[/@])
 			yylloc.filenum = srcpos_filenum;
 			yylloc.first_line = yylineno;
 			DPRINT("Keyword: /memreserve/\n");
-			BEGIN(MEMRESERVE);
-			return DT_MEMRESERVE;
-		}
-
-<MEMRESERVE>[0-9a-fA-F]+ {
-			yylloc.filenum = srcpos_filenum;
-			yylloc.first_line = yylineno;
-			if (yyleng > 2*sizeof(yylval.addr)) {
-				fprintf(stderr, "Address value %s too large\n",
-					yytext);
+			if (dts_version == 0) {
+				BEGIN(MEMRESERVE_LEGACY);
+			} else {
+				BEGIN(MEMRESERVE);
 			}
-			yylval.addr = (u64) strtoull(yytext, NULL, 16);
-			DPRINT("Addr: %llx\n",
-			       (unsigned long long)yylval.addr);
-			return DT_ADDR;
+			return DT_MEMRESERVE;
 		}
 
-<MEMRESERVE>";"	{
+<MEMRESERVE,MEMRESERVE_LEGACY>";"	{
 			yylloc.filenum = srcpos_filenum;
 			yylloc.first_line = yylineno;
 			DPRINT("/MEMRESERVE\n");
@@ -117,7 +124,7 @@ REFCHAR		({PROPCHAR}|{UNITCHAR}|[/@])
 			return DT_LABEL;
 		}
 
-<CELLDATA>[bodh]# {
+<CELLDATA_LEGACY>[bodh]# {
 			yylloc.filenum = srcpos_filenum;
 			yylloc.first_line = yylineno;
 			if (*yytext == 'b')
@@ -132,15 +139,7 @@ REFCHAR		({PROPCHAR}|{UNITCHAR}|[/@])
 			return DT_BASE;
 		}
 
-<CELLDATA>[0-9a-fA-F]+	{
-			yylloc.filenum = srcpos_filenum;
-			yylloc.first_line = yylineno;
-			yylval.str = strdup(yytext);
-			DPRINT("Cell: '%s'\n", yylval.str);
-			return DT_CELL;
-		}
-
-<CELLDATA>">"	{
+<CELLDATA,CELLDATA_LEGACY>">"	{
 			yylloc.filenum = srcpos_filenum;
 			yylloc.first_line = yylineno;
 			DPRINT("/CELLDATA\n");
@@ -156,15 +155,7 @@ REFCHAR		({PROPCHAR}|{UNITCHAR}|[/@])
 			return DT_REF;
 		}
 
-<BYTESTRING>[0-9a-fA-F]{2} {
-			yylloc.filenum = srcpos_filenum;
-			yylloc.first_line = yylineno;
-			yylval.byte = strtol(yytext, NULL, 16);
-			DPRINT("Byte: %02x\n", (int)yylval.byte);
-			return DT_BYTE;
-		}
-
-<BYTESTRING>"]"	{
+<BYTESTRING,BYTESTRING_LEGACY>"]"	{
 			yylloc.filenum = srcpos_filenum;
 			yylloc.first_line = yylineno;
 			DPRINT("/BYTESTRING\n");
@@ -182,7 +173,63 @@ REFCHAR		({PROPCHAR}|{UNITCHAR}|[/@])
 			return yytext[0];
 		}
 
-{PROPCHAR}+	{
+
+<MEMRESERVE_LEGACY>{HEXDIGIT}+	{
+			yylloc.filenum = srcpos_filenum;
+			yylloc.first_line = yylineno;
+			DPRINT("LEGACY MEMRESERVE LITERAL bare hex: '%s'\n",
+			       yytext);
+			yylval.ire = expr_from_string(yytext, 16);
+			in_lexer_use_base = expr_default_base;
+			return DT_LITERAL;
+		}
+
+<CELLDATA_LEGACY>{HEXDIGIT}+	{
+			yylloc.filenum = srcpos_filenum;
+			yylloc.first_line = yylineno;
+			DPRINT("LEGACY CELLDATA LITERAL bare hex: '%s'\n",
+			       yytext);
+			yylval.ire = expr_from_string(yytext,
+						      in_lexer_use_base);
+			in_lexer_use_base = expr_default_base;
+			return DT_LITERAL;
+		}
+
+<BYTESTRING_LEGACY>[0-9a-fA-F]{2} {
+			yylloc.filenum = srcpos_filenum;
+			yylloc.first_line = yylineno;
+			yylval.ire = strtol(yytext, NULL, 16);
+			DPRINT("LEGACY BYTE LITERAL: %02x\n", (int)yylval.ire);
+			return DT_LITERAL;
+		}
+
+
+<CELLDATA,MEMRESERVE,BYTESTRING>{DIGIT}+	{
+			yylloc.filenum = srcpos_filenum;
+			yylloc.first_line = yylineno;
+			DPRINT("CONTEXT LITERAL bare dec: '%s'\n", yytext);
+			yylval.ire = expr_from_string(yytext, 0);
+			return DT_LITERAL;
+		}
+
+<CELLDATA,MEMRESERVE,BYTESTRING>0(x|X){HEXDIGIT}+	{
+			yylloc.filenum = srcpos_filenum;
+			yylloc.first_line = yylineno;
+			yylval.ire = expr_from_string(yytext, 0);
+			DPRINT("CONTEXT LITERAL 0x: '%llx'\n",
+			       (unsigned long long)yylval.ire);
+			return DT_LITERAL;
+		}
+
+
+<*>{DOT}{DOT}	{
+			yylloc.filenum = srcpos_filenum;
+			yylloc.first_line = yylineno;
+			DPRINT("DT_RANGE\n");
+			return DT_RANGE;
+		}
+
+{FIRSTPROPCHAR}{PROPCHAR}+	{
 			yylloc.filenum = srcpos_filenum;
 			yylloc.first_line = yylineno;
 			DPRINT("PropName: %s\n", yytext);
@@ -190,7 +237,7 @@ REFCHAR		({PROPCHAR}|{UNITCHAR}|[/@])
 			return DT_PROPNAME;
 		}
 
-{PROPCHAR}+(@{UNITCHAR}+)? {
+{FIRSTPROPCHAR}{PROPCHAR}+(@{UNITCHAR}+)? {
 			yylloc.filenum = srcpos_filenum;
 			yylloc.first_line = yylineno;
 			DPRINT("NodeName: %s\n", yytext);
@@ -198,6 +245,23 @@ REFCHAR		({PROPCHAR}|{UNITCHAR}|[/@])
 			return DT_NODENAME;
 		}
 
+{DIGIT}+	{
+			yylloc.filenum = srcpos_filenum;
+			yylloc.first_line = yylineno;
+			yylval.ire = expr_from_string(yytext, 0);
+			DPRINT("LITERAL: '%llx'\n",
+			       (unsigned long long)yylval.ire);
+			return DT_LITERAL;
+		}
+
+0(x|X){HEXDIGIT}+	{
+			yylloc.filenum = srcpos_filenum;
+			yylloc.first_line = yylineno;
+			DPRINT("LITERAL bare hex: '%s'\n", yytext);
+			yylval.ire = expr_from_string(yytext, 0);
+			return DT_LITERAL;
+		}
+
 
 <*>{WS}+	/* eat whitespace */
 
@@ -216,11 +280,19 @@ REFCHAR		({PROPCHAR}|{UNITCHAR}|[/@])
 			switch (yytext[0]) {
 				case '<':
 					DPRINT("CELLDATA\n");
-					BEGIN(CELLDATA);
+					if (dts_version == 0) {
+					    BEGIN(CELLDATA_LEGACY);
+					} else {
+					    BEGIN(CELLDATA);
+					}
 					break;
 				case '[':
 					DPRINT("BYTESTRING\n");
-					BEGIN(BYTESTRING);
+					if (dts_version == 0) {
+					    BEGIN(BYTESTRING_LEGACY);
+					} else {
+					    BEGIN(BYTESTRING);
+					}
 					break;
 				default:
 
@@ -339,3 +411,35 @@ int pop_input_file(void)
 
 	return 1;
 }
+
+/*
+ * Convert a string representation of a numeric literal
+ * in the given base into a binary representation.
+ *
+ * FIXME: should these specification errors be fatal instead?
+ */
+
+u64 expr_from_string(char *s, unsigned int base)
+{
+	u64 v;
+	char *e;
+
+	v = strtoull(s, &e, base);
+	if (*e) {
+		fprintf(stderr,
+			"Line %d: Invalid literal value '%s' : "
+			"%c is not a base %d digit; %lld assumed\n",
+			yylloc.first_line, s, *e,
+			base == 0 ? expr_default_base : base,
+			(unsigned long long) v);
+	}
+
+	if (errno == EINVAL || errno == ERANGE) {
+		fprintf(stderr,
+			"Line %d: Invalid literal value '%s'; %lld assumed\n",
+			yylloc.first_line, s, (unsigned long long) v);
+		errno = 0;
+	}
+
+	return v;
+}
diff --git a/dtc-parser.y b/dtc-parser.y
index 4698793..fd82381 100644
--- a/dtc-parser.y
+++ b/dtc-parser.y
@@ -27,6 +27,8 @@
 
 int yylex(void);
 cell_t cell_from_string(char *s, unsigned int base);
+cell_t expr_cell_value(u64 c);
+u8 expr_byte_value(u64 c);
 
 extern struct boot_info *the_boot_info;
 
@@ -35,7 +37,6 @@ extern struct boot_info *the_boot_info;
 %union {
 	cell_t cval;
 	unsigned int cbase;
-	u8 byte;
 	char *str;
 	struct data data;
 	struct property *prop;
@@ -46,25 +47,27 @@ extern struct boot_info *the_boot_info;
 	int hexlen;
 	u64 addr;
 	struct reserve_info *re;
+	u64 ire;
 }
 
+%token DT_DTS_VERSION
 %token DT_MEMRESERVE
-%token <addr> DT_ADDR
 %token <str> DT_PROPNAME
 %token <str> DT_NODENAME
 %token <cbase> DT_BASE
-%token <str> DT_CELL
-%token <byte> DT_BYTE
 %token <data> DT_STRING
 %token <str> DT_UNIT
 %token <str> DT_LABEL
 %token <str> DT_REF
+%token <ire> DT_LITERAL
+%token DT_RANGE
 
 %type <data> propdata
 %type <data> propdataprefix
 %type <re> memreserve
 %type <re> memreserves
 %type <cbase> opt_cell_base
+%type <cval> cell
 %type <data> celllist
 %type <data> bytestring
 %type <prop> propdef
@@ -76,13 +79,27 @@ extern struct boot_info *the_boot_info;
 %type <nodelist> subnodes
 %type <str> label
 %type <str> nodename
+%type <ire> expr
+%type <ire> expr_primary
 
 %%
 
 sourcefile:
-	  memreserves devicetree
+	  opt_version memreserves devicetree
 		{
-			the_boot_info = build_boot_info($1, $2);
+			the_boot_info = build_boot_info($2, $3);
+		}
+	;
+
+opt_version:
+	  /* empty */
+		{
+			yywarn("Missing /dts-version/; 0 assumed.");
+			set_dts_version(0);
+		}
+	|  DT_DTS_VERSION expr ';'
+		{
+			set_dts_version($2);
 		}
 	;
 
@@ -98,11 +115,16 @@ memreserves:
 	;
 
 memreserve:
-	  label DT_MEMRESERVE DT_ADDR DT_ADDR ';'
+	  label DT_MEMRESERVE expr expr ';'
 		{
 			$$ = build_reserve_entry($3, $4, $1);
 		}
-	| label DT_MEMRESERVE DT_ADDR '-' DT_ADDR ';'
+	| label DT_MEMRESERVE expr '-' expr ';'
+		{
+			yywarn("/memreserve/ using '-' should be converted to '..' instead.");
+			$$ = build_reserve_entry($3, $5 - $3 + 1, $1);
+		}
+	| label DT_MEMRESERVE expr DT_RANGE expr ';'
 		{
 			$$ = build_reserve_entry($3, $5 - $3 + 1, $1);
 		}
@@ -179,19 +201,10 @@ propdataprefix:
 		}
 	;
 
-opt_cell_base:
-	  /* empty */
-		{
-			$$ = 16;
-		}
-	| DT_BASE
-	;
-
 celllist:
-	  celllist opt_cell_base DT_CELL
+	  celllist cell
 		{
-			$$ = data_append_cell($1,
-					      cell_from_string($3, $2));
+			$$ = data_append_cell($1, $2);
 		}
 	| celllist DT_REF
 		{
@@ -207,8 +220,39 @@ celllist:
 		}
 	;
 
+cell:
+	  expr
+		{
+			$$ = expr_cell_value($1);
+		}
+	;
+
+expr:
+	  expr_primary
+	;
+
+expr_primary:
+	  opt_cell_base DT_LITERAL
+		{
+			$$ = $2;
+		}
+	;
+
+opt_cell_base:
+	  /* empty */
+		{
+			/* This is a lot gross, but hopefully temporary. */
+			in_lexer_use_base = 16;
+		}
+	| DT_BASE
+		{
+			/* This is a lot gross, but hopefully temporary. */
+			in_lexer_use_base = $1;
+		}
+	;
+
 bytestring:
-	  bytestring DT_BYTE
+	  bytestring expr
 		{
 			$$ = data_append_byte($1, $2);
 		}
@@ -264,7 +308,7 @@ label:
 
 %%
 
-void yyerror (char const *s)
+void yyerror(char const *s)
 {
 	const char *fname = srcpos_filename_for_num(yylloc.filenum);
 
@@ -276,32 +320,37 @@ void yyerror (char const *s)
 }
 
 
-/*
- * Convert a string representation of a numeric cell
- * in the given base into a cell.
- *
- * FIXME: should these specification errors be fatal instead?
- */
+void yywarn(char const *s, ...)
+{
+	va_list ap;
+	const char *fname;
+
+	if (quiet >= 1)
+		return;
 
-cell_t cell_from_string(char *s, unsigned int base)
+	va_start(ap, s);
+
+	fname = srcpos_filename_for_num(yylloc.filenum);
+	if (strcmp(fname, "-") == 0)
+		fname = "stdin";
+
+	fprintf(stderr, "%s:%d Warning: ", fname, yylloc.first_line);
+	vfprintf(stderr, s, ap);
+	fprintf(stderr, "\n");
+}
+
+
+cell_t expr_cell_value(u64 c)
 {
-	cell_t c;
-	char *e;
-
-	c = strtoul(s, &e, base);
-	if (*e) {
-		fprintf(stderr,
-			"Line %d: Invalid cell value '%s' : "
-			"%c is not a base %d digit; %d assumed\n",
-			yylloc.first_line, s, *e, base, c);
-	}
-
-	if (errno == EINVAL || errno == ERANGE) {
-		fprintf(stderr,
-			"Line %d: Invalid cell value '%s'; %d assumed\n",
-			yylloc.first_line, s, c);
-		errno = 0;
-	}
-
-	return c;
+	/* FIXME: Range check to fit in u32 cell_t */
+
+	return (cell_t) c;
+}
+
+
+u8 expr_byte_value(u64 c)
+{
+	/* FIXME: Range check to fit in u8 byte */
+
+	return (u8) c;
 }
diff --git a/dtc.c b/dtc.c
index 76a6dfe..6417c2d 100644
--- a/dtc.c
+++ b/dtc.c
@@ -30,6 +30,32 @@ int quiet;		/* Level of quietness */
 int reservenum;		/* Number of memory reservation slots */
 int minsize;		/* Minimum blob size */
 
+/*
+ * DTS sourcefile version.
+ */
+unsigned int dts_version = 0;
+unsigned int expr_default_base = 10;
+
+
+void set_dts_version(u64 vers)
+{
+	if (vers > 1) {
+		yywarn("Unknown version %lld; 0 assumed\n", vers);
+		dts_version = 0;
+	} else {
+		dts_version = vers;
+	}
+
+	if (dts_version == 0) {
+		expr_default_base = 16;
+		in_lexer_use_base = 16;
+	} else {
+		expr_default_base = 10;
+		in_lexer_use_base = 10;
+	}
+}
+
+
 char *join_path(char *path, char *name)
 {
 	int lenp = strlen(path);
diff --git a/dtc.h b/dtc.h
index 09dec54..900ad9f 100644
--- a/dtc.h
+++ b/dtc.h
@@ -36,7 +36,14 @@
 
 #include <fdt.h>
 
+typedef uint8_t u8;
+typedef uint16_t u16;
+typedef uint32_t u32;
+typedef uint64_t u64;
+typedef u32 cell_t;
+
 #define DEFAULT_FDT_VERSION	17
+
 /*
  * Command line options
  */
@@ -44,6 +51,13 @@ extern int quiet;		/* Level of quietness */
 extern int reservenum;		/* Number of memory reservation slots */
 extern int minsize;		/* Minimum blob size */
 
+/*
+ * DTS sourcefile version
+ */
+extern unsigned int dts_version;
+extern void set_dts_version(u64 vers);
+
+
 static inline void __attribute__((noreturn)) die(char * str, ...)
 {
 	va_list ap;
@@ -74,12 +88,6 @@ static inline void *xrealloc(void *p, size_t len)
 	return new;
 }
 
-typedef uint8_t u8;
-typedef uint16_t u16;
-typedef uint32_t u32;
-typedef uint64_t u64;
-typedef u32 cell_t;
-
 #define cpu_to_be16(x)	htons(x)
 #define be16_to_cpu(x)	ntohs(x)
 
@@ -237,5 +245,8 @@ struct boot_info *dt_from_fs(char *dirname);
 
 char *join_path(char *path, char *name);
 void fill_fullpaths(struct node *tree, char *prefix);
+u64 expr_from_string(char *s, unsigned int base);
+extern unsigned int in_lexer_use_base;
+unsigned int expr_default_base;
 
 #endif /* _DTC_H */
diff --git a/srcpos.h b/srcpos.h
index ce7ab5b..c285e47 100644
--- a/srcpos.h
+++ b/srcpos.h
@@ -63,6 +63,7 @@ typedef struct YYLTYPE {
 
 
 extern void yyerror(char const *);
+extern void yywarn(char const *, ...);
 
 extern int srcpos_filenum;
 
diff --git a/treesource.c b/treesource.c
index f62041f..653be3a 100644
--- a/treesource.c
+++ b/treesource.c
@@ -147,7 +147,11 @@ static void write_propval_cells(FILE *f, struct data val)
 
 	fprintf(f, " = <");
 	for (;;) {
-		fprintf(f, "%x", be32_to_cpu(*cp++));
+		if (dts_version == 0) {
+			fprintf(f, "%x", be32_to_cpu(*cp++));
+		} else {
+			fprintf(f, "0x%x", be32_to_cpu(*cp++));
+		}
 		if ((void *)cp >= propend)
 			break;
 		fprintf(f, " ");
@@ -162,7 +166,11 @@ static void write_propval_bytes(FILE *f, struct data val)
 
 	fprintf(f, " = [");
 	for (;;) {
-		fprintf(f, "%02hhx", *bp++);
+		if (dts_version == 0) {
+			fprintf(f, "%02hhx", *bp++);
+		} else {
+			fprintf(f, "0x%02hhx", *bp++);
+		}
 		if ((void *)bp >= propend)
 			break;
 		fprintf(f, " ");
@@ -218,12 +226,22 @@ static void write_tree_source_node(FILE *f, struct node *tree, int level)
 void dt_to_source(FILE *f, struct boot_info *bi)
 {
 	struct reserve_info *re;
+	const char *range_str;
+
+	if (dts_version == 0) {
+	    range_str = " - ";
+	} else {
+	    range_str = " .. ";
+	    fprintf(f, "/dts-version/ %d;\n\n", dts_version);
+	}
 
 	for (re = bi->reservelist; re; re = re->next) {
-		fprintf(f, "/memreserve/\t%016llx-%016llx;\n",
+		fprintf(f, "/memreserve/\t0x%016llx %s 0x%016llx;\n",
 			(unsigned long long)re->re.address,
+			range_str,
 			(unsigned long long)(re->re.address + re->re.size - 1));
 	}
+	fprintf(f, "\n");
 
 	write_tree_source_node(f, bi->dt, 0);
 }
-- 
1.5.3.1.139.g9346b

^ permalink raw reply related	[flat|nested] 13+ messages in thread

* Re: [PATCH 4/4] DTC: Begin the path to sane literals and expressions.
  2007-10-19 17:43 [PATCH 4/4] DTC: Begin the path to sane literals and expressions Jon Loeliger
@ 2007-10-20  8:47 ` David Gibson
  2007-10-21  5:32   ` Segher Boessenkool
  2007-10-25 18:24   ` Jon Loeliger
  2007-10-21  5:30 ` Segher Boessenkool
  1 sibling, 2 replies; 13+ messages in thread
From: David Gibson @ 2007-10-20  8:47 UTC (permalink / raw)
  To: Jon Loeliger; +Cc: linuxppc-dev

On Fri, Oct 19, 2007 at 12:43:30PM -0500, Jon Loeliger wrote:
> 
> Add support for the "/dts-version/ <number>;" statment.
> Make legacy DTS files version 0 whether explicitly stated
> or implied by a lack of /dts-version/ statement.
> 
> Begin supporting a new /dts-version/ 1 that changes the
> format of the literals in a DTS source file.  In the new
> format, hex constants are prefixed with 0x or 0X, and
> bare numbers are decimal or octal according to standard
> conventions.

Hurrah!  Except that there's a few small details, and one biggish
detail that concern me.

> Property names have been limited to start with
> characters from the set [a-zA-Z,._#?].  That is, the
> digits and the expression symbols have been removed.

Good call.  Note that the rationale for the property and node name
regexes is a bit fuzzy: there's a standard for what's allowed in
IEEE1275, but existing IBM and Apple device trees break it in a number
of ways.  

> Use of "d#', "o#", "h#" and "b#" are gone in version 1.

Also good.  We might want to keep b#, since there's no C way of doing
binary literals, but in that case I'd suggest recognizing it at the
lexical level, not the grammar level as we do now (which would mean a
space between the b# and the digits would no longer be permissible).

> Several warnings are introduced for debatable constructs.
> 
> - Only /dts-version/ 0 and 1 are supported yet.
> 
> - A missing /dts-version/ statement garners a
>   suggestion to add one, and defaults to verion 0.
> 
> - The /memreserve/ construct using "-" for ranges looks
>   suspiciously like the subtraction of two expressions,
>   so its syntax has been changed to use ".." as the range
>   indicator.

Ah, yes.  An irritating change, but a necessary one, I agree.

Now my big concern with this patch: the dts_version global, set by the
parser, used by the lexer.  I assume you've tested this and it works
in practice, but you're relying on the semantic action from the .y
file being executed before the lexer meets any token that depends on
it.

I don't know about you, but I have to think pretty hard about how flow
of control will move between lexer and parser rules in a shift-reduce
parser at the best of times.  With the %glr-parser option, bison will
use the Tomita algorithm.  That means the parser state could be split
if there are ambiguities, or non-LALR(1) portions of the grammar
(which there are).  In that case the execution of the parser rules
will be delayed until after the split is resolved again, however many
tokens down the track.

Now, I'll grant that such a grammar ambiguity is unlikely around the
version statement.  But given the complexity of the situation, it
seems pretty risky to rely on execution ordering between parser and
lexer - I don't even know if there's a guarantee that bison won't
buffer lexer tokens before parsing them, which would screw us up in a
much less involved way.

Therefore, I'd prefer that instead of having this general version
number, we introduce a separate token for each new version.  So
/dts-version-1/ or whatever.  Any new, incompatible, dts version is a
big deal in any case - not something we want to happen often - so a
new token for each version is not a big deal, I think.  With this
approach, the version flag can be tripped in the lexer, and the
ordering of lexer rule execution is obvious.

A few more minor comments below:

[snip]
> diff --git a/dtc-lexer.l b/dtc-lexer.l
> index 278a96e..a1c52c4 100644
> --- a/dtc-lexer.l
> +++ b/dtc-lexer.l
> @@ -22,12 +22,19 @@
>  
>  %x INCLUDE
>  %x CELLDATA
> +%x CELLDATA_LEGACY
>  %x BYTESTRING
> +%x BYTESTRING_LEGACY
>  %x MEMRESERVE
> +%x MEMRESERVE_LEGACY

With the new syntax, integeral literals should no longer be ambiguous
with property or node names.  Therefore, we should only need legacy
CELLDATA and MEMRESERVE states, new-style literals can be recognized
in INITIAL state.

I'm also inclined to leave the syntax for bytestrings as it is, in
whichcase we should only need one lexical state for that, too.

[snip]
> +<*>{DOT}{DOT}	{

You should be able to just use \.\. or ".." here rather than having to
indirect through a character class.

[snip]
> +0(x|X){HEXDIGIT}+	{

Does C recognize both 0x and 0X, or just 0x?  I don't remember off the
top of my head.

[snip]
> +u64 expr_from_string(char *s, unsigned int base)
> +{
> +	u64 v;
> +	char *e;
> +
> +	v = strtoull(s, &e, base);
> +	if (*e) {
> +		fprintf(stderr,
> +			"Line %d: Invalid literal value '%s' : "
> +			"%c is not a base %d digit; %lld assumed\n",
> +			yylloc.first_line, s, *e,
> +			base == 0 ? expr_default_base : base,
> +			(unsigned long long) v);

Do we need this error checking?  Won't the regexes mean that the
string *must* be a valid literal by this point?

[snip]
> @@ -46,25 +47,27 @@ extern struct boot_info *the_boot_info;
>  	int hexlen;
>  	u64 addr;
>  	struct reserve_info *re;
> +	u64 ire;

What's "ire" supposed to be short for?

[snip]
> +	| label DT_MEMRESERVE expr '-' expr ';'

Oh.. you haven't actually abolished the '-' syntax, even in version 1
mode.  Since we're already making an incompatible syntax change, we
should really make this one at the same time.

[snip]
> +cell:
> +	  expr
> +		{
> +			$$ = expr_cell_value($1);
> +		}
> +	;
> +
> +expr:
> +	  expr_primary
> +	;
> +
> +expr_primary:
> +	  opt_cell_base DT_LITERAL
> +		{
> +			$$ = $2;
> +		}

Hrm.  I realise you haven't actually implemented expressions here, but
I think these names suggest a wrong direction.  I think we should only
allow literals and bracketed expressions in celldata, not bare
expressions.  Why?  Because mistaking the following:
	<0x00000000 0xf0000000 + 0x00001000 0x80000000>
as being 4 rather than 3 cells long is rather easier than mistaking:
	<0x00000000 (0xf0000000 + 0x00001000) 0x80000000>

[snip[
>  bytestring:
> -	  bytestring DT_BYTE
> +	  bytestring expr

Urg... I don't know that allowing expressions in bytestrings is not a
very good idea.  Better, I think to keep the existing syntax here, and
just think of [abcd1234deadbeef] as a rather long sort of literal
itself.

[snip]
> +void yywarn(char const *s, ...)

Ick.. we can tolerate yyblah() names for the things we inherit from
yacc, but lets not introduce our own, hey.

[snip]
> +unsigned int dts_version = 0;
> +unsigned int expr_default_base = 10;
> +
> +
> +void set_dts_version(u64 vers)
> +{
> +	if (vers > 1) {
> +		yywarn("Unknown version %lld; 0 assumed\n", vers);
> +		dts_version = 0;
> +	} else {
> +		dts_version = vers;
> +	}
> +
> +	if (dts_version == 0) {
> +		expr_default_base = 16;
> +		in_lexer_use_base = 16;
> +	} else {
> +		expr_default_base = 10;
> +		in_lexer_use_base = 10;

Uh.. I don't think we should need either of expr_default_base and
in_lexer_use_base, let alone both..

[snip]
> -		fprintf(f, "%x", be32_to_cpu(*cp++));
> +		if (dts_version == 0) {

Where does dts_version get set in the -Odts case?

-- 
David Gibson			| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au	| minimalist, thank you.  NOT _the_ _other_
				| _way_ _around_!
http://www.ozlabs.org/~dgibson

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH 4/4] DTC: Begin the path to sane literals and expressions.
  2007-10-19 17:43 [PATCH 4/4] DTC: Begin the path to sane literals and expressions Jon Loeliger
  2007-10-20  8:47 ` David Gibson
@ 2007-10-21  5:30 ` Segher Boessenkool
  2007-10-22  0:51   ` David Gibson
  2007-10-22 12:36   ` Jon Loeliger
  1 sibling, 2 replies; 13+ messages in thread
From: Segher Boessenkool @ 2007-10-21  5:30 UTC (permalink / raw)
  To: Jon Loeliger; +Cc: linuxppc-dev

> Property names have been limited to start with
> characters from the set [a-zA-Z,._#?].  That is, the
> digits and the expression symbols have been removed.

This cannot work; many property names start with a digit,
for example.


Segher

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH 4/4] DTC: Begin the path to sane literals and expressions.
  2007-10-20  8:47 ` David Gibson
@ 2007-10-21  5:32   ` Segher Boessenkool
  2007-10-22  0:37     ` David Gibson
  2007-10-25 18:24   ` Jon Loeliger
  1 sibling, 1 reply; 13+ messages in thread
From: Segher Boessenkool @ 2007-10-21  5:32 UTC (permalink / raw)
  To: David Gibson; +Cc: linuxppc-dev, Jon Loeliger

>> Use of "d#', "o#", "h#" and "b#" are gone in version 1.
>
> Also good.  We might want to keep b#, since there's no C way of doing
> binary literals,

GCC supports "0b...", and it is proposed new ISO C syntax, too.


Segher

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH 4/4] DTC: Begin the path to sane literals and expressions.
  2007-10-21  5:32   ` Segher Boessenkool
@ 2007-10-22  0:37     ` David Gibson
  0 siblings, 0 replies; 13+ messages in thread
From: David Gibson @ 2007-10-22  0:37 UTC (permalink / raw)
  To: Segher Boessenkool; +Cc: linuxppc-dev, Jon Loeliger

On Sun, Oct 21, 2007 at 07:32:33AM +0200, Segher Boessenkool wrote:
> >> Use of "d#', "o#", "h#" and "b#" are gone in version 1.
> >
> > Also good.  We might want to keep b#, since there's no C way of doing
> > binary literals,
> 
> GCC supports "0b...", and it is proposed new ISO C syntax, too.

Ah, ok.  We should go with that instead, then.

-- 
David Gibson			| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au	| minimalist, thank you.  NOT _the_ _other_
				| _way_ _around_!
http://www.ozlabs.org/~dgibson

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH 4/4] DTC: Begin the path to sane literals and expressions.
  2007-10-21  5:30 ` Segher Boessenkool
@ 2007-10-22  0:51   ` David Gibson
  2007-10-22 12:36   ` Jon Loeliger
  1 sibling, 0 replies; 13+ messages in thread
From: David Gibson @ 2007-10-22  0:51 UTC (permalink / raw)
  To: Segher Boessenkool; +Cc: linuxppc-dev, Jon Loeliger

On Sun, Oct 21, 2007 at 07:30:45AM +0200, Segher Boessenkool wrote:
> > Property names have been limited to start with
> > characters from the set [a-zA-Z,._#?].  That is, the
> > digits and the expression symbols have been removed.
> 
> This cannot work; many property names start with a digit,
> for example.

Yes, crap.  Ok, I guess we just have to put the literal lexer rule
before the property name lexer rule, so literals will be recognized in
preference.  Disallowing just those property names which look like
literals is a little warty, but not likely to cause trouble in
practice, I think.

-- 
David Gibson			| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au	| minimalist, thank you.  NOT _the_ _other_
				| _way_ _around_!
http://www.ozlabs.org/~dgibson

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH 4/4] DTC: Begin the path to sane literals and expressions.
  2007-10-21  5:30 ` Segher Boessenkool
  2007-10-22  0:51   ` David Gibson
@ 2007-10-22 12:36   ` Jon Loeliger
  2007-10-23  0:33     ` David Gibson
  1 sibling, 1 reply; 13+ messages in thread
From: Jon Loeliger @ 2007-10-22 12:36 UTC (permalink / raw)
  To: Segher Boessenkool; +Cc: linuxppc-dev

So, like, the other day Segher Boessenkool mumbled:
> > Property names have been limited to start with
> > characters from the set [a-zA-Z,._#?].  That is, the
> > digits and the expression symbols have been removed.
> 
> This cannot work; many property names start with a digit,
> for example.

Are any of those property names in use in any
of our DTS files or b-w-o.txt?  Not really, no.
In fact, with this lexical change, all of our
DTS files still produce byte-identical results.

I really think this is one of those areas where
we may need to stray from the other guideline.

Is there a compelling reason somewhere?  Really?

jdl

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH 4/4] DTC: Begin the path to sane literals and expressions.
  2007-10-22 12:36   ` Jon Loeliger
@ 2007-10-23  0:33     ` David Gibson
  2007-10-23  0:57       ` Segher Boessenkool
  0 siblings, 1 reply; 13+ messages in thread
From: David Gibson @ 2007-10-23  0:33 UTC (permalink / raw)
  To: Jon Loeliger; +Cc: linuxppc-dev

On Mon, Oct 22, 2007 at 07:36:57AM -0500, Jon Loeliger wrote:
> So, like, the other day Segher Boessenkool mumbled:
> > > Property names have been limited to start with
> > > characters from the set [a-zA-Z,._#?].  That is, the
> > > digits and the expression symbols have been removed.
> > 
> > This cannot work; many property names start with a digit,
> > for example.
> 
> Are any of those property names in use in any
> of our DTS files or b-w-o.txt?  Not really, no.
> In fact, with this lexical change, all of our
> DTS files still produce byte-identical results.
> 
> I really think this is one of those areas where
> we may need to stray from the other guideline.
> 
> Is there a compelling reason somewhere?  Really?

We may have deprecated te '64-bit' and '32-64-bridge' properties in
cpu nodes for the flattened tree, but it already exists in a great
number of Apple and IBM trees.  It would be poor form for dtc to choke
on these trees.

-- 
David Gibson			| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au	| minimalist, thank you.  NOT _the_ _other_
				| _way_ _around_!
http://www.ozlabs.org/~dgibson

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH 4/4] DTC: Begin the path to sane literals and expressions.
  2007-10-23  0:33     ` David Gibson
@ 2007-10-23  0:57       ` Segher Boessenkool
  0 siblings, 0 replies; 13+ messages in thread
From: Segher Boessenkool @ 2007-10-23  0:57 UTC (permalink / raw)
  To: David Gibson; +Cc: linuxppc-dev, Jon Loeliger

>>>> Property names have been limited to start with
>>>> characters from the set [a-zA-Z,._#?].  That is, the
>>>> digits and the expression symbols have been removed.
>>>
>>> This cannot work; many property names start with a digit,
>>> for example.
>>
>> Are any of those property names in use in any
>> of our DTS files or b-w-o.txt?  Not really, no.
>> In fact, with this lexical change, all of our
>> DTS files still produce byte-identical results.
>>
>> I really think this is one of those areas where
>> we may need to stray from the other guideline.
>>
>> Is there a compelling reason somewhere?  Really?
>
> We may have deprecated te '64-bit' and '32-64-bridge' properties in
> cpu nodes for the flattened tree, but it already exists in a great
> number of Apple and IBM trees.

Huh?  "64-bit" isn't deprecated I hope, just the "32-bit" thing
(that never was defined) is.

> It would be poor form for dtc to choke on these trees.

Yah.  Small incompatibilities add up :-(


Segher

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH 4/4] DTC: Begin the path to sane literals and expressions.
  2007-10-20  8:47 ` David Gibson
  2007-10-21  5:32   ` Segher Boessenkool
@ 2007-10-25 18:24   ` Jon Loeliger
  2007-10-26  1:28     ` David Gibson
  1 sibling, 1 reply; 13+ messages in thread
From: Jon Loeliger @ 2007-10-25 18:24 UTC (permalink / raw)
  To: David Gibson; +Cc: linuxppc-dev

So, like, the other day David Gibson mumbled:
> > Use of "d#', "o#", "h#" and "b#" are gone in version 1.
> 
> Also good.  We might want to keep b#, since there's no C way of doing
> binary literals, but in that case I'd suggest recognizing it at the
> lexical level, not the grammar level as we do now (which would mean a
> space between the b# and the digits would no longer be permissible).

I added 0(b|B)[01]+ literal support.

> Now my big concern with this patch: the dts_version global, set by the
> parser, used by the lexer.  I assume you've tested this and it works
> in practice,

Yes.

> but you're relying on the semantic action from the .y
> file being executed before the lexer meets any token that depends on
> it.

Of course that is how it works.  No problem.

> I don't know about you, but I have to think pretty hard about how flow
> of control will move between lexer and parser rules in a shift-reduce
> parser at the best of times.  With the %glr-parser option, bison will
> use the Tomita algorithm.  That means the parser state could be split
> if there are ambiguities, or non-LALR(1) portions of the grammar
> (which there are).  In that case the execution of the parser rules
> will be delayed until after the split is resolved again, however many
> tokens down the track.

So, I removed the ambiguity.

> Therefore, I'd prefer that instead of having this general version
> number, we introduce a separate token for each new version.  So
> /dts-version-1/ or whatever.  Any new, incompatible, dts version is a
> big deal in any case - not something we want to happen often - so a
> new token for each version is not a big deal, I think.  With this
> approach, the version flag can be tripped in the lexer, and the
> ordering of lexer rule execution is obvious.

I don't see how this will work at the purely lexical level in
a reliable way.  Sure, I see the lexer matching the token and
setting some variable (dts_version = 0 or 1) as needed.

But there is no really good way to enforce that this happens at
an early enough state that it covers the rest of the file short
of having a staged lexical context state machine.  And that just
seems way hacky to me.

With it in the grammar, we can enforce it being early in the file
and can be assured of it happening in time to affect the rest of
the parse.  Personally, I think having it be a general statement
like:

    /<option>/ <value> ;

makes a whole lot more sense than having the purely lexical
context.  Future versions of the DTS files will be syntactically
correct even when moving to a (now) hypothetical version 2 file.


> I'm also inclined to leave the syntax for bytestrings as it is, in

Why?  Why not be allowed to form up a series of expressions
that make up a byte string? Am I missing something obvious here?

> [snip]
> > +<*>{DOT}{DOT}	{
> 
> You should be able to just use \.\. or ".." here rather than having to
> indirect through a character class.

Oh, yeah, I meant to revert that again.  No problem.

> > +0(x|X){HEXDIGIT}+	{
> 
> Does C recognize both 0x and 0X, or just 0x?  I don't remember off the
> top of my head.

Yep.

> > +u64 expr_from_string(char *s, unsigned int base)
> > +{
> > +	u64 v;
> > +	char *e;
> > +
> > +	v = strtoull(s, &e, base);
> > +	if (*e) {
> > +		fprintf(stderr,
> > +			"Line %d: Invalid literal value '%s' : "
> > +			"%c is not a base %d digit; %lld assumed\n",
> > +			yylloc.first_line, s, *e,
> > +			base == 0 ? expr_default_base : base,
> > +			(unsigned long long) v);
> 
> Do we need this error checking?  Won't the regexes mean that the
> string *must* be a valid literal by this point?

It's still needed for the legacy bits where you have modified
the base for the following numbers.  Specifically, you can say
something like "d# 123abc" and it will scan lexically, but
not be correct semantically still.  Blah.

> > @@ -46,25 +47,27 @@ extern struct boot_info *the_boot_info;
> >  	int hexlen;
> >  	u64 addr;
> >  	struct reserve_info *re;
> > +	u64 ire;
> 
> What's "ire" supposed to be short for?

Oh, longer term.  I have a intermediate representation for
expressions up my sleeve.  Sorry, wasn't clear there...

> > +	| label DT_MEMRESERVE expr '-' expr ';'
> 
> Oh.. you haven't actually abolished the '-' syntax, even in version 1
> mode.  Since we're already making an incompatible syntax change, we
> should really make this one at the same time.

I can make that fail, no problem.

> > +cell:
> > +	  expr
> > +		{
> > +			$$ = expr_cell_value($1);
> > +		}
> > +	;
> > +
> > +expr:
> > +	  expr_primary
> > +	;
> > +
> > +expr_primary:
> > +	  opt_cell_base DT_LITERAL
> > +		{
> > +			$$ = $2;
> > +		}
> 
> Hrm.  I realise you haven't actually implemented expressions here, but

Right.  Initial framework....

> > +void yywarn(char const *s, ...)
> 
> Ick.. we can tolerate yyblah() names for the things we inherit from
> yacc, but lets not introduce our own, hey.

It's pretty normal, I think.  But we can introduce something else
and map yyerror() into an error variant.  Got name suggestions?



> > +unsigned int dts_version = 0;
> > +unsigned int expr_default_base = 10;
> > +
> > +
> > +void set_dts_version(u64 vers)
> > +{
> > +	if (vers > 1) {
> > +		yywarn("Unknown version %lld; 0 assumed\n", vers);
> > +		dts_version = 0;
> > +	} else {
> > +		dts_version = vers;
> > +	}
> > +
> > +	if (dts_version == 0) {
> > +		expr_default_base = 16;
> > +		in_lexer_use_base = 16;
> > +	} else {
> > +		expr_default_base = 10;
> > +		in_lexer_use_base = 10;
> 
> Uh.. I don't think we should need either of expr_default_base and
> in_lexer_use_base, let alone both..

You need to temporarily know what the base of the next number
will be for "d#"-like constructs, and then you need to know
what to revert to again (default).  Sure, we could consult the
dts_version again directly at that time if you'd rather.


> > -		fprintf(f, "%x", be32_to_cpu(*cp++));
> > +		if (dts_version == 0) {
> 
> Where does dts_version get set in the -Odts case?

The same call to set_dts_version() as any other case.

jdl

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH 4/4] DTC: Begin the path to sane literals and expressions.
  2007-10-25 18:24   ` Jon Loeliger
@ 2007-10-26  1:28     ` David Gibson
  2007-10-26 13:07       ` Jon Loeliger
  0 siblings, 1 reply; 13+ messages in thread
From: David Gibson @ 2007-10-26  1:28 UTC (permalink / raw)
  To: Jon Loeliger; +Cc: linuxppc-dev

On Thu, Oct 25, 2007 at 01:24:57PM -0500, Jon Loeliger wrote:
> So, like, the other day David Gibson mumbled:
> > > Use of "d#', "o#", "h#" and "b#" are gone in version 1.
> > 
> > Also good.  We might want to keep b#, since there's no C way of doing
> > binary literals, but in that case I'd suggest recognizing it at the
> > lexical level, not the grammar level as we do now (which would mean a
> > space between the b# and the digits would no longer be permissible).
> 
> I added 0(b|B)[01]+ literal support.

Ok.

[snip]
> > Therefore, I'd prefer that instead of having this general version
> > number, we introduce a separate token for each new version.  So
> > /dts-version-1/ or whatever.  Any new, incompatible, dts version is a
> > big deal in any case - not something we want to happen often - so a
> > new token for each version is not a big deal, I think.  With this
> > approach, the version flag can be tripped in the lexer, and the
> > ordering of lexer rule execution is obvious.
> 
> I don't see how this will work at the purely lexical level in
> a reliable way.  Sure, I see the lexer matching the token and
> setting some variable (dts_version = 0 or 1) as needed.

> But there is no really good way to enforce that this happens at
> an early enough state that it covers the rest of the file short
> of having a staged lexical context state machine.  And that just
> seems way hacky to me.
>
> With it in the grammar, we can enforce it being early in the file
> and can be assured of it happening in time to affect the rest of
> the parse.  Personally, I think having it be a general statement
> like:

Ah... I think I see the source of our misunderstanding.  Sorry if I
was unclear.  I'm not saying that the version token would be
invisible to the parser, just that it would be recognized by the lexer
first.

So, the grammar would still need to specify that the magic version
token comes first, of course.  A file which had it in the middle would
lex very strangely, but it wouldn't matter because it will never pass
the parser anyway.

>     /<option>/ <value> ;
> 
> makes a whole lot more sense than having the purely lexical
> context.  Future versions of the DTS files will be syntactically
> correct even when moving to a (now) hypothetical version 2 file.

I see that as bad thing not a good thing.  The only reason to bump the
version number is for *incompatible* source changes - that's why we're
doing it now, because the new representation would be ambiguous with
the current representation without something specifying which we're
using.  Therefore future files *shouldn't* be syntactically correct
with a current parser.

The nice thing about having a token, is that if necessary we can
completely change the grammar for each version, without having to have
tangled rules that have to generate yyerror()s in some circumstances
depending on the version variable.  The alternate grammars can be
encoded directly into the yacc rules:
	startsymbol : version0_file
		    | V1_TOKEN version1_file
		    | V2_TOKEN version2_file
		    ;

> > I'm also inclined to leave the syntax for bytestrings as it is, in
> 
> Why?  Why not be allowed to form up a series of expressions
> that make up a byte string? Am I missing something obvious here?

Because part of the point of bytestrings is to provide representation
for binary data.  For a MAC address, say
	[0x00 0x0a 0xe4 0x2c 0x23 0x1f]
is way bulkier than
	[000ae42c231f]

And in bytestring context, I suspect having every expression result be
truncated to bytesize will be way more of a gotcha than in cell
context.

I suspect we can get the expression flexibility we want here by
providing the right operators to act *on* bytestrings, rather than
within bytestrings.

[snip]
> > > +u64 expr_from_string(char *s, unsigned int base)
> > > +{
> > > +	u64 v;
> > > +	char *e;
> > > +
> > > +	v = strtoull(s, &e, base);
> > > +	if (*e) {
> > > +		fprintf(stderr,
> > > +			"Line %d: Invalid literal value '%s' : "
> > > +			"%c is not a base %d digit; %lld assumed\n",
> > > +			yylloc.first_line, s, *e,
> > > +			base == 0 ? expr_default_base : base,
> > > +			(unsigned long long) v);
> > 
> > Do we need this error checking?  Won't the regexes mean that the
> > string *must* be a valid literal by this point?
> 
> It's still needed for the legacy bits where you have modified
> the base for the following numbers.  Specifically, you can say
> something like "d# 123abc" and it will scan lexically, but
> not be correct semantically still.  Blah.

Uh, yeah, that.  And as I realised afterwards also in case some one
writes 09999.  The latter can be banned in the grammar, but not doing
so will give better error messages (e.g. "invalid digits in octal
literal" instead of "syntax error").

Actually, possibly we should make the literal regex allow [0-9a-zA-Z]
after the initial digit or 0x, so that 0xabcdzoom gives an error,
rather than parsing as a literal followed by a name, which could be
confusing to say the least.  Of course that will make the literal/name
ambiguity worse (see below).

> > > @@ -46,25 +47,27 @@ extern struct boot_info *the_boot_info;
> > >  	int hexlen;
> > >  	u64 addr;
> > >  	struct reserve_info *re;
> > > +	u64 ire;
> > 
> > What's "ire" supposed to be short for?
> 
> Oh, longer term.  I have a intermediate representation for
> expressions up my sleeve.  Sorry, wasn't clear there...

Hrm.  I think just exprval or intval would be better.  Actually
probably intval, since last we spoke I though we were planning on
having expressions of string and bytestring types as well.

> > > +	| label DT_MEMRESERVE expr '-' expr ';'
> > 
> > Oh.. you haven't actually abolished the '-' syntax, even in version 1
> > mode.  Since we're already making an incompatible syntax change, we
> > should really make this one at the same time.
> 
> I can make that fail, no problem.

Incidentally, there's another problem here: we haven't solved the
problem about having to allow property names with initial digits.
That's a particular problem here, because although we can make
literals scanned in preference to propnames of the same length, in
this case
	0x1234..0xabcd
Will be scanned as one huge propname.

This might work for you at the moment, if you've still got all the
lexer states, but I was really hoping we could ditch most of them with
the new literals.

> > > +cell:
> > > +	  expr
> > > +		{
> > > +			$$ = expr_cell_value($1);
> > > +		}
> > > +	;
> > > +
> > > +expr:
> > > +	  expr_primary
> > > +	;
> > > +
> > > +expr_primary:
> > > +	  opt_cell_base DT_LITERAL
> > > +		{
> > > +			$$ = $2;
> > > +		}
> > 
> > Hrm.  I realise you haven't actually implemented expressions here, but
> 
> Right.  Initial framework....

But you haven't actually addressed my concern about this.  Actually
it's worse that I said then, because
	<0x10000000 -999>
is ambiguous.  Is it a single subtraction expression, or one literal
cell followed by an expression cell with a unary '-'?

[snip]
> > > +unsigned int dts_version = 0;
> > > +unsigned int expr_default_base = 10;
> > > +
> > > +
> > > +void set_dts_version(u64 vers)
> > > +{
> > > +	if (vers > 1) {
> > > +		yywarn("Unknown version %lld; 0 assumed\n", vers);
> > > +		dts_version = 0;
> > > +	} else {
> > > +		dts_version = vers;
> > > +	}
> > > +
> > > +	if (dts_version == 0) {
> > > +		expr_default_base = 16;
> > > +		in_lexer_use_base = 16;
> > > +	} else {
> > > +		expr_default_base = 10;
> > > +		in_lexer_use_base = 10;
> > 
> > Uh.. I don't think we should need either of expr_default_base and
> > in_lexer_use_base, let alone both..
> 
> You need to temporarily know what the base of the next number
> will be for "d#"-like constructs, and then you need to know
> what to revert to again (default).  Sure, we could consult the
> dts_version again directly at that time if you'd rather.

Yeah, I figured this out after.  Youch, an even tighter and harder to
follow coupling between lexer and parser execution order.  I can think
of at least two better ways to do this.

1) handle d# b# etc. at the lexer lexel, with a regex like
(d#{WS}*[0-9]+).  Strictly speaking that changes the language, but I
don't think anyone's been insane enough to do something like "d#
/*stupid comment*/ 999".  That would remove the whole ugly
opt_cell_base tangle from the grammar.

2) Have the lexer just pass up literals as strings, and let the parser
do the conversion to integer, based on the grammatical context.  I
think this is preferable because it has other advantages: we can do
the distinction between 64-bit values for memreserve and 32-bit values
for cell at the grammatical level.  It can also be used to handle the
propname/literal ambiguity without lexer states (I had a patch a while
back which removed the MEMRESERVE and CELLDATA lex states using this
technique).

> > > -		fprintf(f, "%x", be32_to_cpu(*cp++));
> > > +		if (dts_version == 0) {
> > 
> > Where does dts_version get set in the -Odts case?
> 
> The same call to set_dts_version() as any other case.

Erm... which same call to set_dts_version()?  Surely not the one in
the parser..

-- 
David Gibson			| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au	| minimalist, thank you.  NOT _the_ _other_
				| _way_ _around_!
http://www.ozlabs.org/~dgibson

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH 4/4] DTC: Begin the path to sane literals and expressions.
  2007-10-26  1:28     ` David Gibson
@ 2007-10-26 13:07       ` Jon Loeliger
  2007-10-26 14:03         ` David Gibson
  0 siblings, 1 reply; 13+ messages in thread
From: Jon Loeliger @ 2007-10-26 13:07 UTC (permalink / raw)
  To: David Gibson; +Cc: linuxppc-dev

So, like, the other day David Gibson mumbled:
> 
> Ah... I think I see the source of our misunderstanding.  Sorry if I
> was unclear.  I'm not saying that the version token would be
> invisible to the parser, just that it would be recognized by the lexer
> first.

Ah! Right.  OK, I see what you are saying now.

> The nice thing about having a token, is that if necessary we can
> completely change the grammar for each version, without having to have
> tangled rules that have to generate yyerror()s in some circumstances
> depending on the version variable.  The alternate grammars can be
> encoded directly into the yacc rules:
> 	startsymbol : version0_file
> 		    | V1_TOKEN version1_file
> 		    | V2_TOKEN version2_file
> 		    ;

Hmmm...  Now that I see that your symbol is still in the grammar,
I can see this part as well.  OK.  I'll buy it.

> > > I'm also inclined to leave the syntax for bytestrings as it is, in
> > 
> > Why?  Why not be allowed to form up a series of expressions
> > that make up a byte string? Am I missing something obvious here?
> 
> Because part of the point of bytestrings is to provide representation
> for binary data.  For a MAC address, say
> 	[0x00 0x0a 0xe4 0x2c 0x23 0x1f]
> is way bulkier than
> 	[000ae42c231f]

No, I think you misuderstand what I was after.  I'm not after the
the latter [000ae4...].  In that case, there would be multiple
expressions, each no bigger than 8 bits wide:

    [ expr expr expr    expr  expr      expr ]
    [ 0x00   10 0x4  0x20+12 '0'+3  0x20 - 1 ]

or whatever seemed appropriate.  It would not be one giant value.

> And in bytestring context, I suspect having every expression result be
> truncated to bytesize will be way more of a gotcha than in cell
> context.

Which is why we run a semantic checking as well and warn on
values not fitting in container sizes.

> I suspect we can get the expression flexibility we want here by
> providing the right operators to act *on* bytestrings, rather than
> within bytestrings.

That too.  No problem.  I suspect some may be functional, though.
Haven't thought about that a bunch yet.  I just want to get
basis stuff in first.


> Hrm.  I think just exprval or intval would be better.  Actually
> probably intval, since last we spoke I though we were planning on
> having expressions of string and bytestring types as well.

Except I think we want more generalized than that.

> Incidentally, there's another problem here: we haven't solved the
> problem about having to allow property names with initial digits.

I know.

> That's a particular problem here, because although we can make
> literals scanned in preference to propnames of the same length, in
> this case
> 	0x1234..0xabcd
> Will be scanned as one huge propname.

I know.  White space is mandatory right now.

> This might work for you at the moment, if you've still got all the
> lexer states, but I was really hoping we could ditch most of them with
> the new literals.

Which is really why they are all still there.  Longer term,
I want to _quit_ supporting "version 0" and remove the cruft...

> But you haven't actually addressed my concern about this.  Actually
> it's worse that I said then, because
> 	<0x10000000 -999>
> is ambiguous.  Is it a single subtraction expression, or one literal
> cell followed by an expression cell with a unary '-'?

Gah.

Paren'ed expressions may be the thing to do.
How do you feel about comma separation?

Anyone else care to chime in?

> > > > +unsigned int dts_version = 0;

> Yeah, I figured this out after.  Youch, an even tighter and harder to
> follow coupling between lexer and parser execution order.  I can think
> of at least two better ways to do this.

I'm listening... :-)

> 1) handle d# b# etc. at the lexer lexel, with a regex like
> (d#{WS}*[0-9]+).  Strictly speaking that changes the language, but I
> don't think anyone's been insane enough to do something like "d#
> /*stupid comment*/ 999".  That would remove the whole ugly
> opt_cell_base tangle from the grammar.

That seems like it could work...

> 2) Have the lexer just pass up literals as strings, and let the parser
> do the conversion to integer, based on the grammatical context.  I
> think this is preferable because it has other advantages: we can do
> the distinction between 64-bit values for memreserve and 32-bit values
> for cell at the grammatical level.  It can also be used to handle the
> propname/literal ambiguity without lexer states (I had a patch a while
> back which removed the MEMRESERVE and CELLDATA lex states using this
> technique).

I'm not so keen on that approach, I don't think.

> > The same call to set_dts_version() as any other case.
> 
> Erm... which same call to set_dts_version()?  Surely not the one in
> the parser..

I'm clearly not understanding your point, I'm afraid.  There are
static default values here:

    /*
     * DTS sourcefile version.
     */
    unsigned int dts_version = 0;
    unsigned int expr_default_base = 10;

And there is a call to set_dts_version() made when any DTS file
is parsed, which happens before any -O option is even handled.

What am I missing?

jdl

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH 4/4] DTC: Begin the path to sane literals and expressions.
  2007-10-26 13:07       ` Jon Loeliger
@ 2007-10-26 14:03         ` David Gibson
  0 siblings, 0 replies; 13+ messages in thread
From: David Gibson @ 2007-10-26 14:03 UTC (permalink / raw)
  To: Jon Loeliger; +Cc: linuxppc-dev

On Fri, Oct 26, 2007 at 08:07:49AM -0500, Jon Loeliger wrote:
> So, like, the other day David Gibson mumbled:
> > 
> > Ah... I think I see the source of our misunderstanding.  Sorry if I
> > was unclear.  I'm not saying that the version token would be
> > invisible to the parser, just that it would be recognized by the lexer
> > first.
> 
> Ah! Right.  OK, I see what you are saying now.
> 
> > The nice thing about having a token, is that if necessary we can
> > completely change the grammar for each version, without having to have
> > tangled rules that have to generate yyerror()s in some circumstances
> > depending on the version variable.  The alternate grammars can be
> > encoded directly into the yacc rules:
> > 	startsymbol : version0_file
> > 		    | V1_TOKEN version1_file
> > 		    | V2_TOKEN version2_file
> > 		    ;
> 
> Hmmm...  Now that I see that your symbol is still in the grammar,
> I can see this part as well.  OK.  I'll buy it.

Yay! :)

> > > > I'm also inclined to leave the syntax for bytestrings as it is, in
> > > 
> > > Why?  Why not be allowed to form up a series of expressions
> > > that make up a byte string? Am I missing something obvious here?
> > 
> > Because part of the point of bytestrings is to provide representation
> > for binary data.  For a MAC address, say
> > 	[0x00 0x0a 0xe4 0x2c 0x23 0x1f]
> > is way bulkier than
> > 	[000ae42c231f]
> 
> No, I think you misuderstand what I was after.  I'm not after the
> the latter [000ae4...].  In that case, there would be multiple
> expressions, each no bigger than 8 bits wide:
> 
>     [ expr expr expr    expr  expr      expr ]
>     [ 0x00   10 0x4  0x20+12 '0'+3  0x20 - 1 ]
> 
> or whatever seemed appropriate.  It would not be one giant value.

Yes, I realise that.  My point is that [000ae42c231f] is valid *now*
and is a nice compact way of doing explicit bytestrings.  That would
become much bulkier with your proposal.

[snip]
> > Incidentally, there's another problem here: we haven't solved the
> > problem about having to allow property names with initial digits.
> 
> I know.

Yeah, have a look at my 1/2 patch - it addresses this, and I think a
bunch of related problems that would come with expression syntax.

> > That's a particular problem here, because although we can make
> > literals scanned in preference to propnames of the same length, in
> > this case
> > 	0x1234..0xabcd
> > Will be scanned as one huge propname.
> 
> I know.  White space is mandatory right now.

Ick.

> > This might work for you at the moment, if you've still got all the
> > lexer states, but I was really hoping we could ditch most of them with
> > the new literals.
> 
> Which is really why they are all still there.  Longer term,
> I want to _quit_ supporting "version 0" and remove the cruft...
> 
> > But you haven't actually addressed my concern about this.  Actually
> > it's worse that I said then, because
> > 	<0x10000000 -999>
> > is ambiguous.  Is it a single subtraction expression, or one literal
> > cell followed by an expression cell with a unary '-'?
> 
> Gah.
> 
> Paren'ed expressions may be the thing to do.
> How do you feel about comma separation?

I'm not keen on it.  I think paren'ed expressions are reasonable.
Grammatically the cell values would be literal | '(' expr ')'.  As
you've probably noticed in your expression research, there's generally
already a non-terminal just like that within expression grammars,
anyway.

[snip]
> > > The same call to set_dts_version() as any other case.
> > 
> > Erm... which same call to set_dts_version()?  Surely not the one in
> > the parser..
> 
> I'm clearly not understanding your point, I'm afraid.  There are
> static default values here:
> 
>     /*
>      * DTS sourcefile version.
>      */
>     unsigned int dts_version = 0;
>     unsigned int expr_default_base = 10;
> 
> And there is a call to set_dts_version() made when any DTS file
> is parsed, which happens before any -O option is even handled.
> 
> What am I missing?

The dts version for output should be independent of the dts version
for input (in fact it should probably be locked to v1).  In -Idtb
-Odts mode, there will be no dts file parsed before dts output, and it
would be neat if -Idts -Odts mode would convert from v0 to v1 (another
thing on my to-do list is to change things so -Idts -Odts will
preserve labels from input to output).

-- 
David Gibson			| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au	| minimalist, thank you.  NOT _the_ _other_
				| _way_ _around_!
http://www.ozlabs.org/~dgibson

^ permalink raw reply	[flat|nested] 13+ messages in thread

end of thread, other threads:[~2007-10-26 14:03 UTC | newest]

Thread overview: 13+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2007-10-19 17:43 [PATCH 4/4] DTC: Begin the path to sane literals and expressions Jon Loeliger
2007-10-20  8:47 ` David Gibson
2007-10-21  5:32   ` Segher Boessenkool
2007-10-22  0:37     ` David Gibson
2007-10-25 18:24   ` Jon Loeliger
2007-10-26  1:28     ` David Gibson
2007-10-26 13:07       ` Jon Loeliger
2007-10-26 14:03         ` David Gibson
2007-10-21  5:30 ` Segher Boessenkool
2007-10-22  0:51   ` David Gibson
2007-10-22 12:36   ` Jon Loeliger
2007-10-23  0:33     ` David Gibson
2007-10-23  0:57       ` Segher Boessenkool

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).