devicetree-compiler.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH] dtc: Return error when reading non-ascii chars
@ 2021-03-14  4:40 montytyper-uAjRD0nVeow
       [not found] ` <BY5PR14MB37654DDEFC00C8C1ABAD35DECE6D9-ee0W+6mJUHcCBBnP8riMx04NFAmbP9jRvxpqHgZTriW3zl9H0oFU5g@public.gmane.org>
  0 siblings, 1 reply; 2+ messages in thread
From: montytyper-uAjRD0nVeow @ 2021-03-14  4:40 UTC (permalink / raw)
  To: david-xT8FGy+AXnRB3Ne2BGzF6laj5H9X9Tb+,
	loeliger-Re5JQEeQqe8AvxtiuMwx3w
  Cc: devicetree-compiler-u79uwXL29TY76Z2rM5mHXA, Jordan Montgomery

From: Jordan Montgomery <montytyper-uAjRD0nVeow@public.gmane.org>

The underlying lexer code generated by flex checks
if(yychar <= YYEOF) to detect EOF. This returns true
for any char >= 0x80, resulting in premature EOF and
causing dtc to generate an invalid FDT.

This patch adds a rule to detect these illegal
characters and throw a fatal error instead, as well as
a test case which passes if dtc errors fatally and
fails otherwise.

Parsing of valid devicetrees should be unaffected by
this change.

Signed-off-by: Jordan Montgomery <montytyper-uAjRD0nVeow@public.gmane.org>
---
 dtc-lexer.l           |  6 ++++++
 tests/bad-unicode.dts |  6 ++++++
 tests/dtc-fatal.sh    | 11 +++++++++--
 tests/run_tests.sh    |  1 +
 4 files changed, 22 insertions(+), 2 deletions(-)
 mode change 100644 => 100755 dtc-lexer.l
 create mode 100644 tests/bad-unicode.dts

diff --git a/dtc-lexer.l b/dtc-lexer.l
old mode 100644
new mode 100755
index b3b7270..668875b
--- a/dtc-lexer.l
+++ b/dtc-lexer.l
@@ -17,6 +17,7 @@ CHAR_LITERAL	'([^']|\\')*'
 WS		[[:space:]]
 COMMENT		"/*"([^*]|\*+[^*/])*\*+"/"
 LINECOMMENT	"//".*\n
+NONASCII	[\x80-\xff]
 
 %{
 #include "dtc.h"
@@ -245,6 +246,11 @@ static void PRINTF(1, 2) lexical_error(const char *fmt, ...);
 <*>"&&"		{ return DT_AND; };
 <*>"||"		{ return DT_OR; };
 
+<*>{NONASCII}	{
+			DPRINT("Illegal character x%X\n", (unsigned)yytext[0]);
+			YY_FATAL_ERROR("Unable to parse non-ASCII character");
+		};
+
 <*>.		{
 			DPRINT("Char: %c (\\x%02x)\n", yytext[0],
 				(unsigned)yytext[0]);
diff --git a/tests/bad-unicode.dts b/tests/bad-unicode.dts
new file mode 100644
index 0000000..19b3d2f
--- /dev/null
+++ b/tests/bad-unicode.dts
@@ -0,0 +1,6 @@
+/dts-v1/;
+
+/ {
+    prop-str = "hello world";⌘
+    prop-str2 = "hello world";
+};
diff --git a/tests/dtc-fatal.sh b/tests/dtc-fatal.sh
index 08a4d29..56cdc1c 100755
--- a/tests/dtc-fatal.sh
+++ b/tests/dtc-fatal.sh
@@ -3,13 +3,20 @@
 SRCDIR=`dirname "$0"`
 . "$SRCDIR/testutils.sh"
 
+if [ "$1" = "-r" ]; then
+    expected="$2"
+    shift 2
+else
+    expected="1"
+fi
+
 verbose_run $VALGRIND "$DTC" -o/dev/null "$@"
 ret="$?"
 
 if [ "$ret" -gt 127 ]; then
     FAIL "dtc killed by signal (ret=$ret)"
-elif [ "$ret" != "1" ]; then
-    FAIL "dtc returned incorrect status $ret instead of 1"
+elif [ "$ret" != "$expected" ]; then
+    FAIL "dtc returned incorrect status $ret instead of $expected"
 fi
 
 PASS
diff --git a/tests/run_tests.sh b/tests/run_tests.sh
index 4b8dada..c9bc095 100755
--- a/tests/run_tests.sh
+++ b/tests/run_tests.sh
@@ -764,6 +764,7 @@ dtc_tests () {
     run_sh_test "$SRCDIR/dtc-fatal.sh" -I dts -O dtb nosuchfile.dts
     run_sh_test "$SRCDIR/dtc-fatal.sh" -I dtb -O dtb nosuchfile.dtb
     run_sh_test "$SRCDIR/dtc-fatal.sh" -I fs -O dtb nosuchfile
+    run_sh_test "$SRCDIR/dtc-fatal.sh" -r 2 -I dts -O dtb bad-unicode.dts
 
     # Dependencies
     run_dtc_test -I dts -O dtb -o dependencies.test.dtb -d dependencies.test.d "$SRCDIR/dependencies.dts"
-- 
2.25.1


^ permalink raw reply related	[flat|nested] 2+ messages in thread

* Re: [PATCH] dtc: Return error when reading non-ascii chars
       [not found] ` <BY5PR14MB37654DDEFC00C8C1ABAD35DECE6D9-ee0W+6mJUHcCBBnP8riMx04NFAmbP9jRvxpqHgZTriW3zl9H0oFU5g@public.gmane.org>
@ 2021-03-23  3:32   ` David Gibson
  0 siblings, 0 replies; 2+ messages in thread
From: David Gibson @ 2021-03-23  3:32 UTC (permalink / raw)
  To: montytyper-uAjRD0nVeow
  Cc: loeliger-Re5JQEeQqe8AvxtiuMwx3w,
	devicetree-compiler-u79uwXL29TY76Z2rM5mHXA

[-- Attachment #1: Type: text/plain, Size: 4046 bytes --]

On Sat, Mar 13, 2021 at 08:40:54PM -0800, montytyper-uAjRD0nVeow@public.gmane.org wrote:
> From: Jordan Montgomery <montytyper-uAjRD0nVeow@public.gmane.org>
> 
> The underlying lexer code generated by flex checks
> if(yychar <= YYEOF) to detect EOF. This returns true
> for any char >= 0x80, resulting in premature EOF and
> causing dtc to generate an invalid FDT.
> 
> This patch adds a rule to detect these illegal
> characters and throw a fatal error instead, as well as
> a test case which passes if dtc errors fatally and
> fails otherwise.
> 
> Parsing of valid devicetrees should be unaffected by
> this change.
> 
> Signed-off-by: Jordan Montgomery <montytyper-uAjRD0nVeow@public.gmane.org>

This doesn't seem right to me.  AIUI, flex although not Unicode-aware,
should be 8-bit clean.  If I attempt to build your example file here
without your lexer change, I get just what I'd expect:

$ ../dtc -I dts -O dtb -o /dev/null bad-unicode.dts 
Error: bad-unicode.dts:4.30-31 syntax error
FATAL ERROR: Unable to parse input tree

A fatal parse error, because it found a bogus character there, but no
different than if it had been an ASCII bogus character.


> ---
>  dtc-lexer.l           |  6 ++++++
>  tests/bad-unicode.dts |  6 ++++++
>  tests/dtc-fatal.sh    | 11 +++++++++--
>  tests/run_tests.sh    |  1 +
>  4 files changed, 22 insertions(+), 2 deletions(-)
>  mode change 100644 => 100755 dtc-lexer.l
>  create mode 100644 tests/bad-unicode.dts
> 
> diff --git a/dtc-lexer.l b/dtc-lexer.l
> old mode 100644
> new mode 100755
> index b3b7270..668875b
> --- a/dtc-lexer.l
> +++ b/dtc-lexer.l
> @@ -17,6 +17,7 @@ CHAR_LITERAL	'([^']|\\')*'
>  WS		[[:space:]]
>  COMMENT		"/*"([^*]|\*+[^*/])*\*+"/"
>  LINECOMMENT	"//".*\n
> +NONASCII	[\x80-\xff]
>  
>  %{
>  #include "dtc.h"
> @@ -245,6 +246,11 @@ static void PRINTF(1, 2) lexical_error(const char *fmt, ...);
>  <*>"&&"		{ return DT_AND; };
>  <*>"||"		{ return DT_OR; };
>  
> +<*>{NONASCII}	{
> +			DPRINT("Illegal character x%X\n", (unsigned)yytext[0]);
> +			YY_FATAL_ERROR("Unable to parse non-ASCII character");
> +		};
> +
>  <*>.		{
>  			DPRINT("Char: %c (\\x%02x)\n", yytext[0],
>  				(unsigned)yytext[0]);
> diff --git a/tests/bad-unicode.dts b/tests/bad-unicode.dts
> new file mode 100644
> index 0000000..19b3d2f
> --- /dev/null
> +++ b/tests/bad-unicode.dts
> @@ -0,0 +1,6 @@
> +/dts-v1/;
> +
> +/ {
> +    prop-str = "hello world";⌘
> +    prop-str2 = "hello world";
> +};
> diff --git a/tests/dtc-fatal.sh b/tests/dtc-fatal.sh
> index 08a4d29..56cdc1c 100755
> --- a/tests/dtc-fatal.sh
> +++ b/tests/dtc-fatal.sh
> @@ -3,13 +3,20 @@
>  SRCDIR=`dirname "$0"`
>  . "$SRCDIR/testutils.sh"
>  
> +if [ "$1" = "-r" ]; then
> +    expected="$2"
> +    shift 2
> +else
> +    expected="1"
> +fi
> +
>  verbose_run $VALGRIND "$DTC" -o/dev/null "$@"
>  ret="$?"
>  
>  if [ "$ret" -gt 127 ]; then
>      FAIL "dtc killed by signal (ret=$ret)"
> -elif [ "$ret" != "1" ]; then
> -    FAIL "dtc returned incorrect status $ret instead of 1"
> +elif [ "$ret" != "$expected" ]; then
> +    FAIL "dtc returned incorrect status $ret instead of $expected"
>  fi
>  
>  PASS
> diff --git a/tests/run_tests.sh b/tests/run_tests.sh
> index 4b8dada..c9bc095 100755
> --- a/tests/run_tests.sh
> +++ b/tests/run_tests.sh
> @@ -764,6 +764,7 @@ dtc_tests () {
>      run_sh_test "$SRCDIR/dtc-fatal.sh" -I dts -O dtb nosuchfile.dts
>      run_sh_test "$SRCDIR/dtc-fatal.sh" -I dtb -O dtb nosuchfile.dtb
>      run_sh_test "$SRCDIR/dtc-fatal.sh" -I fs -O dtb nosuchfile
> +    run_sh_test "$SRCDIR/dtc-fatal.sh" -r 2 -I dts -O dtb bad-unicode.dts
>  
>      # Dependencies
>      run_dtc_test -I dts -O dtb -o dependencies.test.dtb -d dependencies.test.d "$SRCDIR/dependencies.dts"

-- 
David Gibson			| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au	| minimalist, thank you.  NOT _the_ _other_
				| _way_ _around_!
http://www.ozlabs.org/~dgibson

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 833 bytes --]

^ permalink raw reply	[flat|nested] 2+ messages in thread

end of thread, other threads:[~2021-03-23  3:32 UTC | newest]

Thread overview: 2+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2021-03-14  4:40 [PATCH] dtc: Return error when reading non-ascii chars montytyper-uAjRD0nVeow
     [not found] ` <BY5PR14MB37654DDEFC00C8C1ABAD35DECE6D9-ee0W+6mJUHcCBBnP8riMx04NFAmbP9jRvxpqHgZTriW3zl9H0oFU5g@public.gmane.org>
2021-03-23  3:32   ` David Gibson

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).