Re: [PATCH v2 07/10] target/hexagon: import lexer for idef-parser

qemu-devel.nongnu.org archive mirror
 help / color / mirror / Atom feed

From: Richard Henderson <richard.henderson@linaro.org>
To: Alessandro Di Federico <ale.qemu@rev.ng>, qemu-devel@nongnu.org
Cc: Alessandro Di Federico <ale@rev.ng>,
	bcain@quicinc.com, babush@rev.ng, tsimpson@quicinc.com,
	nizzo@rev.ng, philmd@redhat.com
Subject: Re: [PATCH v2 07/10] target/hexagon: import lexer for idef-parser
Date: Thu, 25 Feb 2021 14:24:16 -0800	[thread overview]
Message-ID: <3bfa70a4-9eff-9500-185c-831e64c103f6@linaro.org> (raw)
In-Reply-To: <20210225151856.3284701-8-ale.qemu@rev.ng>

On 2/25/21 7:18 AM, Alessandro Di Federico wrote:
> From: Paolo Montesel <babush@rev.ng>
> 
> Signed-off-by: Alessandro Di Federico <ale@rev.ng>
> Signed-off-by: Paolo Montesel <babush@rev.ng>
> ---
>  target/hexagon/idef-parser/idef-parser.h      | 245 +++++++
>  target/hexagon/idef-parser/idef-parser.lex    | 647 ++++++++++++++++++
>  target/hexagon/meson.build                    |   4 +
>  tests/docker/dockerfiles/alpine.docker        |   1 +
>  tests/docker/dockerfiles/centos7.docker       |   1 +
>  tests/docker/dockerfiles/centos8.docker       |   1 +
>  tests/docker/dockerfiles/debian10.docker      |   1 +
>  .../dockerfiles/fedora-i386-cross.docker      |   1 +
>  .../dockerfiles/fedora-win32-cross.docker     |   1 +
>  .../dockerfiles/fedora-win64-cross.docker     |   1 +
>  tests/docker/dockerfiles/fedora.docker        |   1 +
>  tests/docker/dockerfiles/opensuse-leap.docker |   1 +
>  tests/docker/dockerfiles/ubuntu.docker        |   1 +
>  tests/docker/dockerfiles/ubuntu1804.docker    |   1 +
>  tests/docker/dockerfiles/ubuntu2004.docker    |   3 +-
>  15 files changed, 909 insertions(+), 1 deletion(-)
>  create mode 100644 target/hexagon/idef-parser/idef-parser.h
>  create mode 100644 target/hexagon/idef-parser/idef-parser.lex
> 
> diff --git a/target/hexagon/idef-parser/idef-parser.h b/target/hexagon/idef-parser/idef-parser.h
> new file mode 100644
> index 0000000000..d08b9c80ea
> --- /dev/null
> +++ b/target/hexagon/idef-parser/idef-parser.h
> @@ -0,0 +1,245 @@
> +/*
> + * Copyright(c) 2019-2020 rev.ng Srls. All Rights Reserved.
> + *
> + * This library is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU Lesser General Public
> + * License as published by the Free Software Foundation; either
> + * version 2 of the License, or (at your option) any later version.
> + *
> + * This library is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> + * General Public License for more details.
> + *
> + * You should have received a copy of the GNU Lesser General Public
> + * License along with this library; if not, see <http://www.gnu.org/licenses/>.
> + */
> +
> +#ifndef IDEF_PARSER_H
> +#define IDEF_PARSER_H
> +
> +#include <inttypes.h>
> +#include <stdio.h>
> +#include <stdbool.h>
> +
> +#define TCGV_NAME_SIZE 7
> +#define MAX_WRITTEN_REGS 32
> +#define OFFSET_STR_LEN 32
> +#define ALLOC_LIST_LEN 32
> +#define ALLOC_NAME_SIZE 32
> +#define INIT_LIST_LEN 32
> +#define OUT_BUF_LEN (1024 * 1024)

Uh, right.  Let's not be declaring statically sized 1MB buffers, thanks.

> +#define SIGNATURE_BUF_LEN (128 * 1024)
> +#define HEADER_BUF_LEN (128 * 1024)

Nor 128k buffers.

> +/* Variadic macros to wrap the buffer printing functions */
> +#define EMIT(c, ...)                                                 \
> +    do {                                                             \
> +        (c)->out_c += snprintf((c)->out_buffer + (c)->out_c,         \
> +                               OUT_BUF_LEN - (c)->out_c,             \
> +                               __VA_ARGS__);                         \

There's nothing here that can't be better done with glib GString, with no
pre-determined buffer sizes.

E.g.

  g_string_append_printf((c)->out_str, __VA_ARGS__)

which seems simple enough to drop the macros entirely.

> +/**
> + * Semantic record of the EXTRACT token, identifying the cast operator
> + */
> +typedef struct HexExtract {
> +    int bit_width;          /**< Bit width of the extract operator           */
> +    int storage_bit_width;  /**< Actual bit width of the extract operator    */
> +    bool is_unsigned;       /**< Unsigned flag for the extract operator      */
> +} HexExtract;

All extracts begin at the lsb?

> +/**
> + * Enum of the possible rvalue types, used in the HexValue.type field
> + */
> +enum RvalueUnionTag {REGISTER, TEMP, IMMEDIATE, PREDICATE, VARID};

Typedef this.

> +    enum RvalueUnionTag type; /**< Type of the rvalue                        */

so you can drop the enum here.

> +enum OpType {ADD_OP, SUB_OP, MUL_OP, DIV_OP, ASL_OP, ASR_OP, LSR_OP, ANDB_OP,
> +             ORB_OP, XORB_OP, ANDL_OP, MINI_OP, MAXI_OP, MOD_OP};

Likewise.

> +/**
> + * Data structure including instruction specific information, to be cleared
> + * out after the compilation of each instruction
> + */
> +typedef struct Inst {
> +    char *name;                   /**< Name of the compiled instruction      */
> +    char *code_begin;             /**< Beginning of instruction input code   */
> +    char *code_end;               /**< End of instruction input code         */
> +    int tmp_count;                /**< Index of the last declared TCGv temp  */
> +    int qemu_tmp_count;           /**< Index of the last declared int temp   */
> +    int if_count;                 /**< Index of the last declared if label   */
> +    int error_count;              /**< Number of generated errors            */
> +    Var allocated[ALLOC_LIST_LEN]; /**< Allocated VARID automatic vars       */
> +    int allocated_count;          /**< Elements contained in allocated[]     */
> +    HexValue init_list[INIT_LIST_LEN]; /**< List of initialized registers    */
> +    int init_count;               /**< Number of members of init_list        */
> +} Inst;
> +
> +/**
> + * Data structure representing the whole translation context, which in a
> + * reentrant flex/bison parser just like ours is passed between the scanner
> + * and the parser, holding all the necessary information to perform the
> + * parsing, this data structure survives between the compilation of different
> + * instructions
> + *
> + */
> +typedef struct Context {
> +    void *scanner;                /**< Reentrant parser state pointer        */
> +    char *input_buffer;           /**< Buffer containing the input code      */
> +    char *out_buffer;             /**< Buffer containing the output code     */
> +    int out_c;                    /**< Characters emitted into out_buffer    */
> +    char *signature_buffer;       /**< Buffer containing the signatures code */
> +    int signature_c;              /**< Characters emitted into sig..._buffer */
> +    char *header_buffer;          /**< Buffer containing the output code     */
> +    int header_c;                 /**< Characters emitted into header buffer */
> +    FILE *defines_file;           /**< FILE * of the generated header        */
> +    FILE *output_file;            /**< FILE * of the C output file           */
> +    FILE *enabled_file;           /**< FILE * of the list of enabled inst    */
> +    int total_insn;               /**< Number of instructions in input file  */
> +    int implemented_insn;         /**< Instruction compiled without errors   */
> +    Inst inst;                  /**< Parsing data of the current inst      */
> +} Context;

I have a notion that every "char *" should be "GString *".


> +"(unsigned int)"         { /* Skip c-style casts */ }

Why is this not treated like

> +"(size8"[us]"_t)"        { yylval->cast.bit_width = 8;
> +                           yylval->cast.is_unsigned = ((yytext[6]) == 'u');
> +                           return CAST; }
> +"(size16"[us]"_t)"       { yylval->cast.bit_width = 16;
> +                           yylval->cast.is_unsigned = ((yytext[7]) == 'u');
> +                           return CAST; }
> +"(int)"                  { yylval->cast.bit_width = 32;
> +                           yylval->cast.is_unsigned = false;
> +                           return CAST; }

these?  Certainly it should be placed nearby.

> +"CANCEL"                 { return CANC; }

Any reason not to spell out CANCEL?

> +"fREAD_LC"{ZERO_ONE}     { yylval->rvalue.type = REGISTER;
> +                           yylval->rvalue.reg.type = CONTROL;
> +                           yylval->rvalue.reg.id = LC0 + atoi(yytext + 8);

Why atoi and not yytext[8] - '0'?
While we're at it, {ZERO_ONE} seems unnecessary obfuscation over [01].

> +{DIGIT}+                 { yylval->rvalue.type = IMMEDIATE;
> +                           yylval->rvalue.bit_width = 64;
> +                           yylval->rvalue.is_unsigned = false;
> +                           yylval->rvalue.imm.type = VALUE;
> +                           yylval->rvalue.imm.value = atoi(yytext);
> +                           return IMM; }
> +{DIGIT}+"LL"             { yylval->rvalue.type = IMMEDIATE;
> +                           yylval->rvalue.bit_width = 64;
> +                           yylval->rvalue.is_unsigned = false;
> +                           yylval->rvalue.imm.type = VALUE;
> +                           yylval->rvalue.imm.value = atoi(yytext);

I assume "LL" means it's supposed to be producing a 64-bit result, which atoi
will not do.  You need to be using strtoll.

> +"0x"{HEX_DIGIT}+"ULL"    { yylval->rvalue.type = IMMEDIATE;
> +                           yylval->rvalue.bit_width = 64;
> +                           yylval->rvalue.is_unsigned = true;
> +                           yylval->rvalue.imm.type = VALUE;
> +                           yylval->rvalue.imm.value = strtoul(yytext, NULL, 16);

And of course here, strtoull.

> +{VAR_ID}                 { /* Variable name, we adopt the C names convention */
> +                           yylval->rvalue.type = VARID;
> +                           yylval->rvalue.var.name = strndup(yytext,
> +                                                             ALLOC_NAME_SIZE);
> +                           /* Default types are int */
> +                           yylval->rvalue.bit_width = 32;
> +                           if (yylval->rvalue.var.name == NULL) {

This is the sort of check you need not be doing, by using the correct
functions.  In this case, g_string_new(yytext).

Glib generally asserts on memory allocation failure -- you have to explicitly
use a "*_try_*" function if you want to manage failure yourself.  Which of
course we don't.


r~

next prev parent reply	other threads:[~2021-02-25 22:25 UTC|newest]

Thread overview: 30+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-02-25 15:18 [PATCH v2 00/10] target/hexagon: introduce idef-parser Alessandro Di Federico via
2021-02-25 15:18 ` [PATCH v2 01/10] target/hexagon: update MAINTAINERS for idef-parser Alessandro Di Federico via
2021-02-25 20:46   ` Richard Henderson
2021-02-25 15:18 ` [PATCH v2 02/10] target/hexagon: import README " Alessandro Di Federico via
2021-02-25 20:20   ` Richard Henderson
2021-03-01 14:50     ` Alessandro Di Federico via
2021-03-10 15:48     ` Taylor Simpson
2021-03-18 17:26       ` Alessandro Di Federico via
2021-04-05 21:26         ` Taylor Simpson
2021-02-25 15:18 ` [PATCH v2 03/10] target/hexagon: make helper functions non-static Alessandro Di Federico via
2021-02-25 20:47   ` Richard Henderson
2021-02-25 15:18 ` [PATCH v2 04/10] target/hexagon: introduce new helper functions Alessandro Di Federico via
2021-02-25 20:45   ` Richard Henderson
2021-02-25 15:18 ` [PATCH v2 05/10] target/hexagon: expose next PC in DisasContext Alessandro Di Federico via
2021-02-25 20:48   ` Richard Henderson
2021-02-25 15:18 ` [PATCH v2 06/10] target/hexagon: prepare input for the idef-parser Alessandro Di Federico via
2021-02-25 21:34   ` Richard Henderson
2021-03-01 16:26     ` Paolo Montesel via
2021-02-25 15:18 ` [PATCH v2 07/10] target/hexagon: import lexer for idef-parser Alessandro Di Federico via
2021-02-25 22:24   ` Richard Henderson [this message]
2021-02-25 15:18 ` [PATCH v2 08/10] target/hexagon: import parser " Alessandro Di Federico via
2021-02-26  3:30   ` Richard Henderson
2021-03-01 14:50     ` Alessandro Di Federico via
2021-03-23 16:52     ` Paolo Montesel via
2021-02-25 15:18 ` [PATCH v2 09/10] target/hexagon: call idef-parser functions Alessandro Di Federico via
2021-02-26  3:47   ` Richard Henderson
2021-03-01 14:49     ` Alessandro Di Federico via
2021-02-25 15:18 ` [PATCH v2 10/10] target/hexagon: import additional tests Alessandro Di Federico via
2021-02-26  3:52   ` Richard Henderson
2021-02-25 16:03 ` [PATCH v2 00/10] target/hexagon: introduce idef-parser no-reply

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=3bfa70a4-9eff-9500-185c-831e64c103f6@linaro.org \
    --to=richard.henderson@linaro.org \
    --cc=ale.qemu@rev.ng \
    --cc=ale@rev.ng \
    --cc=babush@rev.ng \
    --cc=bcain@quicinc.com \
    --cc=nizzo@rev.ng \
    --cc=philmd@redhat.com \
    --cc=qemu-devel@nongnu.org \
    --cc=tsimpson@quicinc.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).