All of lore.kernel.org
 help / color / mirror / Atom feed
From: Richard Palethorpe <rpalethorpe@suse.de>
To: ltp@lists.linux.it
Subject: [LTP] [PATCH v2 1/2] lib: Add generic boolean expression parser and eval
Date: Tue, 27 Oct 2020 15:33:13 +0000	[thread overview]
Message-ID: <874kmfvg3q.fsf@suse.de> (raw)
In-Reply-To: <20201027110446.20416-2-chrubis@suse.cz>


Cyril Hrubis <chrubis@suse.cz> writes:

> Add a simple and generic boolean expression parser and evaluator. This
> is a second step in order to implement more complex kconfig expressions,
> but since we are likely to reuse the parser for other purposes in the
> future it's implemented as a generic boolean parser.
>
> Signed-off-by: Cyril Hrubis <chrubis@suse.cz>

Still have some comments below, but am very happy with this:
Reviewed-by: Richard Palethorpe <rpalethorpe@suse.com>

> ---
>
> v2:
>    - use pointer to the original string + lenght instead of allocation
>    - simplified the code a bit
>    - renamed a few fucntions and identifiers to make the code easier to
>      understand
>
>  include/tst_bool_expr.h          |  80 ++++++
>  lib/newlib_tests/.gitignore      |   1 +
>  lib/newlib_tests/tst_bool_expr.c | 128 ++++++++++
>  lib/tst_bool_expr.c              | 425 +++++++++++++++++++++++++++++++
>  4 files changed, 634 insertions(+)
>  create mode 100644 include/tst_bool_expr.h
>  create mode 100644 lib/newlib_tests/tst_bool_expr.c
>  create mode 100644 lib/tst_bool_expr.c
>
> diff --git a/include/tst_bool_expr.h b/include/tst_bool_expr.h
> new file mode 100644
> index 000000000..cafad3b00
> --- /dev/null
> +++ b/include/tst_bool_expr.h
> @@ -0,0 +1,80 @@
> +// SPDX-License-Identifier: GPL-2.0-or-later
> +/*
> + * Copyright (c) 2020 Cyril Hrubis <chrubis@suse.cz>
> + */
> +
> +#ifndef TST_BOOL_EXPR_H__
> +#define TST_BOOL_EXPR_H__
> +
> +enum tst_op {
> +	TST_OP_NOT,
> +	TST_OP_AND,
> +	TST_OP_OR,
> +	TST_OP_VAR,
> +	/* Used only internally */
> +	TST_OP_LPAR,
> +	TST_OP_RPAR,
> +};
> +
> +struct tst_expr {
> +	enum tst_op op;
> +	const char *tok;
> +	size_t tok_len;
> +	struct tst_expr *next;
> +	const void *priv;
> +};
> +
> +/*
> + * Parses an boolean expression and returns a simplified RPN version.
> + *
> + * If expression is not valid the call prints error into stderr and returns
> + * NULL. On success pointer to an expression is returned which can be evaluated
> + * by the tst_bool_expr_eval() function and has to be later freed by the
> + * caller.
> + *
> + * The boolean expression can consists of:
> + *
> + * - unary negation opeartion !
> + * - two binary operations & and |
> + * - correct sequence of parentheses ()
> + * - strings that are treated as boolean variables
> + *
> + *  e.g. '(A | B) & C' or 'Variable_1 & Variable_2' are both a valid boolean
> + *  expressions.
> + *
> + *  @expr String containing a boolean expression to be parsed.
> + *  @return Pointer to an RPN expression.
> + */
> +struct tst_expr *tst_bool_expr_parse(const char *expr);
> +
> +/*
> + * Prints an string representation of the expression into a FILE.
> + *
> + * @param A FILE to print to.
> + * @expr An expression to print.
> + */
> +void tst_bool_expr_print(FILE *f, struct tst_expr *expr);
> +
> +/*
> + * Evaluates an expression given a map for variables.
> + *
> + * The call will fail if:
> + * - map function returns -1 which indicates undefined variable
> + * - the eval function runs out of stack
> + *
> + * @param expr Boolean expression in RPN.
> + * @param map Mapping function for boolean variables.
> + *
> + * @return Returns 0 or 1 if expression was evaluated correctly and -1 on error.
> + */
> +int tst_bool_expr_eval(struct tst_expr *expr,
> +                       int (*map)(struct tst_expr *var));
> +
> +/*
> + * Frees the memory allocated by the tst_bool_expr_parse().
> + *
> + * @param Boolean expression.
> + */
> +void tst_bool_expr_free(struct tst_expr *expr);
> +
> +#endif	/* TST_BOOL_EXPR_H__ */
> diff --git a/lib/newlib_tests/.gitignore b/lib/newlib_tests/.gitignore
> index 44bc6526f..1e96db1da 100644
> --- a/lib/newlib_tests/.gitignore
> +++ b/lib/newlib_tests/.gitignore
> @@ -33,3 +33,4 @@ test_exec_child
>  test_kconfig
>  variant
>  test_guarded_buf
> +tst_bool_expr
> diff --git a/lib/newlib_tests/tst_bool_expr.c b/lib/newlib_tests/tst_bool_expr.c
> new file mode 100644
> index 000000000..49948d85f
> --- /dev/null
> +++ b/lib/newlib_tests/tst_bool_expr.c
> @@ -0,0 +1,128 @@
> +// SPDX-License-Identifier: GPL-2.0-or-later
> +/*
> + * Copyright (c) 2017 Cyril Hrubis <chrubis@suse.cz>
> + */
> +
> +/*
> + * Basic unit test for the bolean expression parser and evaluator.
> + */
> +
> +#include <string.h>
> +#include <stdio.h>
> +#include "tst_test.h"
> +#include "tst_bool_expr.h"
> +
> +static int a, b, c;
> +
> +static int map(struct tst_expr *var)
> +{
> +	if (!strncmp(var->tok, "A", var->tok_len))
> +		return a;
> +
> +	if (!strncmp(var->tok, "B", var->tok_len))
> +		return b;
> +
> +	if (!strncmp(var->tok, "C", var->tok_len))
> +		return c;
> +
> +	if (!strncmp(var->tok, "True", var->tok_len))
> +		return 1;
> +
> +	if (!strncmp(var->tok, "False", var->tok_len))
> +		return 0;
> +
> +	return -1;
> +}
> +
> +static void parse_fail(const char *expr)
> +{
> +	struct tst_expr *res;
> +
> +	tst_res(TINFO, "Parsing '%s'", expr);
> +
> +	res = tst_bool_expr_parse(expr);
> +
> +	if (res) {
> +		printf("In RPN: ");
> +		tst_bool_expr_print(stdout, res);
> +		printf("\n");
> +		tst_bool_expr_free(res);
> +		tst_res(TFAIL, "Expression was parsed");
> +	} else {
> +		tst_res(TPASS, "Parser returned an error");
> +	}
> +}
> +
> +static void do_eval_test(const char *expr_str, int set_a, int set_b, int set_c, int exp_res)
> +{
> +	struct tst_expr *expr;
> +	int res;
> +
> +	a = set_a;
> +	b = set_b;
> +	c = set_c;
> +
> +	tst_res(TINFO, "'%s' A=%i B=%i C=%i == %i", expr_str, a, b, c, exp_res);
> +
> +	expr = tst_bool_expr_parse(expr_str);
> +
> +	if (!expr) {
> +		tst_res(TFAIL, "Parser returned error");
> +		return;
> +	}
> +
> +	printf("In RPN: ");
> +	tst_bool_expr_print(stdout, expr);
> +	printf("\n");
> +
> +	res = tst_bool_expr_eval(expr, map);
> +
> +	if (res == exp_res)
> +		tst_res(TPASS, "Got %i", res);
> +	else
> +		tst_res(TFAIL, "Got %i", res);
> +
> +	tst_bool_expr_free(expr);
> +}
> +
> +static void do_test(void)
> +{
> +	do_eval_test("(A | B) & !!C", 0, 0, 0, 0);
> +	do_eval_test("(A | B) & !!C", 1, 0, 1, 1);
> +	do_eval_test("!A & B", 1, 0, 0, 0);
> +	do_eval_test("!A & B", 0, 1, 0, 1);
> +	do_eval_test("A & !B", 1, 0, 0, 1);
> +	do_eval_test("!!A & !!B", 0, 1, 0, 0);
> +	do_eval_test("!!A & !!B", 1, 1, 0, 1);
> +	do_eval_test("!(A & B) & C", 1, 1, 0, 0);
> +	do_eval_test("A & (B | C)", 1, 1, 0, 1);
> +	do_eval_test("A & B | C", 1, 1, 0, 1);
> +	do_eval_test("((((A)))&(B))", 1, 1, 0, 1);
> +	do_eval_test("   A  \t", 0, 0, 0, 0);
> +	do_eval_test("False & A", 1, 0, 0, 0);
> +	do_eval_test("! Undefined", 0, 0, 0, -1);
> +
> +	parse_fail("A!");
> +	parse_fail("A &");
> +	parse_fail("A B");
> +	parse_fail("A ) B");
> +	parse_fail("A ( B");
> +	parse_fail("A ( B )");
> +	parse_fail("A |");
> +	parse_fail("A ! B");
> +	parse_fail("A! & B");
> +	parse_fail("A & | B");
> +	parse_fail("A & (B |)");
> +	parse_fail("A & ( | B)");
> +	parse_fail("A & B &");
> +	parse_fail("((A )");
> +	parse_fail("& A");
> +	parse_fail("! &");
> +	parse_fail(")");
> +	parse_fail("| A");
> +	parse_fail("");
> +}
> +
> +static struct tst_test test = {
> +	.test_all = do_test,
> +};
> diff --git a/lib/tst_bool_expr.c b/lib/tst_bool_expr.c
> new file mode 100644
> index 000000000..fbcc74c82
> --- /dev/null
> +++ b/lib/tst_bool_expr.c
> @@ -0,0 +1,425 @@
> +// SPDX-License-Identifier: GPL-2.0-or-later
> +/*
> + * Copyright (c) 2020 Cyril Hrubis <chrubis@suse.cz>
> + */
> +/*
> + * Boolean expression is evaluated in three steps.
> + *
> + * First of all the string containing the expression is tokenized.
> + *
> + * Secondly the the expression is transformed to a postfix (RPN) notation by
> + * the shunting yard algorithm and the correctness of the expression is checked
> + * during the transformation as well. The fact that parenthesis are matched is
> + * asserted by the shunting yard algorithm itself while the rest is checked
> + * simply by checking if the preceding token is in a set of allowed tokens.
> + * This could be thought of as a simple open-coded state machine.
> + *
> + * An expression in the RPN form can be evaluated given a variable mapping
> + * function. The evaluation ignores most of errors because invalid expression
> + * will be rejected in the second step.
> + */
> +
> +#include <string.h>
> +#include <stdlib.h>
> +#include <stdio.h>
> +#include "tst_bool_expr.h"
> +
> +struct tok_list {
> +	struct tst_expr *first;
> +	struct tst_expr *last;
> +	unsigned int cnt;
> +};
> +
> +static void tok_list_append(struct tok_list *list, struct tst_expr *val)
> +{
> +	if (!val)
> +		return;
> +
> +	if (!list->first)
> +		list->first = val;
> +
> +	if (list->last)
> +		list->last->next = val;
> +
> +	list->last = val;
> +
> +	list->cnt++;
> +}
> +
> +static int char_to_op(char c)
> +{
> +	switch (c) {
> +	case '(':
> +		return TST_OP_LPAR;
> +	case ')':
> +		return TST_OP_RPAR;
> +	case '&':
> +		return TST_OP_AND;
> +	case '|':
> +		return TST_OP_OR;
> +	case '!':
> +		return TST_OP_NOT;
> +	default:
> +		return TST_OP_VAR;
> +	}
> +}
> +
> +static struct tst_expr *tok_alloc(const char *tok, size_t tok_len)
> +{
> +	struct tst_expr *ret;
> +
> +	if (!tok_len)
> +		return NULL;
> +
> +	ret = malloc(sizeof(struct tst_expr));
> +	if (!ret)
> +		return NULL;
> +
> +	memset(ret, 0, sizeof(*ret));
> +
> +	ret->tok = tok;
> +	ret->tok_len = tok_len;
> +	ret->op = char_to_op(ret->tok[0]);
> +
> +	return ret;
> +}
> +
> +static void tokenize(const char *expr, struct tok_list *ret)
> +{
> +	size_t i, j;
> +
> +	for (j = i = 0; expr[i]; i++) {
> +		switch (expr[i]) {
> +		case '(':
> +		case ')':
> +		case '!':
> +		case '&':
> +		case '|':
> +			tok_list_append(ret, tok_alloc(&expr[j], i - j));
> +			tok_list_append(ret, tok_alloc(&expr[i], 1));
> +			j = i+1;
> +		break;
> +		case '\t':
> +		case ' ':
> +			tok_list_append(ret, tok_alloc(&expr[j], i - j));
> +			j = i+1;
> +		break;
> +		default:
> +		break;
> +		}
> +	}
> +
> +	tok_list_append(ret, tok_alloc(&expr[j], i - j));
> +}
> +
> +void tst_bool_expr_print(FILE *f, struct tst_expr *expr)
> +{
> +	struct tst_expr *i;
> +	size_t j;
> +
> +	for (i = expr; i; i = i->next) {
> +		for (j = 0; j < i->tok_len; j++)
> +			putc(i->tok[j], f);
> +
> +		if (i->next)
> +			putc(' ', f);
> +	}
> +}
> +
> +static void tst_bool_expr_err(FILE *f, struct tst_expr *expr)
> +{
> +	struct tst_expr *i;
> +	unsigned int spaces = 0;
> +
> +	fprintf(f, "%s", expr->tok);
> +	fprintf(f, "\n");
> +
> +	for (i = expr; i; i = i->next) {
> +		if (i->priv)
> +			break;
> +	}
> +
> +	spaces = i->tok - expr->tok;
> +
> +	while (spaces--)
> +		putc(' ', f);
> +
> +	fprintf(f, "^\n");
> +	fprintf(f, "%s\n", (const char *)i->priv);
> +}
> +
> +static inline void stack_push(struct tst_expr *stack[], unsigned int *op_stack_pos,
> +                              struct tst_expr *op)
> +{
> +	stack[(*op_stack_pos)++] = op;
> +}
> +
> +static inline int stack_empty(unsigned int op_stack_pos)
> +{
> +	return op_stack_pos == 0;
> +}
> +
> +static inline struct tst_expr *stack_pop(struct tst_expr *stack[],
> +                                          unsigned int *op_stack_pos)
> +{
> +	if (stack_empty(*op_stack_pos))
> +		return NULL;
> +
> +	return stack[--(*op_stack_pos)];
> +}
> +
> +#define TST_OP_NONE -1
> +
> +static inline int stack_peek_op(struct tst_expr *stack[],
> +                                unsigned int op_stack_pos)
> +{
> +	if (stack_empty(op_stack_pos))
> +		return TST_OP_NONE;
> +
> +	return stack[op_stack_pos - 1]->op;
> +}
> +
> +/*
> + * This is a complete list of which tokens can happen before any of:
> + *  - variable
> + *  - left parentesis
> + *  - negation
> + *
> + * The -1 represents start of the expression.
> + */
> +static inline int check_one(int op)
> +{
> +	switch (op) {
> +	case TST_OP_AND:
> +	case TST_OP_OR:
> +	case TST_OP_NOT:
> +	case TST_OP_NONE:
> +	case TST_OP_LPAR:
> +		return 0;
> +	default:
> +		return 1;
> +	}
> +}
> +
> +/*
> + * And this checks for tokens that can happen before any of:
> + * - right parentesis
> + * - and
> + * - or
> + *
> + * This is also used to check that the last token in expression is correct one.
> + */
> +static inline int check_two(int op)
> +{
> +	switch (op) {
> +	case TST_OP_VAR:
> +	case TST_OP_RPAR:
> +		return 1;
> +	default:
> +		return 0;
> +	}
> +}
> +
> +/*
> + * Note that we avoid modifying the list until the end so that the caller can
> + * free memory correctly, which is the reason we have a stack for
> parentesis

If you use a new vector/stack for the output queue instead of an
intrusive list then this problem should go away. We have an upper bound
for any token/expr vector which is the length of the input string. So it
is possible to preallocate an input array instead of using a dynamically
allocated linked list.

> + * that are just freed at the end.
> + */
> +static struct tst_expr *shunting_yard(struct tok_list *list)
> +{
> +	struct tst_expr *op_stack[list->cnt];
> +	unsigned int op_stack_pos = 0;
> +	struct tst_expr *out[list->cnt + 1];
> +	unsigned int out_pos = 0;
> +	struct tst_expr *pars[list->cnt];
> +	unsigned int pars_pos = 0;
> +	struct tst_expr *i;
> +	unsigned int j;
> +	int prev_op = TST_OP_NONE;
> +
> +	for (i = list->first; i; i = i->next) {
> +		switch (i->op) {
> +		case TST_OP_VAR:
> +			if (check_one(prev_op)) {
> +				i->priv = "Expected operation";
> +				goto err;
> +			}
> +
> +			stack_push(out, &out_pos, i);
> +
> +			while (stack_peek_op(op_stack, op_stack_pos) == TST_OP_NOT)
> +				stack_push(out, &out_pos, stack_pop(op_stack, &op_stack_pos));
> +		break;
> +		case TST_OP_LPAR:
> +			if (check_one(prev_op)) {
> +				i->priv = "Expected operation";
> +				goto err;
> +			}
> +
> +			stack_push(op_stack, &op_stack_pos, i);
> +		break;
> +		case TST_OP_RPAR:
> +			if (!check_two(prev_op)) {
> +				i->priv = "Expected variable or )";
> +				goto err;
> +			}
> +
> +			stack_push(pars, &pars_pos, i);
> +
> +			/* pop everything till ( */
> +			for (;;) {
> +				struct tst_expr *op = stack_pop(op_stack, &op_stack_pos);
> +
> +				if (!op) {
> +					i->priv = "Missing (";
> +					goto err;
> +				}
> +
> +				if (op->op == TST_OP_LPAR) {
> +					stack_push(pars, &pars_pos, op);
> +					break;
> +				}
> +
> +				stack_push(out, &out_pos, op);
> +			}
> +
> +			while (stack_peek_op(op_stack, op_stack_pos) == TST_OP_NOT)
> +				stack_push(out, &out_pos, stack_pop(op_stack, &op_stack_pos));
> +		break;
> +		case TST_OP_NOT:
> +			if (check_one(prev_op)) {
> +				i->priv = "Expected operation";
> +				goto err;
> +			}
> +			stack_push(op_stack, &op_stack_pos, i);
> +		break;
> +		case TST_OP_AND:
> +		case TST_OP_OR:
> +			if (!check_two(prev_op)) {
> +				i->priv = "Expected variable or (";
> +				goto err;
> +			}
> +
> +			/*
> +			 * There can be at most one binary op on the stack
> +			 * since we pop the one present on the stack before we
> +			 * attempt to push new one they never accumulate.
> +			 */
> +			switch (stack_peek_op(op_stack, op_stack_pos)) {
> +			case TST_OP_AND:
> +			case TST_OP_OR:
> +				stack_push(out, &out_pos, stack_pop(op_stack, &op_stack_pos));
> +			break;
> +			}
> +
> +			stack_push(op_stack, &op_stack_pos, i);
> +		break;
> +		}
> +
> +		prev_op = i->op;
> +	}
> +
> +	if (!check_two(list->last->op)) {
> +		list->last->priv = "Unfinished expression";
> +		goto err;
> +	}
> +
> +	/* pop remaining operations */
> +	while ((i = stack_pop(op_stack, &op_stack_pos))) {
> +		if (i->op == TST_OP_LPAR) {
> +			i->priv = "Missing )";
> +			goto err;
> +		}
> +
> +		stack_push(out, &out_pos, i);
> +	}
> +
> +	/* free parentesis */
> +	for (j = 0; j < pars_pos; j++)
> +		free(pars[j]);
> +
> +	/* reconstruct the list */
> +	out[out_pos] = NULL;
> +
> +	for (j = 0; j < out_pos; j++)
> +		out[j]->next = out[j + 1];
> +
> +	return out[0];
> +err:
> +	return NULL;
> +}
> +
> +struct tst_expr *tst_bool_expr_parse(const char *expr)
> +{
> +	struct tok_list list = {};
> +	struct tst_expr *ret;
> +
> +	tokenize(expr, &list);
> +
> +	if (!list.first)
> +		return NULL;
> +
> +	ret = shunting_yard(&list);
> +
> +	if (!ret) {
> +		tst_bool_expr_err(stderr, list.first);
> +		tst_bool_expr_free(list.first);
> +		return NULL;
> +	}
> +
> +	return ret;
> +}
> +
> +#define MAX_STACK 16
> +
> +int tst_bool_expr_eval(struct tst_expr *expr,
> +                       int (*map)(struct tst_expr *var))
> +{
> +	struct tst_expr *i;
> +	int stack[MAX_STACK];
> +	int pos = -1;
> +
> +	for (i = expr; i; i = i->next) {
> +		switch (i->op) {
> +		case TST_OP_NOT:
> +			stack[pos] = !stack[pos];
> +		break;
> +		case TST_OP_OR:
> +			stack[pos-1] = stack[pos] || stack[pos-1];
> +			pos--;
> +		break;
> +		case TST_OP_AND:
> +			stack[pos-1] = stack[pos] && stack[pos-1];
> +			pos--;
> +		break;
> +		case TST_OP_VAR:
> +			if (pos + 1 >= MAX_STACK) {
> +				fprintf(stderr, "Eval out of stack!\n");
> +				return -1;
> +			}
> +
> +			stack[++pos] = map(i);
> +
> +			/* map reported undefined variable -> abort */
> +			if (stack[pos] == -1)
> +				return -1;
> +		break;
> +		/* does not happen */
> +		default:
> +		break;
> +		}
> +	}
> +

We could perform a final check on our logic here by asserting pos == 0.

> +	return stack[0];
> +}
> +
> +void tst_bool_expr_free(struct tst_expr *expr)
> +{
> +	struct tst_expr *i = expr, *tmp;
> +
> +	while (i) {
> +		tmp = i;
> +		i = i->next;
> +		free(tmp);
> +	}
> +}
> -- 
> 2.26.2

-- 
Thank you,
Richard.

  reply	other threads:[~2020-10-27 15:33 UTC|newest]

Thread overview: 9+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-10-27 11:04 [LTP] [PATCH v2 0/2] Add support for kconfig constraints Cyril Hrubis
2020-10-27 11:04 ` [LTP] [PATCH v2 1/2] lib: Add generic boolean expression parser and eval Cyril Hrubis
2020-10-27 15:33   ` Richard Palethorpe [this message]
2020-10-27 15:45     ` Cyril Hrubis
2020-10-27 11:04 ` [LTP] [PATCH v2 2/2] lib/tst_kconfig: Make use of boolean expression eval Cyril Hrubis
2020-10-28  3:45   ` Xiao Yang
2020-10-29 10:39     ` Cyril Hrubis
2020-11-02 10:17   ` Li Wang
2020-11-02 10:38     ` Cyril Hrubis

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=874kmfvg3q.fsf@suse.de \
    --to=rpalethorpe@suse.de \
    --cc=ltp@lists.linux.it \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.