* [PATCH 1/3] Revert "Still not satisfactory..."
2010-04-22 13:29 [PATCH 0/3] Regexp speedup Zdenek Kabelac
@ 2010-04-22 13:29 ` Zdenek Kabelac
2010-04-22 13:29 ` [PATCH 2/3] Revert "Add a regex optimisation pass for shared character prefixes." Zdenek Kabelac
2010-04-22 13:29 ` [PATCH 3/3] New regexp code made by EJT Zdenek Kabelac
2 siblings, 0 replies; 4+ messages in thread
From: Zdenek Kabelac @ 2010-04-22 13:29 UTC (permalink / raw)
To: lvm-devel
This reverts commit 4c952f0af4d742492cdf4f026072d597ff24919a.
Signed-off-by: Zdenek Kabelac <zkabelac@redhat.com>
---
libdm/regex/parse_rx.c | 69 +++++++++++++----------------------------------
1 files changed, 19 insertions(+), 50 deletions(-)
diff --git a/libdm/regex/parse_rx.c b/libdm/regex/parse_rx.c
index fb9406d..fb8b886 100644
--- a/libdm/regex/parse_rx.c
+++ b/libdm/regex/parse_rx.c
@@ -331,19 +331,17 @@ static struct rx_node *_or_term(struct parse_sp *ps)
/*----------------------------------------------------------------*/
-#define L_OR_R(a) (leftmost ? (a)->left : (a)->right)
-
/*
* The optimiser spots common prefixes on either side of an 'or' node, and
* lifts them outside the 'or' with a 'cat'.
*/
-static unsigned _depth(struct rx_node *r, unsigned leftmost)
+static unsigned _leftmost_depth(struct rx_node *r)
{
int count = 1;
while (r->type != CHARSET) {
count++;
- r = L_OR_R(r);
+ r = r->left;
}
return count;
@@ -380,69 +378,38 @@ static int _nodes_equal(struct rx_node *l, struct rx_node *r)
static int _find_leftmost_common(struct rx_node *or,
struct rx_node **l,
- struct rx_node **r,
- unsigned leftmost)
+ struct rx_node **r)
{
struct rx_node *left = or->left, *right = or->right;
- unsigned left_depth = _depth(left, leftmost);
- unsigned right_depth = _depth(right, leftmost);
+ unsigned left_depth = _leftmost_depth(left);
+ unsigned right_depth = _leftmost_depth(right);
while (left_depth > right_depth) {
- left = L_OR_R(left);
+ left = left->left;
left_depth--;
}
while (right_depth > left_depth) {
- right = L_OR_R(right);
+ right = right->left;
right_depth--;
}
while (left_depth) {
if (left->type == CAT && right->type == CAT) {
- if (_nodes_equal(L_OR_R(left), L_OR_R(right))) {
+ if (_nodes_equal(left->left, right->left)) {
*l = left;
*r = right;
return 1;
}
}
- left = L_OR_R(left);
- right = L_OR_R(right);
+ left = left->left;
+ right = right->left;
left_depth--;
}
return 0;
}
-static struct rx_node *_exchange_nodes(struct dm_pool *mem, struct rx_node *r, struct rx_node *left_cat, struct rx_node *right_cat, unsigned leftmost)
-{
- struct rx_node *new_r, *new_cat;
-
- /* FIXME Clean up leftmost */
- if (leftmost) {
- new_r = r->right;
- new_cat = _node(mem, CAT, left_cat->left, r);
- if (!new_cat)
- return_NULL;
- new_r->left = new_cat;
-
- memcpy(left_cat, left_cat->right, sizeof(*left_cat));
- r->right = right_cat->right;
-
- /* FIXME Avoid this */
- if (new_r->right == right_cat->right)
- new_r = new_r->left;
- } else {
- new_r = _node(mem, CAT, r, right_cat->right);
- if (!new_r)
- return_NULL;
-
- memcpy(left_cat, left_cat->left, sizeof(*left_cat));
- memcpy(right_cat, right_cat->left, sizeof(*right_cat));
- }
-
- return new_r;
-}
-
static struct rx_node *_pass(struct dm_pool *mem,
struct rx_node *r,
int *changed)
@@ -478,12 +445,16 @@ static struct rx_node *_pass(struct dm_pool *mem,
{
struct rx_node *left, *right;
- if (_find_leftmost_common(r, &left, &right, 1)) {
- r = _exchange_nodes(mem, r, left, right, 1);
+ if (_find_leftmost_common(r, &left, &right)) {
+ struct rx_node *new_r = _node(mem, CAT, left->left, r);
- *changed = 1;
- } else if (_find_leftmost_common(r, &left, &right, 0)) {
- r = _exchange_nodes(mem, r, left, right, 0);
+ if (!new_r)
+ return_NULL;
+
+ memcpy(left, left->right, sizeof(*left));
+ memcpy(right, right->right, sizeof(*right));
+
+ r = new_r;
*changed = 1;
}
@@ -502,8 +473,6 @@ static struct rx_node *_optimise(struct dm_pool *mem, struct rx_node *r)
/*
* We're looking for (or (... (cat <foo> a)) (... (cat <foo> b)))
* and want to turn it into (cat <foo> (or (... a) (... b)))
- *
- * (fa)|(fb) becomes f(a|b)
*/
/*
--
1.7.0.1
^ permalink raw reply related [flat|nested] 4+ messages in thread* [PATCH 2/3] Revert "Add a regex optimisation pass for shared character prefixes."
2010-04-22 13:29 [PATCH 0/3] Regexp speedup Zdenek Kabelac
2010-04-22 13:29 ` [PATCH 1/3] Revert "Still not satisfactory..." Zdenek Kabelac
@ 2010-04-22 13:29 ` Zdenek Kabelac
2010-04-22 13:29 ` [PATCH 3/3] New regexp code made by EJT Zdenek Kabelac
2 siblings, 0 replies; 4+ messages in thread
From: Zdenek Kabelac @ 2010-04-22 13:29 UTC (permalink / raw)
To: lvm-devel
This reverts commit 23dd4773d4decbe564090430e8d82eea1198344c.
Signed-off-by: Zdenek Kabelac <zkabelac@redhat.com>
---
WHATS_NEW_DM | 1 -
libdm/regex/parse_rx.c | 180 ++----------------------------------------------
2 files changed, 5 insertions(+), 176 deletions(-)
diff --git a/WHATS_NEW_DM b/WHATS_NEW_DM
index c96260c..60725a8 100644
--- a/WHATS_NEW_DM
+++ b/WHATS_NEW_DM
@@ -1,6 +1,5 @@
Version 1.02.47 -
=================================
- Add a regex optimisation pass for shared character prefixes.
Add dm_bit_and and dm_bitset_equal to libdevmapper.
Simplify dm_bitset_create.
Speed up dm_bit_get_next with ffs().
diff --git a/libdm/regex/parse_rx.c b/libdm/regex/parse_rx.c
index fb8b886..fdea1a3 100644
--- a/libdm/regex/parse_rx.c
+++ b/libdm/regex/parse_rx.c
@@ -329,182 +329,19 @@ static struct rx_node *_or_term(struct parse_sp *ps)
return n;
}
-/*----------------------------------------------------------------*/
-
-/*
- * The optimiser spots common prefixes on either side of an 'or' node, and
- * lifts them outside the 'or' with a 'cat'.
- */
-static unsigned _leftmost_depth(struct rx_node *r)
-{
- int count = 1;
-
- while (r->type != CHARSET) {
- count++;
- r = r->left;
- }
-
- return count;
-}
-
-/*
- * FIXME: a unique key could be built up as part of the parse, to make the
- * comparison quick. Alternatively we could use cons-hashing, and then
- * this would simply be a pointer comparison.
- */
-static int _nodes_equal(struct rx_node *l, struct rx_node *r)
-{
- if (l->type != r->type)
- return 0;
-
- switch (l->type) {
- case CAT:
- case OR:
- return _nodes_equal(l->left, r->left) &&
- _nodes_equal(l->right, r->right);
-
- case STAR:
- case PLUS:
- case QUEST:
- return _nodes_equal(l->left, r->left);
-
- case CHARSET:
- return dm_bitset_equal(l->charset, r->charset);
- }
-
- /* NOTREACHED */
- return_0;
-}
-
-static int _find_leftmost_common(struct rx_node *or,
- struct rx_node **l,
- struct rx_node **r)
-{
- struct rx_node *left = or->left, *right = or->right;
- unsigned left_depth = _leftmost_depth(left);
- unsigned right_depth = _leftmost_depth(right);
-
- while (left_depth > right_depth) {
- left = left->left;
- left_depth--;
- }
-
- while (right_depth > left_depth) {
- right = right->left;
- right_depth--;
- }
-
- while (left_depth) {
- if (left->type == CAT && right->type == CAT) {
- if (_nodes_equal(left->left, right->left)) {
- *l = left;
- *r = right;
- return 1;
- }
- }
- left = left->left;
- right = right->left;
- left_depth--;
- }
-
- return 0;
-}
-
-static struct rx_node *_pass(struct dm_pool *mem,
- struct rx_node *r,
- int *changed)
-{
- /*
- * walk the tree, optimising every 'or' node.
- */
- switch (r->type) {
- case CAT:
- if (!(r->left = _pass(mem, r->left, changed)))
- return_NULL;
-
- if (!(r->right = _pass(mem, r->right, changed)))
- return_NULL;
-
- break;
-
- case STAR:
- case PLUS:
- case QUEST:
- if (!(r->left = _pass(mem, r->left, changed)))
- return_NULL;
- break;
-
- case OR:
- /* It's important we optimise sub nodes first */
- if (!(r->left = _pass(mem, r->left, changed)))
- return_NULL;
-
- if (!(r->right = _pass(mem, r->right, changed)))
- return_NULL;
-
- {
- struct rx_node *left, *right;
-
- if (_find_leftmost_common(r, &left, &right)) {
- struct rx_node *new_r = _node(mem, CAT, left->left, r);
-
- if (!new_r)
- return_NULL;
-
- memcpy(left, left->right, sizeof(*left));
- memcpy(right, right->right, sizeof(*right));
-
- r = new_r;
-
- *changed = 1;
- }
- }
- break;
-
- case CHARSET:
- break;
- }
-
- return r;
-}
-
-static struct rx_node *_optimise(struct dm_pool *mem, struct rx_node *r)
-{
- /*
- * We're looking for (or (... (cat <foo> a)) (... (cat <foo> b)))
- * and want to turn it into (cat <foo> (or (... a) (... b)))
- */
-
- /*
- * Initially done as an inefficient multipass algorithm.
- */
- int changed;
-
- do {
- changed = 0;
- r = _pass(mem, r, &changed);
- } while (r && changed);
-
- return r;
-}
-
-/*----------------------------------------------------------------*/
-
struct rx_node *rx_parse_tok(struct dm_pool *mem,
const char *begin, const char *end)
{
struct rx_node *r;
struct parse_sp *ps = dm_pool_zalloc(mem, sizeof(*ps));
- if (!ps)
- return_NULL;
-
- ps->mem = mem;
- if (!(ps->charset = dm_bitset_create(mem, 256))) {
- log_error("Regex charset allocation failed");
- dm_pool_free(mem, ps);
+ if (!ps) {
+ stack;
return NULL;
}
+
+ ps->mem = mem;
+ ps->charset = dm_bitset_create(mem, 256);
ps->cursor = begin;
ps->rx_end = end;
_rx_get_token(ps); /* load the first token */
@@ -512,13 +349,6 @@ struct rx_node *rx_parse_tok(struct dm_pool *mem,
if (!(r = _or_term(ps))) {
log_error("Parse error in regex");
dm_pool_free(mem, ps);
- return NULL;
- }
-
- if (!(r = _optimise(mem, r))) {
- log_error("Regex optimisation error");
- dm_pool_free(mem, ps);
- return NULL;
}
return r;
--
1.7.0.1
^ permalink raw reply related [flat|nested] 4+ messages in thread* [PATCH 3/3] New regexp code made by EJT.
2010-04-22 13:29 [PATCH 0/3] Regexp speedup Zdenek Kabelac
2010-04-22 13:29 ` [PATCH 1/3] Revert "Still not satisfactory..." Zdenek Kabelac
2010-04-22 13:29 ` [PATCH 2/3] Revert "Add a regex optimisation pass for shared character prefixes." Zdenek Kabelac
@ 2010-04-22 13:29 ` Zdenek Kabelac
2 siblings, 0 replies; 4+ messages in thread
From: Zdenek Kabelac @ 2010-04-22 13:29 UTC (permalink / raw)
To: lvm-devel
It remove 'scratch' buffer usage and avoids expression optimization.
It creates regexp tree differently (and faster) then current algorihtm.
So far not tested deeply, but survived few things.
Signed-off-by: Zdenek Kabelac <zkabelac@redhat.com>
---
libdm/regex/matcher.c | 265 +++++++++++++++++++++++++++---------------------
libdm/regex/parse_rx.c | 4 +-
libdm/regex/parse_rx.h | 1 +
3 files changed, 154 insertions(+), 116 deletions(-)
diff --git a/libdm/regex/matcher.c b/libdm/regex/matcher.c
index d9c2d8a..19f4043 100644
--- a/libdm/regex/matcher.c
+++ b/libdm/regex/matcher.c
@@ -1,6 +1,6 @@
/*
- * Copyright (C) 2001-2004 Sistina Software, Inc. All rights reserved.
- * Copyright (C) 2004-2007 Red Hat, Inc. All rights reserved.
+ * Copyright (C) 2001-2004 Sistina Software, Inc. All rights reserved.
+ * Copyright (C) 2004-2010 Red Hat, Inc. All rights reserved.
*
* This file is part of the device-mapper userspace tools.
*
@@ -19,22 +19,28 @@
#include "assert.h"
struct dfa_state {
+ struct dfa_state *next;
int final;
- struct dfa_state *lookup[256];
-};
-
-struct state_queue {
- struct dfa_state *s;
dm_bitset_t bits;
- struct state_queue *next;
+ struct dfa_state *lookup[256];
};
struct dm_regex { /* Instance variables for the lexer */
struct dfa_state *start;
unsigned num_nodes;
+ unsigned num_charsets;
int nodes_entered;
struct rx_node **nodes;
+ int charsets_entered;
+ struct rx_node **charsets;
struct dm_pool *scratch, *mem;
+
+ /* stuff for on the fly dfa calculation */
+ dm_bitset_t charmap[256];
+ dm_bitset_t dfa_copy;
+ struct ttree *tt;
+ dm_bitset_t bs;
+ struct dfa_state *h, *t;
};
#define TARGET_TRANS '\0'
@@ -52,6 +58,33 @@ static int _count_nodes(struct rx_node *rx)
return r;
}
+static unsigned _count_charsets(struct rx_node *rx)
+{
+ if (rx->type == CHARSET)
+ return 1;
+
+ return (rx->left ? _count_charsets(rx->left) : 0) +
+ (rx->right ? _count_charsets(rx->right) : 0);
+}
+
+static void _enumerate_charsets_internal(struct rx_node *rx, unsigned *i)
+{
+ if (rx->type == CHARSET)
+ rx->charset_index = (*i)++;
+ else {
+ if (rx->left)
+ _enumerate_charsets_internal(rx->left, i);
+ if (rx->right)
+ _enumerate_charsets_internal(rx->right, i);
+ }
+}
+
+static void _enumerate_charsets(struct rx_node *rx)
+{
+ unsigned i = 0;
+ _enumerate_charsets_internal(rx, &i);
+}
+
static void _fill_table(struct dm_regex *m, struct rx_node *rx)
{
assert((rx->type != OR) || (rx->left && rx->right));
@@ -63,6 +96,8 @@ static void _fill_table(struct dm_regex *m, struct rx_node *rx)
_fill_table(m, rx->right);
m->nodes[m->nodes_entered++] = rx;
+ if (rx->type == CHARSET)
+ m->charsets[m->charsets_entered++] = rx;
}
static void _create_bitsets(struct dm_regex *m)
@@ -71,9 +106,9 @@ static void _create_bitsets(struct dm_regex *m)
for (i = 0; i < m->num_nodes; i++) {
struct rx_node *n = m->nodes[i];
- n->firstpos = dm_bitset_create(m->scratch, m->num_nodes);
- n->lastpos = dm_bitset_create(m->scratch, m->num_nodes);
- n->followpos = dm_bitset_create(m->scratch, m->num_nodes);
+ n->firstpos = dm_bitset_create(m->scratch, m->num_charsets);
+ n->lastpos = dm_bitset_create(m->scratch, m->num_charsets);
+ n->followpos = dm_bitset_create(m->scratch, m->num_charsets);
}
}
@@ -87,7 +122,7 @@ static void _calc_functions(struct dm_regex *m)
c1 = rx->left;
c2 = rx->right;
- if (dm_bit(rx->charset, TARGET_TRANS))
+ if (rx->type == CHARSET && dm_bit(rx->charset, TARGET_TRANS))
rx->final = final++;
switch (rx->type) {
@@ -127,8 +162,8 @@ static void _calc_functions(struct dm_regex *m)
break;
case CHARSET:
- dm_bit_set(rx->firstpos, i);
- dm_bit_set(rx->lastpos, i);
+ dm_bit_set(rx->firstpos, rx->charset_index);
+ dm_bit_set(rx->lastpos, rx->charset_index);
rx->nullable = 0;
break;
@@ -143,23 +178,21 @@ static void _calc_functions(struct dm_regex *m)
*/
switch (rx->type) {
case CAT:
- for (j = 0; j < m->num_nodes; j++) {
- if (dm_bit(c1->lastpos, j)) {
- struct rx_node *n = m->nodes[j];
+ for (j = 0; j < m->num_charsets; j++) {
+ struct rx_node *n = m->charsets[j];
+ if (dm_bit(c1->lastpos, j))
dm_bit_union(n->followpos,
- n->followpos, c2->firstpos);
- }
+ n->followpos, c2->firstpos);
}
break;
case PLUS:
case STAR:
- for (j = 0; j < m->num_nodes; j++) {
- if (dm_bit(rx->lastpos, j)) {
- struct rx_node *n = m->nodes[j];
+ for (j = 0; j < m->num_charsets; j++) {
+ struct rx_node *n = m->charsets[j];
+ if (dm_bit(rx->lastpos, j))
dm_bit_union(n->followpos,
- n->followpos, rx->firstpos);
- }
+ n->followpos, rx->firstpos);
}
break;
}
@@ -171,95 +204,91 @@ static struct dfa_state *_create_dfa_state(struct dm_pool *mem)
return dm_pool_zalloc(mem, sizeof(struct dfa_state));
}
-static struct state_queue *_create_state_queue(struct dm_pool *mem,
- struct dfa_state *dfa,
- dm_bitset_t bits)
+static struct dfa_state *_create_state_queue(struct dm_pool *mem,
+ struct dfa_state *dfa,
+ dm_bitset_t bits)
{
- struct state_queue *r = dm_pool_alloc(mem, sizeof(*r));
+ dfa->bits = dm_bitset_create(mem, bits[0]); /* first element is the size */
+ dm_bit_copy(dfa->bits, bits);
+ dfa->next = 0;
+ dfa->final = -1;
+ return dfa;
+}
- if (!r) {
- stack;
- return NULL;
+static void _calc_state(struct dm_regex *m, struct dfa_state *dfa, int a)
+{
+ int set_bits = 0, i;
+ struct dfa_state *tmp, *ldfa;
+ dm_bitset_t dfa_bits = dfa->bits;
+
+ dm_bit_and(m->dfa_copy, m->charmap[a], dfa_bits);
+
+ /* iterate through all the states in firstpos */
+ for (i = dm_bit_get_first(m->dfa_copy); i >= 0; i = dm_bit_get_next(m->dfa_copy, i)) {
+ if (a == TARGET_TRANS)
+ dfa->final = m->charsets[i]->final;
+
+ dm_bit_union(m->bs, m->bs, m->charsets[i]->followpos);
+ set_bits = 1;
}
- r->s = dfa;
- r->bits = dm_bitset_create(mem, bits[0]); /* first element is the size */
- dm_bit_copy(r->bits, bits);
- r->next = 0;
- return r;
+ if (set_bits) {
+ ldfa = ttree_lookup(m->tt, m->bs + 1);
+ if (!ldfa) {
+ /* push */
+ ldfa = _create_dfa_state(m->mem);
+ ttree_insert(m->tt, m->bs + 1, ldfa);
+ tmp = _create_state_queue(m->scratch, ldfa, m->bs);
+ if (!m->h)
+ m->h = m->t = tmp;
+ else {
+ m->t->next = tmp;
+ m->t = tmp;
+ }
+ }
+
+ dfa->lookup[a] = ldfa;
+ dm_bit_clear_all(m->bs);
+ }
}
static int _calc_states(struct dm_regex *m, struct rx_node *rx)
{
- unsigned iwidth = (m->num_nodes / DM_BITS_PER_INT) + 1;
- struct ttree *tt = ttree_create(m->scratch, iwidth);
- struct state_queue *h, *t, *tmp;
- struct dfa_state *dfa, *ldfa;
- int i, a, set_bits = 0, count = 0;
- dm_bitset_t bs, dfa_bits;
-
- if (!tt)
+ unsigned iwidth = (m->num_charsets / DM_BITS_PER_INT) + 1;
+ struct dfa_state *dfa;
+ int i, a;
+
+ m->tt = ttree_create(m->scratch, iwidth);
+ if (!m->tt)
return_0;
- if (!(bs = dm_bitset_create(m->scratch, m->num_nodes)))
+ if (!(m->bs = dm_bitset_create(m->scratch, m->num_charsets)))
return_0;
+ /* build some char maps */
+ for (a = 0; a < 256; a++) {
+ m->charmap[a] = dm_bitset_create(m->scratch, m->num_charsets);
+ if (!m->charmap[a])
+ return_0;
+ }
+
+ for (i = 0; i < m->num_nodes; i++) {
+ struct rx_node *n = m->nodes[i];
+ if (n->type == CHARSET) {
+ for (a = dm_bit_get_first(n->charset);
+ a >= 0; a = dm_bit_get_next(n->charset, a))
+ dm_bit_set(m->charmap[a], n->charset_index);
+ }
+ }
+
/* create first state */
dfa = _create_dfa_state(m->mem);
m->start = dfa;
- ttree_insert(tt, rx->firstpos + 1, dfa);
+ ttree_insert(m->tt, rx->firstpos + 1, dfa);
/* prime the queue */
- h = t = _create_state_queue(m->scratch, dfa, rx->firstpos);
- while (h) {
- /* pop state off front of the queue */
- dfa = h->s;
- dfa_bits = h->bits;
- h = h->next;
-
- /* iterate through all the inputs for this state */
- dm_bit_clear_all(bs);
- for (a = 0; a < 256; a++) {
- /* iterate through all the states in firstpos */
- for (i = dm_bit_get_first(dfa_bits);
- i >= 0; i = dm_bit_get_next(dfa_bits, i)) {
- if (dm_bit(m->nodes[i]->charset, a)) {
- if (a == TARGET_TRANS)
- dfa->final = m->nodes[i]->final;
-
- dm_bit_union(bs, bs,
- m->nodes[i]->followpos);
- set_bits = 1;
- }
- }
-
- if (set_bits) {
- ldfa = ttree_lookup(tt, bs + 1);
- if (!ldfa) {
- /* push */
- ldfa = _create_dfa_state(m->mem);
- ttree_insert(tt, bs + 1, ldfa);
- tmp =
- _create_state_queue(m->scratch,
- ldfa, bs);
- if (!h)
- h = t = tmp;
- else {
- t->next = tmp;
- t = tmp;
- }
-
- count++;
- }
-
- dfa->lookup[a] = ldfa;
- set_bits = 0;
- dm_bit_clear_all(bs);
- }
- }
- }
-
- log_debug("Matcher built with %d dfa states", count);
+ m->h = m->t = _create_state_queue(m->scratch, dfa, rx->firstpos);
+ m->dfa_copy = dm_bitset_create(m->scratch, m->num_charsets);
return 1;
}
@@ -270,16 +299,11 @@ struct dm_regex *dm_regex_create(struct dm_pool *mem, const char **patterns,
int i;
size_t len = 0;
struct rx_node *rx;
- struct dm_pool *scratch = dm_pool_create("regex matcher", 10 * 1024);
struct dm_regex *m;
+ struct dm_pool *scratch = mem;
- if (!scratch)
- return_NULL;
-
- if (!(m = dm_pool_alloc(mem, sizeof(*m)))) {
- dm_pool_destroy(scratch);
+ if (!(m = dm_pool_alloc(mem, sizeof(*m))))
return_NULL;
- }
memset(m, 0, sizeof(*m));
@@ -307,35 +331,47 @@ struct dm_regex *dm_regex_create(struct dm_pool *mem, const char **patterns,
m->mem = mem;
m->scratch = scratch;
m->num_nodes = _count_nodes(rx);
+ m->num_charsets = _count_charsets(rx);
+ _enumerate_charsets(rx);
m->nodes = dm_pool_alloc(scratch, sizeof(*m->nodes) * m->num_nodes);
if (!m->nodes)
goto_bad;
+ m->charsets = dm_pool_alloc(scratch, sizeof(*m->charsets) * m->num_charsets);
+ if (!m->charsets)
+ goto_bad;
+
_fill_table(m, rx);
_create_bitsets(m);
_calc_functions(m);
_calc_states(m, rx);
- dm_pool_destroy(scratch);
- m->scratch = NULL;
return m;
bad:
- dm_pool_destroy(scratch);
dm_pool_free(mem, m);
return NULL;
}
-static struct dfa_state *_step_matcher(int c, struct dfa_state *cs, int *r)
+static struct dfa_state *_step_matcher(struct dm_regex *m, int c, struct dfa_state *cs, int *r)
{
- if (!(cs = cs->lookup[(unsigned char) c]))
+ struct dfa_state *ns;
+
+ if (!(ns = cs->lookup[(unsigned char) c]))
+ _calc_state(m, cs, c);
+
+ if (!(ns = cs->lookup[(unsigned char) c]))
return NULL;
- if (cs->final && (cs->final > *r))
- *r = cs->final;
+ // Yuck, we have to special case the target trans
+ if (ns->final == -1)
+ _calc_state(m, ns, TARGET_TRANS);
+
+ if (ns->final && (ns->final > *r))
+ *r = ns->final;
- return cs;
+ return ns;
}
int dm_regex_match(struct dm_regex *regex, const char *s)
@@ -343,14 +379,15 @@ int dm_regex_match(struct dm_regex *regex, const char *s)
struct dfa_state *cs = regex->start;
int r = 0;
- if (!(cs = _step_matcher(HAT_CHAR, cs, &r)))
+ dm_bit_clear_all(regex->bs);
+ if (!(cs = _step_matcher(regex, HAT_CHAR, cs, &r)))
goto out;
for (; *s; s++)
- if (!(cs = _step_matcher(*s, cs, &r)))
+ if (!(cs = _step_matcher(regex, *s, cs, &r)))
goto out;
- _step_matcher(DOLLAR_CHAR, cs, &r);
+ _step_matcher(regex, DOLLAR_CHAR, cs, &r);
out:
/* subtract 1 to get back to zero index */
diff --git a/libdm/regex/parse_rx.c b/libdm/regex/parse_rx.c
index fdea1a3..f9e36ad 100644
--- a/libdm/regex/parse_rx.c
+++ b/libdm/regex/parse_rx.c
@@ -1,5 +1,5 @@
/*
- * Copyright (C) 2001-2004 Sistina Software, Inc. All rights reserved.
+ * Copyright (C) 2001-2004 Sistina Software, Inc. All rights reserved.
* Copyright (C) 2004-2007 Red Hat, Inc. All rights reserved.
*
* This file is part of the device-mapper userspace tools.
@@ -205,7 +205,7 @@ static struct rx_node *_node(struct dm_pool *mem, int type,
struct rx_node *n = dm_pool_zalloc(mem, sizeof(*n));
if (n) {
- if (!(n->charset = dm_bitset_create(mem, 256))) {
+ if (type == CHARSET && !(n->charset = dm_bitset_create(mem, 256))) {
dm_pool_free(mem, n);
return NULL;
}
diff --git a/libdm/regex/parse_rx.h b/libdm/regex/parse_rx.h
index 1c2393f..a81b774 100644
--- a/libdm/regex/parse_rx.h
+++ b/libdm/regex/parse_rx.h
@@ -39,6 +39,7 @@ struct rx_node {
struct rx_node *left, *right;
/* used to build the dfa for the toker */
+ unsigned charset_index;
int nullable, final;
dm_bitset_t firstpos;
dm_bitset_t lastpos;
--
1.7.0.1
^ permalink raw reply related [flat|nested] 4+ messages in thread