* [PATCH 3/8] textsearch: fix Boyer-Moore algorithm for case insensitive searching
@ 2008-06-21 7:54 Joonwoo Park
2008-06-21 8:25 ` Patrick McHardy
0 siblings, 1 reply; 6+ messages in thread
From: Joonwoo Park @ 2008-06-21 7:54 UTC (permalink / raw)
To: Pablo Neira Ayuso, Patrick McHardy; +Cc: netdev, netfilter-devel, Joonwoo Park
Fix Boyer-Moore algorithm to have capability of case insensitive
searching
Signed-off-by: Joonwoo Park <joonwpark81@gmail.com>
---
lib/ts_bm.c | 26 ++++++++++++++++++++------
1 files changed, 20 insertions(+), 6 deletions(-)
diff --git a/lib/ts_bm.c b/lib/ts_bm.c
index 4a7fce7..77749f3 100644
--- a/lib/ts_bm.c
+++ b/lib/ts_bm.c
@@ -39,6 +39,7 @@
#include <linux/module.h>
#include <linux/types.h>
#include <linux/string.h>
+#include <linux/ctype.h>
#include <linux/textsearch.h>
/* Alphabet size, use ASCII */
@@ -64,6 +65,7 @@ static unsigned int bm_find(struct ts_config *conf, struct ts_state *state)
unsigned int i, text_len, consumed = state->offset;
const u8 *text;
int shift = bm->patlen - 1, bs;
+ const u8 icase = conf->icase;
for (;;) {
text_len = conf->get_next_block(consumed, &text, conf, state);
@@ -75,7 +77,9 @@ static unsigned int bm_find(struct ts_config *conf, struct ts_state *state)
DEBUGP("Searching in position %d (%c)\n",
shift, text[shift]);
for (i = 0; i < bm->patlen; i++)
- if (text[shift-i] != bm->pattern[bm->patlen-1-i])
+ if ((icase ? toupper(text[shift-i])
+ : text[shift-i])
+ != bm->pattern[bm->patlen-1-i])
goto next;
/* London calling... */
@@ -111,14 +115,18 @@ static int subpattern(u8 *pattern, int i, int j, int g)
return ret;
}
-static void compute_prefix_tbl(struct ts_bm *bm)
+static void compute_prefix_tbl(struct ts_bm *bm, u8 icase)
{
int i, j, g;
for (i = 0; i < ASIZE; i++)
bm->bad_shift[i] = bm->patlen;
- for (i = 0; i < bm->patlen - 1; i++)
+ for (i = 0; i < bm->patlen - 1; i++) {
bm->bad_shift[bm->pattern[i]] = bm->patlen - 1 - i;
+ if (icase)
+ bm->bad_shift[tolower(bm->pattern[i])]
+ = bm->patlen - 1 - i;
+ }
/* Compute the good shift array, used to match reocurrences
* of a subpattern */
@@ -135,10 +143,11 @@ static void compute_prefix_tbl(struct ts_bm *bm)
}
static struct ts_config *bm_init(const void *pattern, unsigned int len,
- gfp_t gfp_mask)
+ u8 icase, gfp_t gfp_mask)
{
struct ts_config *conf;
struct ts_bm *bm;
+ int i;
unsigned int prefix_tbl_len = len * sizeof(unsigned int);
size_t priv_size = sizeof(*bm) + len + prefix_tbl_len;
@@ -146,11 +155,16 @@ static struct ts_config *bm_init(const void *pattern, unsigned int len,
if (IS_ERR(conf))
return conf;
+ conf->icase = icase;
bm = ts_config_priv(conf);
bm->patlen = len;
bm->pattern = (u8 *) bm->good_shift + prefix_tbl_len;
- memcpy(bm->pattern, pattern, len);
- compute_prefix_tbl(bm);
+ if (icase)
+ for (i = 0; i < len; i++)
+ bm->pattern[i] = toupper(((u8 *)pattern)[i]);
+ else
+ memcpy(bm->pattern, pattern, len);
+ compute_prefix_tbl(bm, icase);
return conf;
}
--
1.5.4.3
^ permalink raw reply related [flat|nested] 6+ messages in thread* Re: [PATCH 3/8] textsearch: fix Boyer-Moore algorithm for case insensitive searching
2008-06-21 7:54 [PATCH 3/8] textsearch: fix Boyer-Moore algorithm for case insensitive searching Joonwoo Park
@ 2008-06-21 8:25 ` Patrick McHardy
2008-06-21 9:17 ` Joonwoo Park
0 siblings, 1 reply; 6+ messages in thread
From: Patrick McHardy @ 2008-06-21 8:25 UTC (permalink / raw)
To: Joonwoo Park; +Cc: Pablo Neira Ayuso, netdev, netfilter-devel
Joonwoo Park wrote:
> @@ -75,7 +77,9 @@ static unsigned int bm_find(struct ts_config *conf, struct ts_state *state)
> DEBUGP("Searching in position %d (%c)\n",
> shift, text[shift]);
> for (i = 0; i < bm->patlen; i++)
> - if (text[shift-i] != bm->pattern[bm->patlen-1-i])
> + if ((icase ? toupper(text[shift-i])
> + : text[shift-i])
> + != bm->pattern[bm->patlen-1-i])
> goto next;
>
> /* London calling... */
> @@ -111,14 +115,18 @@ static int subpattern(u8 *pattern, int i, int j, int g)
> return ret;
> }
>
> -static void compute_prefix_tbl(struct ts_bm *bm)
> +static void compute_prefix_tbl(struct ts_bm *bm, u8 icase)
> {
> int i, j, g;
>
> for (i = 0; i < ASIZE; i++)
> bm->bad_shift[i] = bm->patlen;
> - for (i = 0; i < bm->patlen - 1; i++)
> + for (i = 0; i < bm->patlen - 1; i++) {
> bm->bad_shift[bm->pattern[i]] = bm->patlen - 1 - i;
> + if (icase)
> + bm->bad_shift[tolower(bm->pattern[i])]
> + = bm->patlen - 1 - i;
> + }
You use toupper() above and tolower() here, is that correct?
^ permalink raw reply [flat|nested] 6+ messages in thread* Re: [PATCH 3/8] textsearch: fix Boyer-Moore algorithm for case insensitive searching
2008-06-21 8:25 ` Patrick McHardy
@ 2008-06-21 9:17 ` Joonwoo Park
2008-06-23 11:33 ` Patrick McHardy
0 siblings, 1 reply; 6+ messages in thread
From: Joonwoo Park @ 2008-06-21 9:17 UTC (permalink / raw)
To: Patrick McHardy; +Cc: Pablo Neira Ayuso, netdev, netfilter-devel
2008/6/21 Patrick McHardy <kaber@trash.net>:
> Joonwoo Park wrote:
>>
>> @@ -75,7 +77,9 @@ static unsigned int bm_find(struct ts_config *conf,
>> struct ts_state *state)
>> DEBUGP("Searching in position %d (%c)\n",
>> shift, text[shift]);
>> for (i = 0; i < bm->patlen; i++) -
>> if (text[shift-i] != bm->pattern[bm->patlen-1-i])
>> + if ((icase ? toupper(text[shift-i])
>> + : text[shift-i])
>> + != bm->pattern[bm->patlen-1-i])
>> goto next;
>> /* London calling... */
>> @@ -111,14 +115,18 @@ static int subpattern(u8 *pattern, int i, int j, int
>> g)
>> return ret;
>> }
>> -static void compute_prefix_tbl(struct ts_bm *bm)
>> +static void compute_prefix_tbl(struct ts_bm *bm, u8 icase)
>> {
>> int i, j, g;
>> for (i = 0; i < ASIZE; i++)
>> bm->bad_shift[i] = bm->patlen;
>> - for (i = 0; i < bm->patlen - 1; i++)
>> + for (i = 0; i < bm->patlen - 1; i++) {
>> bm->bad_shift[bm->pattern[i]] = bm->patlen - 1 - i;
>> + if (icase)
>> + bm->bad_shift[tolower(bm->pattern[i])]
>> + = bm->patlen - 1 - i;
>> + }
>
> You use toupper() above and tolower() here, is that correct?
>
It should be, bm->pattern's characters are all upper case since It was
altered before.
So we should use toupper() for target string character to compare.
And to ignore case, we should prepare one more bad_shift array
calculation, opposite tolower() should be used for it.
^ permalink raw reply [flat|nested] 6+ messages in thread* Re: [PATCH 3/8] textsearch: fix Boyer-Moore algorithm for case insensitive searching
2008-06-21 9:17 ` Joonwoo Park
@ 2008-06-23 11:33 ` Patrick McHardy
2008-06-23 12:27 ` Pablo Neira Ayuso
0 siblings, 1 reply; 6+ messages in thread
From: Patrick McHardy @ 2008-06-23 11:33 UTC (permalink / raw)
To: Joonwoo Park; +Cc: Pablo Neira Ayuso, netdev, netfilter-devel, Thomas Graf
Joonwoo Park wrote:
> 2008/6/21 Patrick McHardy <kaber@trash.net>:
>>> +static void compute_prefix_tbl(struct ts_bm *bm, u8 icase)
>>> {
>>> int i, j, g;
>>> for (i = 0; i < ASIZE; i++)
>>> bm->bad_shift[i] = bm->patlen;
>>> - for (i = 0; i < bm->patlen - 1; i++)
>>> + for (i = 0; i < bm->patlen - 1; i++) {
>>> bm->bad_shift[bm->pattern[i]] = bm->patlen - 1 - i;
>>> + if (icase)
>>> + bm->bad_shift[tolower(bm->pattern[i])]
>>> + = bm->patlen - 1 - i;
>>> + }
>> You use toupper() above and tolower() here, is that correct?
>>
>
> It should be, bm->pattern's characters are all upper case since It was
> altered before.
> So we should use toupper() for target string character to compare.
> And to ignore case, we should prepare one more bad_shift array
> calculation, opposite tolower() should be used for it.
OK thanks. I'll wait for an ACK from Thomas before applying
them though.
^ permalink raw reply [flat|nested] 6+ messages in thread* Re: [PATCH 3/8] textsearch: fix Boyer-Moore algorithm for case insensitive searching
2008-06-23 11:33 ` Patrick McHardy
@ 2008-06-23 12:27 ` Pablo Neira Ayuso
2008-06-23 20:41 ` Joonwoo Park
0 siblings, 1 reply; 6+ messages in thread
From: Pablo Neira Ayuso @ 2008-06-23 12:27 UTC (permalink / raw)
To: Joonwoo Park; +Cc: Patrick McHardy, netdev, netfilter-devel, Thomas Graf
Patrick McHardy wrote:
> Joonwoo Park wrote:
>> 2008/6/21 Patrick McHardy <kaber@trash.net>:
>>>> +static void compute_prefix_tbl(struct ts_bm *bm, u8 icase)
>>>> {
>>>> int i, j, g;
>>>> for (i = 0; i < ASIZE; i++)
>>>> bm->bad_shift[i] = bm->patlen;
>>>> - for (i = 0; i < bm->patlen - 1; i++)
>>>> + for (i = 0; i < bm->patlen - 1; i++) {
>>>> bm->bad_shift[bm->pattern[i]] = bm->patlen - 1 - i;
>>>> + if (icase)
>>>> + bm->bad_shift[tolower(bm->pattern[i])]
>>>> + = bm->patlen - 1 - i;
>>>> + }
>>> You use toupper() above and tolower() here, is that correct?
>>>
>>
>> It should be, bm->pattern's characters are all upper case since It was
>> altered before.
>> So we should use toupper() for target string character to compare.
>> And to ignore case, we should prepare one more bad_shift array
>> calculation, opposite tolower() should be used for it.
>
> OK thanks. I'll wait for an ACK from Thomas before applying
> them though.
This is rather confusing. Why not set both the pattern and the bad_shift
to lower or upper case?
--
"Los honestos son inadaptados sociales" -- Les Luthiers
^ permalink raw reply [flat|nested] 6+ messages in thread* Re: [PATCH 3/8] textsearch: fix Boyer-Moore algorithm for case insensitive searching
2008-06-23 12:27 ` Pablo Neira Ayuso
@ 2008-06-23 20:41 ` Joonwoo Park
0 siblings, 0 replies; 6+ messages in thread
From: Joonwoo Park @ 2008-06-23 20:41 UTC (permalink / raw)
To: Pablo Neira Ayuso; +Cc: Patrick McHardy, netdev, netfilter-devel, Thomas Graf
2008/6/23 Pablo Neira Ayuso <pablo@netfilter.org>:
> Patrick McHardy wrote:
>> Joonwoo Park wrote:
>>> 2008/6/21 Patrick McHardy <kaber@trash.net>:
>>>>> +static void compute_prefix_tbl(struct ts_bm *bm, u8 icase)
>>>>> {
>>>>> int i, j, g;
>>>>> for (i = 0; i < ASIZE; i++)
>>>>> bm->bad_shift[i] = bm->patlen;
>>>>> - for (i = 0; i < bm->patlen - 1; i++)
>>>>> + for (i = 0; i < bm->patlen - 1; i++) {
>>>>> bm->bad_shift[bm->pattern[i]] = bm->patlen - 1 - i;
>>>>> + if (icase)
>>>>> + bm->bad_shift[tolower(bm->pattern[i])]
>>>>> + = bm->patlen - 1 - i;
>>>>> + }
>>>> You use toupper() above and tolower() here, is that correct?
>>>>
>>>
>>> It should be, bm->pattern's characters are all upper case since It was
>>> altered before.
>>> So we should use toupper() for target string character to compare.
>>> And to ignore case, we should prepare one more bad_shift array
>>> calculation, opposite tolower() should be used for it.
>>
>> OK thanks. I'll wait for an ACK from Thomas before applying
>> them though.
>
> This is rather confusing. Why not set both the pattern and the bad_shift
> to lower or upper case?
>
To be honest, pattern can be set to lower or upper case, it doesn't matter.
But for bad_shift, we should add one more array for diffrence of case.
So bad_shift array should have both of lower and upper.
>>>>> + for (i = 0; i < bm->patlen - 1; i++) {
>>>>> bm->bad_shift[bm->pattern[i]] = bm->patlen - 1 - i;
Therefore, at here, this makes bad_shift for upper case,
>>>>> + if (icase)
>>>>> + bm->bad_shift[tolower(bm->pattern[i])]
>>>>> + = bm->patlen - 1 - i;
At here, this makes bad_shift for lower case.
^ permalink raw reply [flat|nested] 6+ messages in thread
end of thread, other threads:[~2008-06-23 20:41 UTC | newest]
Thread overview: 6+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2008-06-21 7:54 [PATCH 3/8] textsearch: fix Boyer-Moore algorithm for case insensitive searching Joonwoo Park
2008-06-21 8:25 ` Patrick McHardy
2008-06-21 9:17 ` Joonwoo Park
2008-06-23 11:33 ` Patrick McHardy
2008-06-23 12:27 ` Pablo Neira Ayuso
2008-06-23 20:41 ` Joonwoo Park
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).