From: Pablo Neira <pablo@eurodev.net>
To: Netfilter Development Mailinglist <netfilter-devel@lists.netfilter.org>
Cc: Harald Welte <laforge@netfilter.org>
Subject: [PATCH 1/4] RFC: fast string matching infrastrure for netfilter
Date: Sun, 09 Jan 2005 23:23:09 +0100 [thread overview]
Message-ID: <41E1AECD.6020209@eurodev.net> (raw)
[-- Attachment #1: Type: text/plain, Size: 527 bytes --]
Hi,
I've finished a first usable version of a infrastructure to look for
matchings in a packet. Features:
* A library consisting in three public functions: constructor,
destructor and searching.
* Boyer-moore algorithm to perform fast matchings.
* Brute force search on the edges of two fragments to look for
fragmented matches, that is O(m) searchs where m is the size of the
pattern, it's not that bad for small pattern I think. It's fragment
aware by means of rusty's skb_iter functions.
Comments welcome.
--
Pablo
[-- Attachment #2: string-match.patch --]
[-- Type: text/x-patch, Size: 7683 bytes --]
--- /dev/null 2004-09-23 01:18:13.000000000 +0200
+++ linux-2.5/net/ipv4/netfilter/nf_string_match.c 2005-01-09 21:54:04.000000000 +0100
@@ -0,0 +1,264 @@
+/* Efficient string matching fragment aware infrastrure for netfilter
+ * Copyright (C) 2004 Pablo Neira Ayuso <pablo@eurodev.net>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * Idea:
+ *
+ * This algorithm used Boyer-Moore fast matching algorithm to look
+ * for a pattern in the payload of a packet. Since it is fragment
+ * aware, it looks for partial matches on the edges of two fragments.
+ *
+ * Literature:
+ *
+ * A Fast String Searching Algorithm, R.S. Boyer and Moore.
+ * Communications of the Association for Computing Machinery,
+ * 20(10), 1977, pp. 762-772.
+ * http://www.cs.utexas.edu/users/moore/publications/fstrpos.pdf
+ *
+ * Handbook of Exact String Matching Algorithms, Thierry Lecroq, 2004
+ * http://www-igm.univ-mlv.fr/~lecroq/string/string.pdf
+ *
+ * Implementation:
+ *
+ * Boyer Moore implementation heavily based on Gianni Tedesco's.
+ */
+
+#include <linux/module.h>
+#include <linux/netfilter_ipv4/nf_string_match.h>
+#include <linux/skbuff.h>
+
+#if 0
+#define DEBUGP printk
+#else
+#define DEBUGP(format, args...)
+#endif
+
+/* Compute constant structures needed by the boyer-moore algorithm,
+ * this is done just ONCE. */
+static void
+sm_preprocess(struct nf_string_match *sm)
+{
+ int i, j, len[ASIZE], ended;
+
+ /* Compute the bad caracter shift array */
+ for (i = 0; i < ASIZE; i++)
+ sm->badshift[i] = sm->patlen;
+ for (i = 0; i < sm->patlen - 1; i++)
+ sm->badshift[(int) sm->pat[i]] = sm->patlen - 1 - i;
+
+ /* Compute the good shift array, used to match reocurrences
+ * of a subpattern */
+ for (i = 1; i < sm->patlen; i++) {
+ for (j = 0; j < sm->patlen && sm->pat[sm->patlen -1 - j]
+ == sm->pat[sm->patlen - 1 - i - j]; j++);
+ len[i] = j;
+ }
+
+ sm->goodshift[0] = 1;
+ for (i = 1; i < sm->patlen; i++)
+ sm->goodshift[i] = sm->patlen;
+ for (i = sm->patlen - 1; i > 0; i--)
+ sm->goodshift[len[i]] = i;
+
+ ended = 0;
+ for (i = 0; i < sm->patlen; i++) {
+ if (len[i] == sm->patlen - 1 - i)
+ ended = i;
+ if (ended)
+ sm->goodshift[i] = ended;
+ }
+}
+
+struct nf_string_match *
+nf_string_match_create(const char *pat, int patlen)
+{
+ struct nf_string_match *sm;
+
+ sm = kmalloc(sizeof(struct nf_string_match), GFP_KERNEL);
+ if (!sm)
+ return NULL;
+ memset(sm, 0, sizeof(struct nf_string_match));
+
+ sm->goodshift = (int *) kmalloc(patlen * sizeof(int), GFP_KERNEL);
+ if (!sm->goodshift)
+ return NULL;
+ memset(sm->goodshift, 0, sizeof(int) * patlen);
+
+ sm->pat = (char *) kmalloc(patlen * sizeof(int), GFP_KERNEL);
+ if (!sm->pat) {
+ kfree(sm->goodshift);
+ return NULL;
+ }
+ memset(sm->pat, patlen, sizeof(char) * patlen);
+
+ strcpy(sm->pat, pat);
+ sm->patlen = patlen;
+
+ /* Preprocess the pattern */
+ sm_preprocess(sm);
+
+ return sm;
+}
+
+/* release the structure */
+void nf_string_match_destroy(struct nf_string_match *sm)
+{
+ kfree(sm->goodshift);
+ kfree(sm->pat);
+ kfree(sm);
+}
+
+#define MAX(a,b) a>b?a:b
+
+/* this functions returns the offset where a matching was found */
+static unsigned int
+sm_search(struct nf_string_match *sm, unsigned char *data, int size,
+ unsigned int *offset)
+{
+ int right_end = sm->patlen, sk, sh, i;
+
+ while (right_end < size) {
+ DEBUGP("Searching in position %d (%c)\n",
+ right_end, data[right_end]);
+ for (i = 0; i < sm->patlen && data[right_end - i]
+ == sm->pat[sm->patlen - 1 - i]; i++);
+ if (i == sm->patlen) {
+ /* London calling... */
+ DEBUGP("found!\n");
+ *offset += (right_end - (sm->patlen - 1));
+ return 1;
+ }
+
+ sk = sm->badshift[data[right_end - i]];
+ sh = sm->goodshift[i];
+ DEBUGP("bad shift: %d good shift: %d\n", sk, sh);
+
+ /* Now jumping to... */
+ right_end = MAX(right_end - i + sk, right_end + sh);
+ }
+ *offset += size;
+ return 0;
+}
+
+/* Brute force: look for a partial matching at the end of the packet.
+ * A match never hits here, it's impossible. */
+static void
+sm_frag_search_end(struct nf_string_match *sm, void *ptr, int size,
+ int *partial)
+{
+ int i;
+ unsigned char *data = ptr + size - sm->patlen;
+
+ for (i=0; i < sm->patlen; i++) {
+ DEBUGP("end: %c == %c (%d)\n", data[i],
+ sm->pat[*partial], *partial);
+
+ if (data[i] == sm->pat[*partial])
+ (*partial)++;
+ else {
+ if (data[i] == sm->pat[0])
+ *partial = 1;
+ else
+ *partial = 0;
+ }
+ }
+ DEBUGP("sm->partial:%d\n", *partial);
+}
+
+/* Brute force: Look for a matching at the beginning. Yes, here
+ * we can hit a full match */
+static int
+sm_frag_search_begin(struct nf_string_match *sm, void *ptr, int size,
+ unsigned int *offset, int *partial)
+{
+ int i, j = sm->patlen - *partial;
+ unsigned char *data = (unsigned char *) ptr;
+
+ for (i=0; i<j; i++) {
+ DEBUGP("begin: %c == %c (%d)\n", data[i],
+ sm->pat[*partial], *partial);
+
+ if (data[i] == sm->pat[*partial])
+ (*partial)++;
+ else
+ *partial = 0;
+ }
+
+ if (*partial == sm->patlen) {
+ DEBUGP("Fragmented match!\n");
+ *offset += i;
+ return 1;
+ }
+
+ /* No matching founds, so reset counter */
+ *partial = 0;
+
+ return 0;
+}
+
+static inline int
+frag_search(void *data, int size, struct nf_string_match *sm,
+ unsigned int *offset, int *partial)
+{
+ /* There was a partial match before */
+ if (sm_frag_search_begin(sm, data, size, offset, partial))
+ return 1;
+
+ if (sm_search(sm, data, size, offset))
+ return 1;
+
+ sm_frag_search_end(sm, data, size, partial);
+
+ return 0;
+}
+
+int
+nf_string_match_search(const struct sk_buff *skb, struct nf_string_match *sm,
+ unsigned int *offset)
+{
+ /* make sure offset is always set to zero */
+ *offset = 0;
+
+ if (!skb_is_nonlinear(skb)) {
+ DEBUGP("linear searching\n");
+ /* General case: look for pattern in linear packet */
+ if (sm_search(sm, skb->data, skb->len, offset))
+ return 1;
+ } else {
+ /* The packet is fragmented */
+ struct skb_iter i;
+ int partial = 0;
+
+ DEBUGP("non linear searching\n");
+
+ /* Look for the pattern in first fragment */
+ skb_iter_first(skb, &i);
+ if (frag_search(i.data, i.len, sm, offset, &partial))
+ return 1;
+
+ /* Now, the rest of fragments ... */
+ while (skb_iter_next(skb, &i)) {
+ DEBUGP("%p (size:%d)\n", i.data, i.len);
+ if (frag_search(i.data, i.len, sm, offset, &partial))
+ return 1;
+ }
+ }
+
+ return 0;
+}
+
+static int __init init(void) { return 1; }
+static void __exit fini(void) {}
+
+module_init(init);
+module_exit(fini);
+
+MODULE_LICENSE("GPL");
+
+EXPORT_SYMBOL_GPL(nf_string_match_create);
+EXPORT_SYMBOL_GPL(nf_string_match_search);
+EXPORT_SYMBOL_GPL(nf_string_match_destroy);
--- /dev/null 2004-09-23 01:18:13.000000000 +0200
+++ linux-2.5/include/linux/netfilter_ipv4/nf_string_match.h 2005-01-09 19:28:25.000000000 +0100
@@ -0,0 +1,24 @@
+#ifndef _STRING_MATCHING_H
+#define _STRING_MATCHING_H
+
+#include <linux/skbuff.h>
+
+/* Alphabet size */
+#define ASIZE 256
+
+struct nf_string_match {
+ char *pat;
+ int patlen;
+
+ int badshift[ASIZE];
+ int *goodshift;
+};
+
+extern struct nf_string_match *
+nf_string_match_create(const char *pat, int patlen);
+extern int nf_string_match_search(const struct sk_buff *skb,
+ struct nf_string_match *sm,
+ unsigned int *offset);
+extern void nf_string_match_destroy(struct nf_string_match *sm);
+
+#endif
next reply other threads:[~2005-01-09 22:23 UTC|newest]
Thread overview: 23+ messages / expand[flat|nested] mbox.gz Atom feed top
2005-01-09 22:23 Pablo Neira [this message]
2005-01-09 23:10 ` [PATCH 1/4] RFC: fast string matching infrastrure for netfilter Patrick McHardy
2005-01-09 23:19 ` Pablo Neira
2005-01-10 19:54 ` nf_conntrack [was Re: [PATCH 1/4] RFC: fast string matching infrastrure for netfilter] Jozsef Kadlecsik
2005-01-10 20:31 ` Patrick McHardy
2005-01-10 21:28 ` Harald Welte
2005-01-14 2:45 ` Patrick McHardy
2005-01-14 4:31 ` nf_conntrack Yasuyuki KOZAKAI
2005-01-14 7:01 ` nf_conntrack [was Re: [PATCH 1/4] RFC: fast string matching infrastrure for netfilter] Rusty Russell
2005-01-14 8:20 ` Patrick Schaaf
2005-01-15 18:18 ` KOVACS Krisztian
2005-01-16 16:09 ` Jozsef Kadlecsik
2005-01-14 8:37 ` Harald Welte
2005-01-14 10:22 ` Rusty Russell
2005-01-14 18:02 ` Patrick McHardy
2005-01-14 17:52 ` Patrick McHardy
2005-01-14 8:31 ` Harald Welte
2005-01-14 18:00 ` Patrick McHardy
2005-01-14 3:16 ` Patrick McHardy
2005-01-10 21:20 ` Harald Welte
2005-01-10 8:49 ` [PATCH 1/4] RFC: fast string matching infrastrure for netfilter Sven Schuster
2005-01-10 23:18 ` Pablo Neira
2005-01-10 10:06 ` Harald Welte
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=41E1AECD.6020209@eurodev.net \
--to=pablo@eurodev.net \
--cc=laforge@netfilter.org \
--cc=netfilter-devel@lists.netfilter.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.