[PATCH 1/4] RFC: fast string matching infrastrure for netfilter

All of lore.kernel.org
 help / color / mirror / Atom feed

* [PATCH 1/4] RFC: fast string matching infrastrure for netfilter
@ 2005-01-09 22:23 Pablo Neira
  2005-01-09 23:10 ` Patrick McHardy
                   ` (2 more replies)
  0 siblings, 3 replies; 23+ messages in thread
From: Pablo Neira @ 2005-01-09 22:23 UTC (permalink / raw)
  To: Netfilter Development Mailinglist; +Cc: Harald Welte

[-- Attachment #1: Type: text/plain, Size: 527 bytes --]

Hi,

I've finished a first usable version of a infrastructure to look for 
matchings in a packet. Features:

* A library consisting in three public functions: constructor, 
destructor and searching.
* Boyer-moore algorithm to perform fast matchings.
* Brute force search on the edges of two fragments to look for 
fragmented matches, that is O(m) searchs where m is the size of the 
pattern, it's not that bad for small pattern I think. It's fragment 
aware by means of rusty's skb_iter functions.

Comments welcome.

--
Pablo

[-- Attachment #2: string-match.patch --]
[-- Type: text/x-patch, Size: 7683 bytes --]

--- /dev/null	2004-09-23 01:18:13.000000000 +0200
+++ linux-2.5/net/ipv4/netfilter/nf_string_match.c	2005-01-09 21:54:04.000000000 +0100
@@ -0,0 +1,264 @@
+/* Efficient string matching fragment aware infrastrure for netfilter
+ * Copyright (C) 2004 Pablo Neira Ayuso <pablo@eurodev.net>
+ *
+ *      This program is free software; you can redistribute it and/or
+ *      modify it under the terms of the GNU General Public License
+ *      as published by the Free Software Foundation; either version
+ *      2 of the License, or (at your option) any later version.
+ *
+ * Idea:
+ * 	
+ * 	This algorithm used Boyer-Moore fast matching algorithm to look
+ * 	for a pattern in the payload of a packet. Since it is fragment
+ * 	aware, it looks for partial matches on the edges of two fragments.
+ *
+ * Literature:
+ * 
+ *      A Fast String Searching Algorithm, R.S. Boyer and Moore.
+ *      Communications of the Association for Computing Machinery, 
+ *      20(10), 1977, pp. 762-772.
+ *	http://www.cs.utexas.edu/users/moore/publications/fstrpos.pdf
+ *
+ * 	Handbook of Exact String Matching Algorithms, Thierry Lecroq, 2004
+ * 	http://www-igm.univ-mlv.fr/~lecroq/string/string.pdf
+ * 
+ * Implementation:
+ * 
+ * 	Boyer Moore implementation heavily based on Gianni Tedesco's.
+ */
+
+#include <linux/module.h>
+#include <linux/netfilter_ipv4/nf_string_match.h>
+#include <linux/skbuff.h>
+
+#if 0
+#define DEBUGP printk
+#else
+#define DEBUGP(format, args...)
+#endif
+
+/* Compute constant structures needed by the boyer-moore algorithm,
+ * this is done just ONCE. */
+static void
+sm_preprocess(struct nf_string_match *sm)
+{
+	int i, j, len[ASIZE], ended;
+
+	/* Compute the bad caracter shift array */
+	for (i = 0; i < ASIZE; i++)
+		sm->badshift[i] = sm->patlen;
+	for (i = 0; i < sm->patlen - 1; i++)
+		sm->badshift[(int) sm->pat[i]] = sm->patlen - 1 - i;
+
+	/* Compute the good shift array, used to match reocurrences 
+	 * of a subpattern */
+	for (i = 1; i < sm->patlen; i++) {
+		for (j = 0; j < sm->patlen && sm->pat[sm->patlen -1 - j] 
+				== sm->pat[sm->patlen - 1 - i - j]; j++);
+		len[i] = j;
+	}  
+
+	sm->goodshift[0] = 1;
+	for (i = 1; i < sm->patlen; i++)
+		sm->goodshift[i] = sm->patlen;
+	for (i = sm->patlen - 1; i > 0; i--)
+		sm->goodshift[len[i]] = i;
+
+	ended = 0;
+	for (i = 0; i < sm->patlen; i++) {
+		if (len[i] == sm->patlen - 1 - i)
+			ended = i;
+		if (ended)
+			sm->goodshift[i] = ended;
+	}
+}
+
+struct nf_string_match *
+nf_string_match_create(const char *pat, int patlen)
+{
+	struct nf_string_match *sm;
+
+	sm = kmalloc(sizeof(struct nf_string_match), GFP_KERNEL);
+	if (!sm)
+		return NULL;
+	memset(sm, 0, sizeof(struct nf_string_match));
+
+	sm->goodshift = (int *) kmalloc(patlen * sizeof(int), GFP_KERNEL);
+	if (!sm->goodshift)
+		return NULL;
+	memset(sm->goodshift, 0, sizeof(int) * patlen);
+	
+	sm->pat = (char *) kmalloc(patlen * sizeof(int), GFP_KERNEL);
+	if (!sm->pat) {
+		kfree(sm->goodshift);
+		return NULL;
+	}
+	memset(sm->pat, patlen, sizeof(char) * patlen);
+	
+	strcpy(sm->pat, pat);
+	sm->patlen = patlen;
+
+	/* Preprocess the pattern */
+	sm_preprocess(sm);
+
+	return sm;
+}
+
+/* release the structure */
+void nf_string_match_destroy(struct nf_string_match *sm)
+{
+	kfree(sm->goodshift);
+	kfree(sm->pat);
+	kfree(sm);
+}
+
+#define MAX(a,b) a>b?a:b
+
+/* this functions returns the offset where a matching was found */
+static unsigned int
+sm_search(struct nf_string_match *sm, unsigned char *data, int size, 
+	  unsigned int *offset)
+{
+	int right_end = sm->patlen, sk, sh, i;
+
+	while (right_end < size) {
+		DEBUGP("Searching in position %d (%c)\n", 
+		       right_end, data[right_end]);
+		for (i = 0; i < sm->patlen && data[right_end - i] 
+				== sm->pat[sm->patlen - 1 - i]; i++);
+		if (i == sm->patlen) {
+			/* London calling... */
+			DEBUGP("found!\n");
+			*offset += (right_end - (sm->patlen - 1));
+			return 1;
+		}
+		
+		sk = sm->badshift[data[right_end - i]];  
+		sh = sm->goodshift[i];
+		DEBUGP("bad shift: %d good shift: %d\n", sk, sh);
+
+		/* Now jumping to... */
+		right_end = MAX(right_end - i + sk, right_end + sh);
+	}
+	*offset += size;
+	return 0;
+}
+
+/* Brute force: look for a partial matching at the end of the packet.
+ * A match never hits here, it's impossible. */
+static void
+sm_frag_search_end(struct nf_string_match *sm, void *ptr, int size, 
+		   int *partial)
+{
+	int i;
+	unsigned char *data = ptr + size - sm->patlen;
+
+	for (i=0; i < sm->patlen; i++) {
+		DEBUGP("end: %c == %c (%d)\n", data[i], 
+			sm->pat[*partial], *partial);
+		
+		if (data[i] == sm->pat[*partial])
+			(*partial)++;
+		else {
+			if (data[i] == sm->pat[0])
+				*partial = 1;
+			else 
+				*partial = 0;
+		}
+	}
+	DEBUGP("sm->partial:%d\n", *partial);
+}
+
+/* Brute force: Look for a matching at the beginning. Yes, here
+ * we can hit a full match */
+static int
+sm_frag_search_begin(struct nf_string_match *sm, void *ptr, int size, 
+		     unsigned int *offset, int *partial)
+{
+	int i, j = sm->patlen - *partial;
+	unsigned char *data = (unsigned char *) ptr;
+
+	for (i=0; i<j; i++) {
+		DEBUGP("begin: %c == %c (%d)\n", data[i],
+			sm->pat[*partial], *partial);
+
+		if (data[i] == sm->pat[*partial])
+			(*partial)++;
+		else
+			*partial = 0;
+	}
+
+	if (*partial == sm->patlen) {
+		DEBUGP("Fragmented match!\n");
+		*offset += i;
+		return 1;
+	}
+
+	/* No matching founds, so reset counter */
+	*partial = 0;
+
+	return 0;
+}
+
+static inline int
+frag_search(void *data, int size, struct nf_string_match *sm, 
+	    unsigned int *offset, int *partial)
+{
+	/* There was a partial match before */
+	if (sm_frag_search_begin(sm, data, size, offset, partial))
+		return 1;
+
+	if (sm_search(sm, data, size, offset))
+		return 1;
+
+	sm_frag_search_end(sm, data, size, partial);
+
+	return 0;
+}
+
+int
+nf_string_match_search(const struct sk_buff *skb, struct nf_string_match *sm,
+		       unsigned int *offset)
+{
+	/* make sure offset is always set to zero */
+	*offset = 0;
+
+	if (!skb_is_nonlinear(skb)) {
+		DEBUGP("linear searching\n");
+		/* General case: look for pattern in linear packet */
+		if (sm_search(sm, skb->data, skb->len, offset))
+			return 1;
+	} else {
+		/* The packet is fragmented */
+		struct skb_iter i;
+		int partial = 0;
+
+		DEBUGP("non linear searching\n");
+
+		/* Look for the pattern in first fragment */
+		skb_iter_first(skb, &i);
+		if (frag_search(i.data, i.len, sm, offset, &partial))
+			return 1;
+
+		/* Now, the rest of fragments ... */
+		while (skb_iter_next(skb, &i)) {
+			DEBUGP("%p (size:%d)\n", i.data, i.len);
+			if (frag_search(i.data, i.len, sm, offset, &partial))
+				return 1;
+		}
+	}
+
+	return 0;
+}
+
+static int __init init(void) { return 1; }
+static void __exit fini(void) {}
+
+module_init(init);
+module_exit(fini);
+
+MODULE_LICENSE("GPL");
+
+EXPORT_SYMBOL_GPL(nf_string_match_create);
+EXPORT_SYMBOL_GPL(nf_string_match_search);
+EXPORT_SYMBOL_GPL(nf_string_match_destroy);
--- /dev/null	2004-09-23 01:18:13.000000000 +0200
+++ linux-2.5/include/linux/netfilter_ipv4/nf_string_match.h	2005-01-09 19:28:25.000000000 +0100
@@ -0,0 +1,24 @@
+#ifndef _STRING_MATCHING_H
+#define _STRING_MATCHING_H
+
+#include <linux/skbuff.h>
+
+/* Alphabet size */
+#define ASIZE 256
+
+struct nf_string_match {
+	char 		*pat;
+	int 		patlen;
+
+	int 		badshift[ASIZE];
+	int 		*goodshift;
+};
+
+extern struct nf_string_match *
+nf_string_match_create(const char *pat, int patlen);
+extern int nf_string_match_search(const struct sk_buff *skb, 
+				  struct nf_string_match *sm,
+				  unsigned int *offset);
+extern void nf_string_match_destroy(struct nf_string_match *sm);
+
+#endif

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH 1/4] RFC: fast string matching infrastrure for netfilter
  2005-01-09 22:23 [PATCH 1/4] RFC: fast string matching infrastrure for netfilter Pablo Neira
@ 2005-01-09 23:10 ` Patrick McHardy
  2005-01-09 23:19   ` Pablo Neira
  2005-01-10 19:54   ` nf_conntrack [was Re: [PATCH 1/4] RFC: fast string matching infrastrure for netfilter] Jozsef Kadlecsik
  2005-01-10  8:49 ` [PATCH 1/4] RFC: fast string matching infrastrure for netfilter Sven Schuster
  2005-01-10 10:06 ` Harald Welte
  2 siblings, 2 replies; 23+ messages in thread
From: Patrick McHardy @ 2005-01-09 23:10 UTC (permalink / raw)
  To: Pablo Neira; +Cc: Harald Welte, Netfilter Development Mailinglist

Pablo Neira wrote:

> Hi,
>
> I've finished a first usable version of a infrastructure to look for 
> matchings in a packet. Features:
>
> * A library consisting in three public functions: constructor, 
> destructor and searching.
> * Boyer-moore algorithm to perform fast matchings.
> * Brute force search on the edges of two fragments to look for 
> fragmented matches, that is O(m) searchs where m is the size of the 
> pattern, it's not that bad for small pattern I think. It's fragment 
> aware by means of rusty's skb_iter functions.

Looks good. A problem is that it's only in-tree user is part of 
ip_conntrack.
I have actually given up keeping nf_conntrack up to date currently, but
I hope we can now really put ip_conntrack in maintenance mode and 
concentrate
on nf_conntrack. Any chance you want to base this on the nf_conntrack 
patch ?

> Comments welcome.

See below.

Regards
Patrick

>
> -- 
> Pablo
>
>------------------------------------------------------------------------
>
>--- /dev/null	2004-09-23 01:18:13.000000000 +0200
>+++ linux-2.5/net/ipv4/netfilter/nf_string_match.c	2005-01-09 21:54:04.000000000 +0100
>@@ -0,0 +1,264 @@
>+/* Efficient string matching fragment aware infrastrure for netfilter
>+ * Copyright (C) 2004 Pablo Neira Ayuso <pablo@eurodev.net>
>+ *
>+ *      This program is free software; you can redistribute it and/or
>+ *      modify it under the terms of the GNU General Public License
>+ *      as published by the Free Software Foundation; either version
>+ *      2 of the License, or (at your option) any later version.
>+ *
>+ * Idea:
>+ * 	
>+ * 	This algorithm used Boyer-Moore fast matching algorithm to look
>+ * 	for a pattern in the payload of a packet. Since it is fragment
>+ * 	aware, it looks for partial matches on the edges of two fragments.
>+ *
>+ * Literature:
>+ * 
>+ *      A Fast String Searching Algorithm, R.S. Boyer and Moore.
>+ *      Communications of the Association for Computing Machinery, 
>+ *      20(10), 1977, pp. 762-772.
>+ *	http://www.cs.utexas.edu/users/moore/publications/fstrpos.pdf
>+ *
>+ * 	Handbook of Exact String Matching Algorithms, Thierry Lecroq, 2004
>+ * 	http://www-igm.univ-mlv.fr/~lecroq/string/string.pdf
>+ * 
>+ * Implementation:
>+ * 
>+ * 	Boyer Moore implementation heavily based on Gianni Tedesco's.
>+ */
>+
>+#include <linux/module.h>
>+#include <linux/netfilter_ipv4/nf_string_match.h>
>+#include <linux/skbuff.h>
>+
>+#if 0
>+#define DEBUGP printk
>+#else
>+#define DEBUGP(format, args...)
>+#endif
>+
>+/* Compute constant structures needed by the boyer-moore algorithm,
>+ * this is done just ONCE. */
>+static void
>+sm_preprocess(struct nf_string_match *sm)
>+{
>+	int i, j, len[ASIZE], ended;
>+
>+	/* Compute the bad caracter shift array */
>+	for (i = 0; i < ASIZE; i++)
>+		sm->badshift[i] = sm->patlen;
>+	for (i = 0; i < sm->patlen - 1; i++)
>+		sm->badshift[(int) sm->pat[i]] = sm->patlen - 1 - i;
>+
>+	/* Compute the good shift array, used to match reocurrences 
>+	 * of a subpattern */
>+	for (i = 1; i < sm->patlen; i++) {
>+		for (j = 0; j < sm->patlen && sm->pat[sm->patlen -1 - j] 
>+				== sm->pat[sm->patlen - 1 - i - j]; j++);
>+		len[i] = j;
>+	}  
>+
>+	sm->goodshift[0] = 1;
>+	for (i = 1; i < sm->patlen; i++)
>+		sm->goodshift[i] = sm->patlen;
>+	for (i = sm->patlen - 1; i > 0; i--)
>+		sm->goodshift[len[i]] = i;
>+
>+	ended = 0;
>+	for (i = 0; i < sm->patlen; i++) {
>+		if (len[i] == sm->patlen - 1 - i)
>+			ended = i;
>+		if (ended)
>+			sm->goodshift[i] = ended;
>+	}
>+}
>+
>+struct nf_string_match *
>+nf_string_match_create(const char *pat, int patlen)
>+{
>+	struct nf_string_match *sm;
>+
>+	sm = kmalloc(sizeof(struct nf_string_match), GFP_KERNEL);
>+	if (!sm)
>+		return NULL;
>+	memset(sm, 0, sizeof(struct nf_string_match));
>+
>+	sm->goodshift = (int *) kmalloc(patlen * sizeof(int), GFP_KERNEL);
>+	if (!sm->goodshift)
>  
>
^ leaks sm

>+		return NULL;
>+	memset(sm->goodshift, 0, sizeof(int) * patlen);
>+	
>+	sm->pat = (char *) kmalloc(patlen * sizeof(int), GFP_KERNEL);
>+	if (!sm->pat) {
>  
>
^ leaks sm

>+		kfree(sm->goodshift);
>+		return NULL;
>+	}
>+	memset(sm->pat, patlen, sizeof(char) * patlen);
>+	
>+	strcpy(sm->pat, pat);
>+	sm->patlen = patlen;
>+
>+	/* Preprocess the pattern */
>+	sm_preprocess(sm);
>+
>+	return sm;
>+}
>+
>+/* release the structure */
>+void nf_string_match_destroy(struct nf_string_match *sm)
>+{
>+	kfree(sm->goodshift);
>+	kfree(sm->pat);
>+	kfree(sm);
>+}
>+
>+#define MAX(a,b) a>b?a:b
>+
>+/* this functions returns the offset where a matching was found */
>+static unsigned int
>+sm_search(struct nf_string_match *sm, unsigned char *data, int size, 
>+	  unsigned int *offset)
>+{
>+	int right_end = sm->patlen, sk, sh, i;
>+
>+	while (right_end < size) {
>+		DEBUGP("Searching in position %d (%c)\n", 
>+		       right_end, data[right_end]);
>+		for (i = 0; i < sm->patlen && data[right_end - i] 
>+				== sm->pat[sm->patlen - 1 - i]; i++);
>+		if (i == sm->patlen) {
>+			/* London calling... */
>+			DEBUGP("found!\n");
>+			*offset += (right_end - (sm->patlen - 1));
>+			return 1;
>+		}
>+		
>+		sk = sm->badshift[data[right_end - i]];  
>+		sh = sm->goodshift[i];
>+		DEBUGP("bad shift: %d good shift: %d\n", sk, sh);
>+
>+		/* Now jumping to... */
>+		right_end = MAX(right_end - i + sk, right_end + sh);
>+	}
>+	*offset += size;
>+	return 0;
>+}
>+
>+/* Brute force: look for a partial matching at the end of the packet.
>+ * A match never hits here, it's impossible. */
>+static void
>+sm_frag_search_end(struct nf_string_match *sm, void *ptr, int size, 
>+		   int *partial)
>+{
>+	int i;
>+	unsigned char *data = ptr + size - sm->patlen;
>+
>+	for (i=0; i < sm->patlen; i++) {
>+		DEBUGP("end: %c == %c (%d)\n", data[i], 
>+			sm->pat[*partial], *partial);
>+		
>+		if (data[i] == sm->pat[*partial])
>+			(*partial)++;
>+		else {
>+			if (data[i] == sm->pat[0])
>+				*partial = 1;
>+			else 
>+				*partial = 0;
>+		}
>+	}
>+	DEBUGP("sm->partial:%d\n", *partial);
>+}
>+
>+/* Brute force: Look for a matching at the beginning. Yes, here
>+ * we can hit a full match */
>+static int
>+sm_frag_search_begin(struct nf_string_match *sm, void *ptr, int size, 
>+		     unsigned int *offset, int *partial)
>+{
>+	int i, j = sm->patlen - *partial;
>+	unsigned char *data = (unsigned char *) ptr;
>+
>+	for (i=0; i<j; i++) {
>+		DEBUGP("begin: %c == %c (%d)\n", data[i],
>+			sm->pat[*partial], *partial);
>+
>+		if (data[i] == sm->pat[*partial])
>+			(*partial)++;
>+		else
>+			*partial = 0;
>+	}
>+
>+	if (*partial == sm->patlen) {
>+		DEBUGP("Fragmented match!\n");
>+		*offset += i;
>+		return 1;
>+	}
>+
>+	/* No matching founds, so reset counter */
>+	*partial = 0;
>+
>+	return 0;
>+}
>+
>+static inline int
>+frag_search(void *data, int size, struct nf_string_match *sm, 
>+	    unsigned int *offset, int *partial)
>+{
>+	/* There was a partial match before */
>+	if (sm_frag_search_begin(sm, data, size, offset, partial))
>+		return 1;
>+
>+	if (sm_search(sm, data, size, offset))
>+		return 1;
>+
>+	sm_frag_search_end(sm, data, size, partial);
>+
>+	return 0;
>+}
>+
>+int
>+nf_string_match_search(const struct sk_buff *skb, struct nf_string_match *sm,
>+		       unsigned int *offset)
>+{
>+	/* make sure offset is always set to zero */
>+	*offset = 0;
>+
>+	if (!skb_is_nonlinear(skb)) {
>+		DEBUGP("linear searching\n");
>+		/* General case: look for pattern in linear packet */
>+		if (sm_search(sm, skb->data, skb->len, offset))
>+			return 1;
>+	} else {
>+		/* The packet is fragmented */
>+		struct skb_iter i;
>+		int partial = 0;
>+
>+		DEBUGP("non linear searching\n");
>+
>+		/* Look for the pattern in first fragment */
>+		skb_iter_first(skb, &i);
>+		if (frag_search(i.data, i.len, sm, offset, &partial))
>+			return 1;
>+
>+		/* Now, the rest of fragments ... */
>+		while (skb_iter_next(skb, &i)) {
>+			DEBUGP("%p (size:%d)\n", i.data, i.len);
>+			if (frag_search(i.data, i.len, sm, offset, &partial))
>+				return 1;
>+		}
>+	}
>+
>+	return 0;
>+}
>+
>+static int __init init(void) { return 1; }
>+static void __exit fini(void) {}
>+
>+module_init(init);
>+module_exit(fini);
>  
>
What is this for ?

>+
>+MODULE_LICENSE("GPL");
>+
>+EXPORT_SYMBOL_GPL(nf_string_match_create);
>+EXPORT_SYMBOL_GPL(nf_string_match_search);
>+EXPORT_SYMBOL_GPL(nf_string_match_destroy);
>--- /dev/null	2004-09-23 01:18:13.000000000 +0200
>+++ linux-2.5/include/linux/netfilter_ipv4/nf_string_match.h	2005-01-09 19:28:25.000000000 +0100
>@@ -0,0 +1,24 @@
>+#ifndef _STRING_MATCHING_H
>+#define _STRING_MATCHING_H
>+
>+#include <linux/skbuff.h>
>+
>+/* Alphabet size */
>+#define ASIZE 256
>+
>+struct nf_string_match {
>+	char 		*pat;
>+	int 		patlen;
>+
>+	int 		badshift[ASIZE];
>+	int 		*goodshift;
>+};
>+
>+extern struct nf_string_match *
>+nf_string_match_create(const char *pat, int patlen);
>+extern int nf_string_match_search(const struct sk_buff *skb, 
>+				  struct nf_string_match *sm,
>+				  unsigned int *offset);
>+extern void nf_string_match_destroy(struct nf_string_match *sm);
>+
>+#endif
>  
>

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH 1/4] RFC: fast string matching infrastrure for netfilter
  2005-01-09 23:10 ` Patrick McHardy
@ 2005-01-09 23:19   ` Pablo Neira
  2005-01-10 19:54   ` nf_conntrack [was Re: [PATCH 1/4] RFC: fast string matching infrastrure for netfilter] Jozsef Kadlecsik
  1 sibling, 0 replies; 23+ messages in thread
From: Pablo Neira @ 2005-01-09 23:19 UTC (permalink / raw)
  To: Patrick McHardy; +Cc: Harald Welte, Netfilter Development Mailinglist

Hi Patrick,

Patrick McHardy wrote:

>> I've finished a first usable version of a infrastructure to look for 
>> matchings in a packet. Features:
>>
>> * A library consisting in three public functions: constructor, 
>> destructor and searching.
>> * Boyer-moore algorithm to perform fast matchings.
>> * Brute force search on the edges of two fragments to look for 
>> fragmented matches, that is O(m) searchs where m is the size of the 
>> pattern, it's not that bad for small pattern I think. It's fragment 
>> aware by means of rusty's skb_iter functions.
>
>
> Looks good. A problem is that it's only in-tree user is part of 
> ip_conntrack.
> I have actually given up keeping nf_conntrack up to date currently, but
> I hope we can now really put ip_conntrack in maintenance mode and 
> concentrate
> on nf_conntrack. Any chance you want to base this on the nf_conntrack 
> patch ?


Sure, I don't mind about porting this to nf_conntrack. Expect a patch in 
next days.

>> Comments welcome.
>
>
> See below.


I'll fix those leaks, thanks for those good catches. The module_init and 
exit stuff is because I've compiling this as module and AFAIK I need them.

--
Pablo

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH 1/4] RFC: fast string matching infrastrure for netfilter
  2005-01-09 22:23 [PATCH 1/4] RFC: fast string matching infrastrure for netfilter Pablo Neira
  2005-01-09 23:10 ` Patrick McHardy
@ 2005-01-10  8:49 ` Sven Schuster
  2005-01-10 23:18   ` Pablo Neira
  2005-01-10 10:06 ` Harald Welte
  2 siblings, 1 reply; 23+ messages in thread
From: Sven Schuster @ 2005-01-10  8:49 UTC (permalink / raw)
  To: Pablo Neira; +Cc: Harald Welte, Netfilter Development Mailinglist

[-- Attachment #1: Type: text/plain, Size: 1326 bytes --]


Hi Pablo,

On Sun, Jan 09, 2005 at 11:23:09PM +0100, Pablo Neira told us:
> --- /dev/null	2004-09-23 01:18:13.000000000 +0200
> +++ linux-2.5/net/ipv4/netfilter/nf_string_match.c	2005-01-09 21:54:04.000000000 +0100
....
> +struct nf_string_match *
> +nf_string_match_create(const char *pat, int patlen)
> +{
> +	struct nf_string_match *sm;
> +
> +	sm = kmalloc(sizeof(struct nf_string_match), GFP_KERNEL);
> +	if (!sm)
> +		return NULL;
> +	memset(sm, 0, sizeof(struct nf_string_match));
> +
> +	sm->goodshift = (int *) kmalloc(patlen * sizeof(int), GFP_KERNEL);
> +	if (!sm->goodshift)
> +		return NULL;
> +	memset(sm->goodshift, 0, sizeof(int) * patlen);
> +	
> +	sm->pat = (char *) kmalloc(patlen * sizeof(int), GFP_KERNEL);
> +	if (!sm->pat) {
> +		kfree(sm->goodshift);
> +		return NULL;
> +	}
> +	memset(sm->pat, patlen, sizeof(char) * patlen);

is there any intention behind kmalloc()ing the pattern using sizeof(int)
and then memset()ting it using sizeof(char)?


Sven

> +	
> +	strcpy(sm->pat, pat);
> +	sm->patlen = patlen;
> +
> +	/* Preprocess the pattern */
> +	sm_preprocess(sm);
> +
> +	return sm;
> +}
> +

-- 
Linux zion 2.6.10-bk7 #0 Fri Jan 7 19:08:39 CET 2005 i686 athlon i386 GNU/Linux
 09:42:02 up 2 days, 14:19,  1 user,  load average: 0.09, 0.08, 0.03

[-- Attachment #2: Type: application/pgp-signature, Size: 189 bytes --]

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH 1/4] RFC: fast string matching infrastrure for netfilter
  2005-01-09 22:23 [PATCH 1/4] RFC: fast string matching infrastrure for netfilter Pablo Neira
  2005-01-09 23:10 ` Patrick McHardy
  2005-01-10  8:49 ` [PATCH 1/4] RFC: fast string matching infrastrure for netfilter Sven Schuster
@ 2005-01-10 10:06 ` Harald Welte
  2 siblings, 0 replies; 23+ messages in thread
From: Harald Welte @ 2005-01-10 10:06 UTC (permalink / raw)
  To: Pablo Neira; +Cc: Netfilter Development Mailinglist

[-- Attachment #1: Type: text/plain, Size: 1257 bytes --]

On Sun, Jan 09, 2005 at 11:23:09PM +0100, Pablo Neira wrote:
> Hi,
> 
> I've finished a first usable version of a infrastructure to look for 
> matchings in a packet. Features:
> 
> * A library consisting in three public functions: constructor, 
> destructor and searching.

you are aware that I was working on exactly the same thing, based on
in-kernel libqsearch by Phillipe Biondi? ;)

One of my major changes to the original libqsearch was to move
allocation from callee-based allocation to caller-based allocation.

This way we avoid too many dynamically kmalloc-allocated structures, and
a caller (for example a conntrack helper) can allocate the search
context as part of a different structure.

I haven't finished those patches, but I'll try to clean them up until
tomorrow, so we can have a discussion and maybe merge the good ideas of
both.

> Pablo

-- 
- Harald Welte <laforge@netfilter.org>             http://www.netfilter.org/
============================================================================
  "Fragmentation is like classful addressing -- an interesting early
   architectural error that shows how much experimentation was going
   on while IP was being designed."                    -- Paul Vixie

[-- Attachment #2: Digital signature --]
[-- Type: application/pgp-signature, Size: 189 bytes --]

^ permalink raw reply	[flat|nested] 23+ messages in thread

* nf_conntrack [was Re: [PATCH 1/4] RFC: fast string matching infrastrure for netfilter]
  2005-01-09 23:10 ` Patrick McHardy
  2005-01-09 23:19   ` Pablo Neira
@ 2005-01-10 19:54   ` Jozsef Kadlecsik
  2005-01-10 20:31     ` Patrick McHardy
  2005-01-10 21:20     ` Harald Welte
  1 sibling, 2 replies; 23+ messages in thread
From: Jozsef Kadlecsik @ 2005-01-10 19:54 UTC (permalink / raw)
  To: Patrick McHardy
  Cc: Harald Welte, Netfilter Development Mailinglist, Pablo Neira

On Mon, 10 Jan 2005, Patrick McHardy wrote:

> I have actually given up keeping nf_conntrack up to date currently, but
> I hope we can now really put ip_conntrack in maintenance mode and
> concentrate
> on nf_conntrack. Any chance you want to base this on the nf_conntrack
> patch ?

Actually, what is the opinion that nf_conntrack uses the union of IPv4 and
IPv6 addresses in the tuples?

The infrastructure is there in the patch to support IPv4/IPv6 conntrack
separatedly in spite of the common code and not to waste so many bytes at
every IPv4 connections for the sake of supporting IPv6. Shouldn't there be
put more effort into this long standing issue, before swithcing over it?

Best regards,
Jozsef
-
E-mail  : kadlec@blackhole.kfki.hu, kadlec@sunserv.kfki.hu
PGP key : http://www.kfki.hu/~kadlec/pgp_public_key.txt
Address : KFKI Research Institute for Particle and Nuclear Physics
          H-1525 Budapest 114, POB. 49, Hungary

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: nf_conntrack [was Re: [PATCH 1/4] RFC: fast string matching infrastrure for netfilter]
  2005-01-10 19:54   ` nf_conntrack [was Re: [PATCH 1/4] RFC: fast string matching infrastrure for netfilter] Jozsef Kadlecsik
@ 2005-01-10 20:31     ` Patrick McHardy
  2005-01-10 21:28       ` Harald Welte
  2005-01-10 21:20     ` Harald Welte
  1 sibling, 1 reply; 23+ messages in thread
From: Patrick McHardy @ 2005-01-10 20:31 UTC (permalink / raw)
  To: Jozsef Kadlecsik
  Cc: Harald Welte, Netfilter Development Mailinglist, Pablo Neira

Jozsef Kadlecsik wrote:

>On Mon, 10 Jan 2005, Patrick McHardy wrote:
>
>
>>I have actually given up keeping nf_conntrack up to date currently, but
>>I hope we can now really put ip_conntrack in maintenance mode and
>>concentrate
>>on nf_conntrack. Any chance you want to base this on the nf_conntrack
>>patch ?
>>
>
>Actually, what is the opinion that nf_conntrack uses the union of IPv4 and
>IPv6 addresses in the tuples?
>
In my opinion that's something that could also be improved later.

>The infrastructure is there in the patch to support IPv4/IPv6 conntrack
>separatedly in spite of the common code and not to waste so many bytes at
>every IPv4 connections for the sake of supporting IPv6.
>
You mean the get_features stuff and seperate caches ? I'm don't like this
part very much, I thing Rusty's "structure extension stuff" (ct_extend)
is a nicer way to do this, although its probably not useable for the tuples.

> Shouldn't there be
>put more effort into this long standing issue, before swithcing over it?
>  
>
I think we should put ip_conntrack in maintenance mode, than we can
resync nf_conntrack and make changes like this before we submit it.

Regards
Patrick

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: nf_conntrack [was Re: [PATCH 1/4] RFC: fast string matching infrastrure for netfilter]
  2005-01-10 19:54   ` nf_conntrack [was Re: [PATCH 1/4] RFC: fast string matching infrastrure for netfilter] Jozsef Kadlecsik
  2005-01-10 20:31     ` Patrick McHardy
@ 2005-01-10 21:20     ` Harald Welte
  1 sibling, 0 replies; 23+ messages in thread
From: Harald Welte @ 2005-01-10 21:20 UTC (permalink / raw)
  To: Jozsef Kadlecsik
  Cc: Netfilter Development Mailinglist, Pablo Neira, Patrick McHardy

[-- Attachment #1: Type: text/plain, Size: 982 bytes --]

On Mon, Jan 10, 2005 at 08:54:40PM +0100, Jozsef Kadlecsik wrote:
> 
> > I have actually given up keeping nf_conntrack up to date currently, but
> > I hope we can now really put ip_conntrack in maintenance mode and
> > concentrate
> > on nf_conntrack. Any chance you want to base this on the nf_conntrack
> > patch ?
> 
> Actually, what is the opinion that nf_conntrack uses the union of IPv4 and
> IPv6 addresses in the tuples?

My opinion was that we can rectify the waste of bytes here, since we
save so many bytes due to the get_features and multiple sized conntrack
structures.

> Jozsef

-- 
- Harald Welte <laforge@netfilter.org>             http://www.netfilter.org/
============================================================================
  "Fragmentation is like classful addressing -- an interesting early
   architectural error that shows how much experimentation was going
   on while IP was being designed."                    -- Paul Vixie

[-- Attachment #2: Digital signature --]
[-- Type: application/pgp-signature, Size: 189 bytes --]

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: nf_conntrack [was Re: [PATCH 1/4] RFC: fast string matching infrastrure for netfilter]
  2005-01-10 20:31     ` Patrick McHardy
@ 2005-01-10 21:28       ` Harald Welte
  2005-01-14  2:45         ` Patrick McHardy
  2005-01-14  3:16         ` Patrick McHardy
  0 siblings, 2 replies; 23+ messages in thread
From: Harald Welte @ 2005-01-10 21:28 UTC (permalink / raw)
  To: Patrick McHardy
  Cc: Netfilter Development Mailinglist, Pablo Neira, Jozsef Kadlecsik

[-- Attachment #1: Type: text/plain, Size: 2839 bytes --]

On Mon, Jan 10, 2005 at 09:31:45PM +0100, Patrick McHardy wrote:
> >Actually, what is the opinion that nf_conntrack uses the union of IPv4 and
> >IPv6 addresses in the tuples?
> >
> In my opinion that's something that could also be improved later.

Agreed.

> >The infrastructure is there in the patch to support IPv4/IPv6 conntrack
> >separatedly in spite of the common code and not to waste so many bytes at
> >every IPv4 connections for the sake of supporting IPv6.
>
> You mean the get_features stuff and seperate caches ? I'm don't like this
> part very much, I thing Rusty's "structure extension stuff" (ct_extend)
> is a nicer way to do this, although its probably not useable for the tuples.

Unfortunately I have to disagree.  I think I pointed it out in an
earlier email, but I'm not sure whether it was on IRC or whether you've
been on the list of recipients.

the nf_conntrack approach of having multiple slab caches of differently
sized conntrack structures is better from a performance point of view.

If you look at the typical applications, people will usually

a) either use NAT on most/all of their connections
b) or not use NAT on any of their connections

Now why don't they (the 'b' users) recompile their kernel? Because they
use vendor-kernels, and they compile with everything enabled.

So what we end up if we use rusty's scheme for nat private data, is that
we have at least one additional allocation, and two disjunct seperate
pieces of memory (which will probably also introduce yet another cache
miss).   This is acceptable for rare exceptional cases, not for 99.9% of
your connections.

So even if Rusty's architecture and API is cleaner, I'm very much in
favour of the nf_conntrack design and Yasuyuki's current
implementation.

Now you can argue about conntrack/nat helper private data.  Here my
argument is weaker, since normally you have something like < 1% of your
connections with a helper.  For this I would be willing to accept the
additional allocation and whatever.

But, then think of a firewall in front of an FTP (IRC/...) Server, and
your traffic pattern immediately looks like 50% of your connections (not
talking about traffic) have helper private data... and my argument
becomes stronger again.

> I think we should put ip_conntrack in maintenance mode, than we can
> resync nf_conntrack and make changes like this before we submit it.

At last, we again agree :)

-- 
- Harald Welte <laforge@netfilter.org>             http://www.netfilter.org/
============================================================================
  "Fragmentation is like classful addressing -- an interesting early
   architectural error that shows how much experimentation was going
   on while IP was being designed."                    -- Paul Vixie

[-- Attachment #2: Digital signature --]
[-- Type: application/pgp-signature, Size: 189 bytes --]

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH 1/4] RFC: fast string matching infrastrure for netfilter
  2005-01-10  8:49 ` [PATCH 1/4] RFC: fast string matching infrastrure for netfilter Sven Schuster
@ 2005-01-10 23:18   ` Pablo Neira
  0 siblings, 0 replies; 23+ messages in thread
From: Pablo Neira @ 2005-01-10 23:18 UTC (permalink / raw)
  To: Sven Schuster; +Cc: Harald Welte, Netfilter Development Mailinglist

Sven Schuster wrote:

>>+struct nf_string_match *
>>+nf_string_match_create(const char *pat, int patlen)
>>+{
>>+	struct nf_string_match *sm;
>>+
>>+	sm = kmalloc(sizeof(struct nf_string_match), GFP_KERNEL);
>>+	if (!sm)
>>+		return NULL;
>>+	memset(sm, 0, sizeof(struct nf_string_match));
>>+
>>+	sm->goodshift = (int *) kmalloc(patlen * sizeof(int), GFP_KERNEL);
>>+	if (!sm->goodshift)
>>+		return NULL;
>>+	memset(sm->goodshift, 0, sizeof(int) * patlen);
>>+	
>>+	sm->pat = (char *) kmalloc(patlen * sizeof(int), GFP_KERNEL);
>>+	if (!sm->pat) {
>>+		kfree(sm->goodshift);
>>+		return NULL;
>>+	}
>>+	memset(sm->pat, patlen, sizeof(char) * patlen);
>>    
>>
>
>is there any intention behind kmalloc()ing the pattern using sizeof(int)
>and then memset()ting it using sizeof(char)?
>  
>

yes, I've got a severe brain misfunction or a leak in the copy-and-paste 
facility of my editor :). Thanks, I just fixed those in my tree.

Harald, I'll wait for your news about your patches to reduce the 
pressure in memory allocation. Anyway, I've already got some thoughts 
about libqsearch. I think that it's a bit bloated for kernel space. That 
plugin infrastructure provided by libqsearch is nice but it's much more 
than we need since there are just just two plugins: brute force and 
boyer-moore searchings.

My current implementation doesn't support wildcards[1] but that feature 
isn't difficult to implement, as well as iterative searching of patterns 
loaded in a list.

Wait for your comments.

[1] When talking about wildcards, I mean simple ones, that is, 'p*blo' 
matches 'pablo' but not 'paablo' since boyer-moore shifting can't allow 
that. I know you are aware of that ;).

--
Pablo

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: nf_conntrack [was Re: [PATCH 1/4] RFC: fast string matching infrastrure for netfilter]
  2005-01-10 21:28       ` Harald Welte
@ 2005-01-14  2:45         ` Patrick McHardy
  2005-01-14  4:31           ` nf_conntrack Yasuyuki KOZAKAI
                             ` (2 more replies)
  2005-01-14  3:16         ` Patrick McHardy
  1 sibling, 3 replies; 23+ messages in thread
From: Patrick McHardy @ 2005-01-14  2:45 UTC (permalink / raw)
  To: Harald Welte
  Cc: Rusty Russell, Netfilter Development Mailinglist, Pablo Neira,
	Jozsef Kadlecsik

Harald Welte wrote:

>On Mon, Jan 10, 2005 at 09:31:45PM +0100, Patrick McHardy wrote:
>  
>
>>>The infrastructure is there in the patch to support IPv4/IPv6 conntrack
>>>separatedly in spite of the common code and not to waste so many bytes at
>>>every IPv4 connections for the sake of supporting IPv6.
>>>      
>>>
>>You mean the get_features stuff and seperate caches ? I'm don't like this
>>part very much, I thing Rusty's "structure extension stuff" (ct_extend)
>>is a nicer way to do this, although its probably not useable for the tuples.
>>    
>>
>
>Unfortunately I have to disagree.  I think I pointed it out in an
>earlier email, but I'm not sure whether it was on IRC or whether you've
>been on the list of recipients.
>
Hmm, I can't recall talking about this earlier.

>the nf_conntrack approach of having multiple slab caches of differently
>sized conntrack structures is better from a performance point of view.
>
>If you look at the typical applications, people will usually
>
>a) either use NAT on most/all of their connections
>b) or not use NAT on any of their connections
>
>Now why don't they (the 'b' users) recompile their kernel? Because they
>use vendor-kernels, and they compile with everything enabled.
>
>So what we end up if we use rusty's scheme for nat private data, is that
>we have at least one additional allocation, and two disjunct seperate
>pieces of memory (which will probably also introduce yet another cache
>miss).   This is acceptable for rare exceptional cases, not for 99.9% of
>your connections.
>
>So even if Rusty's architecture and API is cleaner, I'm very much in
>favour of the nf_conntrack design and Yasuyuki's current
>implementation.
>  
>
But unlike ct_extend, the get_features stuff is very unflexible. The
way it is currently implemented it only works for a single dynamically
allocated part without leaving holes, and it even it if could handle more,
they all need to be determined at allocation time, so a worst-case 
assumption
has to be made for anything depending on the ruleset. This is especially
a concern with modules and distribution-kernels, NAT could be loaded at any
time, so the memory needs to be allocated in advance.

>Now you can argue about conntrack/nat helper private data.  Here my
>argument is weaker, since normally you have something like < 1% of your
>connections with a helper.  For this I would be willing to accept the
>additional allocation and whatever.
>
>But, then think of a firewall in front of an FTP (IRC/...) Server, and
>your traffic pattern immediately looks like 50% of your connections (not
>talking about traffic) have helper private data... and my argument
>becomes stronger again.
>  
>
Sure, but helpers are slow anyway. The additional allocation is a one-time
effort, the two disjunct pieces of memory shouldn't hurt too much since
nf_conntrack is very large and covers multiple cache-lines already. Rusty
mentioned he got struct ip_conntrack down to ~150b, this makes it possible
to have a lot more active connections without exceeding the cache.

>>I think we should put ip_conntrack in maintenance mode, than we can
>>resync nf_conntrack and make changes like this before we submit it.
>>    
>>
>
>At last, we again agree :)
>  
>
One more thing I was thinking about. If we put ip_conntrack in maintenance
mode and probably deprecate it at some time, I see little reason to remove
ipchains compat just now. Why not just let it fade out together with
ip_conntrack once nf_conntrack is in ? I haven't checked if it was because
of incompatibilities or required changes with Rusty's latest work, if so
that would be a good reason.

Regards
Patrick

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: nf_conntrack [was Re: [PATCH 1/4] RFC: fast string matching infrastrure for netfilter]
  2005-01-10 21:28       ` Harald Welte
  2005-01-14  2:45         ` Patrick McHardy
@ 2005-01-14  3:16         ` Patrick McHardy
  1 sibling, 0 replies; 23+ messages in thread
From: Patrick McHardy @ 2005-01-14  3:16 UTC (permalink / raw)
  To: Harald Welte
  Cc: Netfilter Development Mailinglist, Pablo Neira, Jozsef Kadlecsik

[-- Attachment #1: Type: text/plain, Size: 337 bytes --]

Harald Welte wrote:

>>I think we should put ip_conntrack in maintenance mode, than we can
>>resync nf_conntrack and make changes like this before we submit it.
>>
>
>At last, we again agree :)
>

I've commited a TODO list for nf_conntrack (just off the top of my head)
to svn. Any further suggestions are appreciated.

Regards
Patrick


[-- Attachment #2: TODO --]
[-- Type: text/plain, Size: 716 bytes --]

TODOs for nf_conntrack (last changed Jan 14 2005)
-------------------------------------------------

Some items are controversial, so make sure to get some feedback on
netfilter-develbefore working on any of these.

- Resync with latest ip_conntrack changes
- Function/structure name unification: s/nf_conntrack/nf_ct/
- Replace get_features stuff by ct_extend ?
- Don't waste space for IPv4 tuples (currently union with IPv6 tuples) ?
- kill ->confirm, nf_conntrack_confirm is generic
- Add nf_ct_get_afinfo for things like ip_ct_attach
- Some IPv4/IPv6 registered hook functions just call generic functions, this
  means even longer call-chains. Add family argument to hook functions and
  kill them ?
- IPv4 NAT


^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: nf_conntrack
  2005-01-14  2:45         ` Patrick McHardy
@ 2005-01-14  4:31           ` Yasuyuki KOZAKAI
  2005-01-14  7:01           ` nf_conntrack [was Re: [PATCH 1/4] RFC: fast string matching infrastrure for netfilter] Rusty Russell
  2005-01-14  8:31           ` Harald Welte
  2 siblings, 0 replies; 23+ messages in thread
From: Yasuyuki KOZAKAI @ 2005-01-14  4:31 UTC (permalink / raw)
  To: kaber; +Cc: laforge, pablo, rusty, netfilter-devel, kadlec


Hi,

From: Patrick McHardy <kaber@trash.net>
Date: Fri, 14 Jan 2005 03:45:44 +0100

> Harald Welte wrote:
> 
> >the nf_conntrack approach of having multiple slab caches of differently
> >sized conntrack structures is better from a performance point of view.
> >
> >If you look at the typical applications, people will usually
> >
> >a) either use NAT on most/all of their connections
> >b) or not use NAT on any of their connections
> >
> >Now why don't they (the 'b' users) recompile their kernel? Because they
> >use vendor-kernels, and they compile with everything enabled.
> >
> >So what we end up if we use rusty's scheme for nat private data, is that
> >we have at least one additional allocation, and two disjunct seperate
> >pieces of memory (which will probably also introduce yet another cache
> >miss).   This is acceptable for rare exceptional cases, not for 99.9% of
> >your connections.
> >
> >So even if Rusty's architecture and API is cleaner, I'm very much in
> >favour of the nf_conntrack design and Yasuyuki's current
> >implementation.
> >  
> >
> But unlike ct_extend, the get_features stuff is very unflexible. The
> way it is currently implemented it only works for a single dynamically
> allocated part without leaving holes, and it even it if could handle more,
> they all need to be determined at allocation time, so a worst-case 
> assumption
> has to be made for anything depending on the ruleset. This is especially
> a concern with modules and distribution-kernels, NAT could be loaded at any
> time, so the memory needs to be allocated in advance.

ct_extend is definitely flexible. But at the same time, I agree to use
multiple slab caches if we can improve performance.

I haven't deeply read ct_extend codes, but can we combine it and cache scheme ?

For example, allocate conntrack from cache at first. We can roughly find out
its size by checking l3 protocol, loaded modules, finding helpers which want
to inspect packet, and so on. And if it is not enough, we can allocate more
piecies.

Anyway, I think we have many chance to optimize it later.

So why don't we port ct_extend to nf_conntrack and then optimize it with slab
cache ?

-----------------------------------------------------------------
Yasuyuki KOZAKAI @ USAGI Project <yasuyuki.kozakai@toshiba.co.jp>

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: nf_conntrack [was Re: [PATCH 1/4] RFC: fast string matching infrastrure for netfilter]
  2005-01-14  2:45         ` Patrick McHardy
  2005-01-14  4:31           ` nf_conntrack Yasuyuki KOZAKAI
@ 2005-01-14  7:01           ` Rusty Russell
  2005-01-14  8:20             ` Patrick Schaaf
                               ` (2 more replies)
  2005-01-14  8:31           ` Harald Welte
  2 siblings, 3 replies; 23+ messages in thread
From: Rusty Russell @ 2005-01-14  7:01 UTC (permalink / raw)
  To: Patrick McHardy
  Cc: Harald Welte, Netfilter Development Mailinglist, Pablo Neira,
	Jozsef Kadlecsik

On Fri, 2005-01-14 at 03:45 +0100, Patrick McHardy wrote:
> Harald Welte wrote:
> 
> >On Mon, Jan 10, 2005 at 09:31:45PM +0100, Patrick McHardy wrote:
> >  
> >
> >>>The infrastructure is there in the patch to support IPv4/IPv6 conntrack
> >>>separatedly in spite of the common code and not to waste so many bytes at
> >>>every IPv4 connections for the sake of supporting IPv6.
> >>>      
> >>>
> >>You mean the get_features stuff and seperate caches ? I'm don't like this
> >>part very much, I thing Rusty's "structure extension stuff" (ct_extend)
> >>is a nicer way to do this, although its probably not useable for the tuples.
> >>    
> >>
> >
> >Unfortunately I have to disagree.  I think I pointed it out in an
> >earlier email, but I'm not sure whether it was on IRC or whether you've
> >been on the list of recipients.
> >
> Hmm, I can't recall talking about this earlier.

It was IRC, and for IPv6 I agree with Harald that it's probably not the
right mechanism.  I prefer a simple discriminated union and two slab
caches (IPv4 and IPv6), which is hardcoded, but still fairly easy to
read.

Note that I don't think ct_extend should go in now: we need more
benchmarking and work (particularly locking).

> >Now you can argue about conntrack/nat helper private data.  Here my
> >argument is weaker, since normally you have something like < 1% of your
> >connections with a helper.  For this I would be willing to accept the
> >additional allocation and whatever.
> >
> >But, then think of a firewall in front of an FTP (IRC/...) Server, and
> >your traffic pattern immediately looks like 50% of your connections (not
> >talking about traffic) have helper private data... and my argument
> >becomes stronger again.

I don't think so.  While you will have to do an additional allocation on
connection creation, the only real per-packet cost is the extra
cacheline (the memchr to find the data in the extend is deliberately
trivial).   They'll gain more by removing the copying into buffer on
those helpers, and the lock that goes with it.

Current size with ct_extend is about 180 bytes (192 in nfsim, which has
larger struct timer_list).  128 is the goal.  Potential areas for
shrinkage using 32-bit machine numbers (I'm not saying these are all
good ideas)

	timeout: move to a 32-bit seconds counter, and use a sweep-method to
clean up connections rather than a timer per conn.  Save 28 bytes.

	counters: use 32-bits for packet counts.  Save 8 bytes.

	ip_conntrack_proto: merge fields and flags.  Save 8 bytes.

	ip_nat_info: use hash tree.  Save 8 bytes.

	tuplehash: use hash tree, put proto in status word.  Save 24 bytes.

That gets us to 104 bytes without doing anything too insane.

> >>I think we should put ip_conntrack in maintenance mode, than we can
> >>resync nf_conntrack and make changes like this before we submit it.
> >>    
> >>
> >
> >At last, we again agree :)

Does that mean I should *not* send the expectation and NAT patches to
DaveM now?  (Posted before the ct_extend patches):

# Fix overlapping expectations in existing expectation code
expectation_overlap_fix-plus-put.patch.gz
# Call NAT Helper Modules Directly from Conntrack Modules, fixup FTP
direct_nat_expect.patch.gz
# Fix Up IRC, AMANDA, TFTP and SNMP Helpers
update-other-helpers.patch.gz
# Simplify expect handling
simplify-expect.patch.gz
# Make Expectations Timeouts Compulsory 
compulsory_timers.patch.gz
# Remove remaining multirange related code
cleanup-manips-left.patch.gz
# Adrian Bunk's Cleanup Patches
adrian-bunk.patch.gz
# Remove manip array from conntrack entry
remove-nat-manips.patch.gz
# Remove ip_conntrack_tuple_hash "ctrack" pointer
remove_ctrack_backpointer.patch.gz
# Use a bit in conntrack status to indicate sequence number adjustment
nat-seq_adjust-bit.patch.gz
# Get rid of initialized in nat structure: use conntrack status bits
nat-initialized-in-status.patch.gz

Thanks,
Rusty.
-- 
A bad analogy is like a leaky screwdriver -- Richard Braakman

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: nf_conntrack [was Re: [PATCH 1/4] RFC: fast string matching infrastrure for netfilter]
  2005-01-14  7:01           ` nf_conntrack [was Re: [PATCH 1/4] RFC: fast string matching infrastrure for netfilter] Rusty Russell
@ 2005-01-14  8:20             ` Patrick Schaaf
  2005-01-15 18:18               ` KOVACS Krisztian
  2005-01-14  8:37             ` Harald Welte
  2005-01-14 17:52             ` Patrick McHardy
  2 siblings, 1 reply; 23+ messages in thread
From: Patrick Schaaf @ 2005-01-14  8:20 UTC (permalink / raw)
  To: Rusty Russell
  Cc: Harald Welte, Netfilter Development Mailinglist, Pablo Neira,
	Patrick McHardy, Jozsef Kadlecsik

> 	timeout: move to a 32-bit seconds counter, and use a sweep-method to
> clean up connections rather than a timer per conn.  Save 28 bytes.

I don't like sweep...

This reminds me: what about my "timer management frequency reduction"
from longtimeago? Basic points:

1) Note that the normal, tcp stream, per-packet timer usually only increases,
   and is way out in the future (for ESTABLISHED connections).
2) Meditate
3) Have a store area for the jiffies target, per conntrack, which is
   independant of the kernel timer.
4) When the timeout jiffies target is set or modified, and the kernel timer
   is not already running, start it up, as usual. Also store the jiffies
   target in the conntrack.
5) When the timeout jiffies target changes, and we have an already running
   kernel timer, compare stored jiffies target with new jiffies target:
5a) New target is smaller than old target: store new target, modify kernel
    timer, as usual.
5b) New target is larger than old target: store new target. DONE.
6) when timeout happens, compare stored jiffies target to $now:
6a) If stored target and current time match (or stored is older),
    run the timer activity we have now, i.e. destroy conntrack, usually.
6b) If the stored target is in the future, restart kernel timer
    to the target time. DO NOT fire traditional timer activity.
7) Meditate
8) Note that the normal, tcp stream, per-packet timer usually only increases,
   and is way out in the future (for ESTABLISHED connections).

We'll save the whole kernel timer modification for each packet after the first,
for all usual ESTABLISHED connections. They'll only rearm their timer once
every few days.

I had this coded up and working (for some hours, on my box). Patch must be
somewhere in the archives...

> 	ip_nat_info: use hash tree.  Save 8 bytes.
> 	tuplehash: use hash tree, put proto in status word.  Save 24 bytes.

Umm. What exactly is meant with "hash tree", here?

best regards
  Patrick

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: nf_conntrack [was Re: [PATCH 1/4] RFC: fast string matching infrastrure for netfilter]
  2005-01-14  2:45         ` Patrick McHardy
  2005-01-14  4:31           ` nf_conntrack Yasuyuki KOZAKAI
  2005-01-14  7:01           ` nf_conntrack [was Re: [PATCH 1/4] RFC: fast string matching infrastrure for netfilter] Rusty Russell
@ 2005-01-14  8:31           ` Harald Welte
  2005-01-14 18:00             ` Patrick McHardy
  2 siblings, 1 reply; 23+ messages in thread
From: Harald Welte @ 2005-01-14  8:31 UTC (permalink / raw)
  To: Patrick McHardy
  Cc: Rusty Russell, Netfilter Development Mailinglist, Pablo Neira,
	Jozsef Kadlecsik

[-- Attachment #1: Type: text/plain, Size: 2661 bytes --]

On Fri, Jan 14, 2005 at 03:45:44AM +0100, Patrick McHardy wrote:

> But unlike ct_extend, the get_features stuff is very unflexible. The
> way it is currently implemented it only works for a single dynamically
> allocated part without leaving holes, and it even it if could handle more,
> they all need to be determined at allocation time, so a worst-case 
> assumption
> has to be made for anything depending on the ruleset. This is especially
> a concern with modules and distribution-kernels, NAT could be loaded at any
> time, so the memory needs to be allocated in advance.

Nobody should ever load iptable_nat if it isn't actually used.  That's
what module autoloading is about.  If a distribution does this, it is
straight forward broken.

Also, I haven't seen this in practise so far, so it's not a valid
optimization target from my point of view.

> >But, then think of a firewall in front of an FTP (IRC/...) Server, and
> >your traffic pattern immediately looks like 50% of your connections (not
> >talking about traffic) have helper private data... and my argument
> >becomes stronger again.
> >
> Sure, but helpers are slow anyway. The additional allocation is a one-time
> effort, the two disjunct pieces of memory shouldn't hurt too much since
> nf_conntrack is very large and covers multiple cache-lines already. Rusty
> mentioned he got struct ip_conntrack down to ~150b, this makes it possible
> to have a lot more active connections without exceeding the cache.

Ok, agreed for the helper case.  But my argument for NAT still holds, at
least as far as I see.  Also, stuff like the accounting counters are a
similar case.

So maybe in the end we need 'features' for those, and ct_extend for
helper related data?

> One more thing I was thinking about. If we put ip_conntrack in maintenance
> mode and probably deprecate it at some time, I see little reason to remove
> ipchains compat just now. Why not just let it fade out together with
> ip_conntrack once nf_conntrack is in ? I haven't checked if it was because
> of incompatibilities or required changes with Rusty's latest work, if so
> that would be a good reason.

I don't have a particular favour for or against any of the proposed
solutions.

> Regards
> Patrick

-- 
- Harald Welte <laforge@netfilter.org>             http://www.netfilter.org/
============================================================================
  "Fragmentation is like classful addressing -- an interesting early
   architectural error that shows how much experimentation was going
   on while IP was being designed."                    -- Paul Vixie

[-- Attachment #2: Digital signature --]
[-- Type: application/pgp-signature, Size: 189 bytes --]

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: nf_conntrack [was Re: [PATCH 1/4] RFC: fast string matching infrastrure for netfilter]
  2005-01-14  7:01           ` nf_conntrack [was Re: [PATCH 1/4] RFC: fast string matching infrastrure for netfilter] Rusty Russell
  2005-01-14  8:20             ` Patrick Schaaf
@ 2005-01-14  8:37             ` Harald Welte
  2005-01-14 10:22               ` Rusty Russell
  2005-01-14 18:02               ` Patrick McHardy
  2005-01-14 17:52             ` Patrick McHardy
  2 siblings, 2 replies; 23+ messages in thread
From: Harald Welte @ 2005-01-14  8:37 UTC (permalink / raw)
  To: Rusty Russell
  Cc: Netfilter Development Mailinglist, Pablo Neira, Patrick McHardy,
	Jozsef Kadlecsik

[-- Attachment #1: Type: text/plain, Size: 1920 bytes --]

On Fri, Jan 14, 2005 at 06:01:42PM +1100, Rusty Russell wrote:

> It was IRC, and for IPv6 I agree with Harald that it's probably not the
> right mechanism.  I prefer a simple discriminated union and two slab
> caches (IPv4 and IPv6), which is hardcoded, but still fairly easy to
> read.

so what about the nat_info ?  If you load iptable_nat, it will be needed
for all connections...

> > >But, then think of a firewall in front of an FTP (IRC/...) Server, and
> > >your traffic pattern immediately looks like 50% of your connections (not
> > >talking about traffic) have helper private data... and my argument
> > >becomes stronger again.
> 
> I don't think so.  While you will have to do an additional allocation on
> connection creation, the only real per-packet cost is the extra
> cacheline (the memchr to find the data in the extend is deliberately
> trivial).   They'll gain more by removing the copying into buffer on
> those helpers, and the lock that goes with it.

ok, convinced for helpers.

> Does that mean I should *not* send the expectation and NAT patches to
> DaveM now?  (Posted before the ct_extend patches):

I was wondering all the time why we keep talking about maintainance mode
for ip_conntrack and the coincidential spike in new development for
ip_conntrack.

I think if we push it into ip_conntrack, we need to make sure
nf_conntrack is in sync at the same time - otherwise poor Yasuyuki has
to (again) sync all the changes... I think he did that often enough...

> Thanks,
> Rusty.

-- 
- Harald Welte <laforge@netfilter.org>             http://www.netfilter.org/
============================================================================
  "Fragmentation is like classful addressing -- an interesting early
   architectural error that shows how much experimentation was going
   on while IP was being designed."                    -- Paul Vixie

[-- Attachment #2: Digital signature --]
[-- Type: application/pgp-signature, Size: 189 bytes --]

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: nf_conntrack [was Re: [PATCH 1/4] RFC: fast string matching infrastrure for netfilter]
  2005-01-14  8:37             ` Harald Welte
@ 2005-01-14 10:22               ` Rusty Russell
  2005-01-14 18:02               ` Patrick McHardy
  1 sibling, 0 replies; 23+ messages in thread
From: Rusty Russell @ 2005-01-14 10:22 UTC (permalink / raw)
  To: Harald Welte
  Cc: Netfilter Development Mailinglist, Pablo Neira, Patrick McHardy,
	Jozsef Kadlecsik

On Fri, 2005-01-14 at 09:37 +0100, Harald Welte wrote:
> On Fri, Jan 14, 2005 at 06:01:42PM +1100, Rusty Russell wrote:
> 
> > It was IRC, and for IPv6 I agree with Harald that it's probably not the
> > right mechanism.  I prefer a simple discriminated union and two slab
> > caches (IPv4 and IPv6), which is hardcoded, but still fairly easy to
> > read.
> 
> so what about the nat_info ?  If you load iptable_nat, it will be needed
> for all connections...

My current patches reduce the nat_info to one list_head.  If we were to
use a hash tree as per Gandalf's recent work, it'd be 0.  IMHO it's not
worth the pain even if it's still a list_head.

> > Does that mean I should *not* send the expectation and NAT patches to
> > DaveM now?  (Posted before the ct_extend patches):
> 
> I was wondering all the time why we keep talking about maintainance mode
> for ip_conntrack and the coincidential spike in new development for
> ip_conntrack.

That's why I asked.  Unfortunately, I only just found time to do some
netfilter work 8(

I definitely think ct_extend should wait for nf_conntrack (but I wanted
the patches out there, since I think they're complimentary).  For the
others, I think it's better to merge them now so nf_conntrack only has
to sync once (presumably it will have to sync with some of the existing
changes already), and plan nf_conntrack for 2.6.11.

Rusty.
-- 
A bad analogy is like a leaky screwdriver -- Richard Braakman

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: nf_conntrack [was Re: [PATCH 1/4] RFC: fast string matching infrastrure for netfilter]
  2005-01-14  7:01           ` nf_conntrack [was Re: [PATCH 1/4] RFC: fast string matching infrastrure for netfilter] Rusty Russell
  2005-01-14  8:20             ` Patrick Schaaf
  2005-01-14  8:37             ` Harald Welte
@ 2005-01-14 17:52             ` Patrick McHardy
  2 siblings, 0 replies; 23+ messages in thread
From: Patrick McHardy @ 2005-01-14 17:52 UTC (permalink / raw)
  To: Rusty Russell
  Cc: Harald Welte, Netfilter Development Mailinglist, Pablo Neira,
	Jozsef Kadlecsik

Rusty Russell wrote:

>It was IRC, and for IPv6 I agree with Harald that it's probably not the
>right mechanism.  I prefer a simple discriminated union and two slab
>caches (IPv4 and IPv6), which is hardcoded, but still fairly easy to
>read.
>  
>
Sounds good.

>Does that mean I should *not* send the expectation and NAT patches to
>DaveM now?  (Posted before the ct_extend patches):
>
No, I just meant we need to stop at some point some so nf_conntrack has a
chance of beeing resynced.

Regards
Patrick

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: nf_conntrack [was Re: [PATCH 1/4] RFC: fast string matching infrastrure for netfilter]
  2005-01-14  8:31           ` Harald Welte
@ 2005-01-14 18:00             ` Patrick McHardy
  0 siblings, 0 replies; 23+ messages in thread
From: Patrick McHardy @ 2005-01-14 18:00 UTC (permalink / raw)
  To: Harald Welte
  Cc: Rusty Russell, Netfilter Development Mailinglist, Pablo Neira,
	Jozsef Kadlecsik

Harald Welte wrote:

>Nobody should ever load iptable_nat if it isn't actually used.  That's
>what module autoloading is about.  If a distribution does this, it is
>straight forward broken.
>
I was talking about autoloading or loading by other means. If the NAT
module is _present_ but not loaded, the get_features stuff needs to
assume it could be loaded any time and thus reserve memory in the
conntrack tuple. Or reallocate on module load.

>
>Also, I haven't seen this in practise so far, so it's not a valid
>optimization target from my point of view.
>
>
>>Sure, but helpers are slow anyway. The additional allocation is a one-time
>>effort, the two disjunct pieces of memory shouldn't hurt too much since
>>nf_conntrack is very large and covers multiple cache-lines already. Rusty
>>mentioned he got struct ip_conntrack down to ~150b, this makes it possible
>>to have a lot more active connections without exceeding the cache.
>>
>
>Ok, agreed for the helper case.  But my argument for NAT still holds, at
>least as far as I see.  Also, stuff like the accounting counters are a
>similar case.
>
>So maybe in the end we need 'features' for those, and ct_extend for
>helper related data?
>
I think helper data should go in ct_extend anyway. But since get_features
needs to allocate the memory for ip_nat_info anyways, I think we should go
with Rusty's propsal of two hardcoded slabcaches (IPv4/IPv6).

>>One more thing I was thinking about. If we put ip_conntrack in maintenance
>>mode and probably deprecate it at some time, I see little reason to remove
>>ipchains compat just now. Why not just let it fade out together with
>>ip_conntrack once nf_conntrack is in ? I haven't checked if it was because
>>of incompatibilities or required changes with Rusty's latest work, if so
>>that would be a good reason.
>>
>
>I don't have a particular favour for or against any of the proposed
>solutions.
>
Well, it's gone now anyway, lets just leave it this way :)

Regards
Patrick

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: nf_conntrack [was Re: [PATCH 1/4] RFC: fast string matching infrastrure for netfilter]
  2005-01-14  8:37             ` Harald Welte
  2005-01-14 10:22               ` Rusty Russell
@ 2005-01-14 18:02               ` Patrick McHardy
  1 sibling, 0 replies; 23+ messages in thread
From: Patrick McHardy @ 2005-01-14 18:02 UTC (permalink / raw)
  To: Harald Welte
  Cc: Netfilter Development Mailinglist, Rusty Russell, Pablo Neira,
	Jozsef Kadlecsik

Harald Welte wrote:

>>Does that mean I should *not* send the expectation and NAT patches to
>>DaveM now?  (Posted before the ct_extend patches):
>>
>
>I was wondering all the time why we keep talking about maintainance mode
>for ip_conntrack and the coincidential spike in new development for
>ip_conntrack.
>
>I think if we push it into ip_conntrack, we need to make sure
>nf_conntrack is in sync at the same time - otherwise poor Yasuyuki has
>to (again) sync all the changes... I think he did that often enough...
>
I'm willing to take care of this after Rusty's patches are in.

Regards
Patrick

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: nf_conntrack [was Re: [PATCH 1/4] RFC: fast string matching infrastrure for netfilter]
  2005-01-14  8:20             ` Patrick Schaaf
@ 2005-01-15 18:18               ` KOVACS Krisztian
  2005-01-16 16:09                 ` Jozsef Kadlecsik
  0 siblings, 1 reply; 23+ messages in thread
From: KOVACS Krisztian @ 2005-01-15 18:18 UTC (permalink / raw)
  To: Patrick Schaaf
  Cc: Netfilter Development Mailinglist, Harald Welte, Rusty Russell,
	Pablo Neira, Jozsef Kadlecsik, Patrick McHardy

  Hi,

2005-01-14, p keltezéssel 09.20-kor Patrick Schaaf ezt írta:
> I don't like sweep...

  I don't like it either. The dynamic timer management of Linux is quite
fast and efficient, so if we'd like to rely on our own timeout
management facility it won't be trivial either. (Although the 'sweep'
timer would run much less frequently of course, so it's not so clear...)

> This reminds me: what about my "timer management frequency reduction"
> from longtimeago? Basic points:

  If my memory serves me well, Jozsef made some tests using a similar
patch (or maybe yours?), but the results were quite controversial.
Jozsef?

-- 
  Krisztian Kovacs

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: nf_conntrack [was Re: [PATCH 1/4] RFC: fast string matching infrastrure for netfilter]
  2005-01-15 18:18               ` KOVACS Krisztian
@ 2005-01-16 16:09                 ` Jozsef Kadlecsik
  0 siblings, 0 replies; 23+ messages in thread
From: Jozsef Kadlecsik @ 2005-01-16 16:09 UTC (permalink / raw)
  To: KOVACS Krisztian; +Cc: Netfilter Development Mailinglist, pasztor

On Sat, 15 Jan 2005, KOVACS Krisztian wrote:

> > This reminds me: what about my "timer management frequency reduction"
> > from longtimeago? Basic points:
>
>   If my memory serves me well, Jozsef made some tests using a similar
> patch (or maybe yours?), but the results were quite controversial.

Yes, but that was probably due to my crappy code.

There is a plan to repeate conntrack performance tests in spring, in a
real environment: ~30 machines as server on one side and ~100 machine as
client on the other side, thanks to Gyuri Pasztor. There are some
conditions we have to satisfy (the machines are available for testings at
nights and weekends only) but that'll be fine. They still have a lot of
work to put the machines in normal production tought, but I hope real
soon we can work on the ramdisk images for the servers/clients, on the
smooth switching back and forth between testing/production mode, etc.

Any hints on performance tunings at Gb lines, better metodology that what
was used previously (httperf as traffic generator), suggestions on what
patches/patch sets to test are welcomed!

Best regards,
Jozsef
-
E-mail  : kadlec@blackhole.kfki.hu, kadlec@sunserv.kfki.hu
PGP key : http://www.kfki.hu/~kadlec/pgp_public_key.txt
Address : KFKI Research Institute for Particle and Nuclear Physics
          H-1525 Budapest 114, POB. 49, Hungary

^ permalink raw reply	[flat|nested] 23+ messages in thread

end of thread, other threads:[~2005-01-16 16:09 UTC | newest]

Thread overview: 23+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2005-01-09 22:23 [PATCH 1/4] RFC: fast string matching infrastrure for netfilter Pablo Neira
2005-01-09 23:10 ` Patrick McHardy
2005-01-09 23:19   ` Pablo Neira
2005-01-10 19:54   ` nf_conntrack [was Re: [PATCH 1/4] RFC: fast string matching infrastrure for netfilter] Jozsef Kadlecsik
2005-01-10 20:31     ` Patrick McHardy
2005-01-10 21:28       ` Harald Welte
2005-01-14  2:45         ` Patrick McHardy
2005-01-14  4:31           ` nf_conntrack Yasuyuki KOZAKAI
2005-01-14  7:01           ` nf_conntrack [was Re: [PATCH 1/4] RFC: fast string matching infrastrure for netfilter] Rusty Russell
2005-01-14  8:20             ` Patrick Schaaf
2005-01-15 18:18               ` KOVACS Krisztian
2005-01-16 16:09                 ` Jozsef Kadlecsik
2005-01-14  8:37             ` Harald Welte
2005-01-14 10:22               ` Rusty Russell
2005-01-14 18:02               ` Patrick McHardy
2005-01-14 17:52             ` Patrick McHardy
2005-01-14  8:31           ` Harald Welte
2005-01-14 18:00             ` Patrick McHardy
2005-01-14  3:16         ` Patrick McHardy
2005-01-10 21:20     ` Harald Welte
2005-01-10  8:49 ` [PATCH 1/4] RFC: fast string matching infrastrure for netfilter Sven Schuster
2005-01-10 23:18   ` Pablo Neira
2005-01-10 10:06 ` Harald Welte

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.