Re: [PATCH] new match extension `flow'

All of lore.kernel.org
 help / color / mirror / Atom feed

From: Pablo Neira <pablo@eurodev.net>
To: Josh Samuelson <josamue1@wsc.edu>
Cc: netfilter-devel@lists.netfilter.org
Subject: Re: [PATCH] new match extension `flow'
Date: Sat, 13 Nov 2004 23:12:35 +0100	[thread overview]
Message-ID: <419686D3.3070600@eurodev.net> (raw)
In-Reply-To: <20041104022039.GA25749@wsc.edu>

Josh,

Some minor comments about the code that I had in mind.

Josh Samuelson wrote:

>diff -Pru linux-2.6.9/include/linux/netfilter_ipv4/ip_cte_flow.h linux-2.6.9-flow/include/linux/netfilter_ipv4/ip_cte_flow.h
>--- linux-2.6.9/include/linux/netfilter_ipv4/ip_cte_flow.h	1969-12-31 18:00:00.000000000 -0600
>+++ linux-2.6.9-flow/include/linux/netfilter_ipv4/ip_cte_flow.h	2004-11-03 19:10:13.000000000 -0600
>@@ -0,0 +1,28 @@
>+#ifndef _IP_CTE_FLOW_H
>+#define _IP_CTE_FLOW_H
>+
>+#define BIT_SET(b) (1 << (b))
>+#define INCREMENT_WITHOUT_OVERFLOW(c) if (c < ~(1 << ((sizeof(c) * 8) - 1))) c++
>+
>+/* A `ip_cte_flow' is a structure containing the IP connection count
>+   on various IP protocols.
>+*/
>+
>+struct ip_cte_flow
>+{
>+	 struct list_head list;
>+	 u_int32_t ip_ct_dir_original_ip,
>+		   ip,
>+		   icmp,
>+		   tcp,
>+		   udp;
>+};
>+
>+extern struct list_head *ip_cte_flow_hash;
>+extern unsigned int ip_cte_flow_htable_size;
>+DECLARE_RWLOCK_EXTERN(ip_cte_flow_lock);
>+extern int ip_cte_flow_register_notifier(struct notifier_block *nb);
>+extern int ip_cte_flow_unregister_notifier(struct notifier_block *nb);
>+extern u_int32_t ip_cte_hash_flow_ip(u_int32_t ip);
>+
>+#endif /* _IP_CTE_FLOW_H */
>diff -Pru linux-2.6.9/include/linux/netfilter_ipv4/ipt_flow.h linux-2.6.9-flow/include/linux/netfilter_ipv4/ipt_flow.h
>--- linux-2.6.9/include/linux/netfilter_ipv4/ipt_flow.h	1969-12-31 18:00:00.000000000 -0600
>+++ linux-2.6.9-flow/include/linux/netfilter_ipv4/ipt_flow.h	2004-11-03 17:02:37.000000000 -0600
>@@ -0,0 +1,41 @@
>+#ifndef _IPT_FLOW_H
>+#define _IPT_FLOW_H
>+
>+#define BIT_SET(b) (1 << (b))
>  
>

What if we use test_bit instead?

>+
>+typedef enum
>+{
>+	IPFLOW_IP,
>+	IPFLOW_ICMP,
>+	IPFLOW_TCP,
>+	IPFLOW_UDP,
>+	IPFLOW_NETWORK_MASK = 31
>+} ipflow_bits_t;
>+
>+struct ipt_flow_nm
>+{
>+	u_int32_t ip,
>+		  icmp,
>+		  tcp,
>+		  udp;
>+};
>+
>+struct ipt_flow_info
>+{
>+#ifdef __KERNEL__
>+	struct list_head list;
>+#else
>+	struct
>+	{
>+		void *next,
>+		     *prev;
>+	} list;
>+#endif
>+	u_int32_t proto,
>+		  network,
>+		  mask,
>+		  max;
>+	struct ipt_flow_nm *nm;
>+};
>+
>+#endif /* IPT_FLOW_H */
>diff -Pru linux-2.6.9/net/ipv4/netfilter/Kconfig linux-2.6.9-flow/net/ipv4/netfilter/Kconfig
>--- linux-2.6.9/net/ipv4/netfilter/Kconfig	2004-10-18 16:54:55.000000000 -0500
>+++ linux-2.6.9-flow/net/ipv4/netfilter/Kconfig	2004-11-02 11:01:58.000000000 -0600
>@@ -32,6 +32,28 @@
> 
> 	  If unsure, say `N'.
> 
>+config IP_NF_CONNTRACK_EVENTS
>+	bool "Connection tracking events"
>+	depends on IP_NF_CONNTRACK
>+	help
>+	  If unsure, say `N'.
>+	  
>+config IP_NF_CTE_FLOW
>+	tristate "Connection tracking events: protocol flow counters (EXPERIMENTAL)"
>+	depends on IP_NF_CONNTRACK_EVENTS && EXPERIMENTAL
>+	help
>+	  This option uses the connection tracking event notifiers to
>+	  keep protocol flow counters indexed by the original direction
>+	  source IP address.  The protocol counters include generic IP,
>+	  ICMP, TCP and UDP.
>+
>+	  These counters can be read from "/proc/net/ip_cte_flow".
>+
>+	  These counters can be used in the "protocol flow counters match
>+	  support", see below.
>+
>+	  To compile it as a module, choose M here.  If unsure, say N.
>+
> config IP_NF_CT_PROTO_SCTP
> 	tristate  'SCTP protocol connection tracking support (EXPERIMENTAL)'
> 	depends on IP_NF_CONNTRACK && EXPERIMENTAL
>@@ -279,6 +301,22 @@
> 
> 	  To compile it as a module, choose M here.  If unsure, say N.
> 
>+config IP_NF_CTE_MATCH_FLOW
>+       tristate 'protocol flow counters match support (EXPERIMENTAL)'
>+       depends on IP_NF_IPTABLES && IP_NF_CTE_FLOW && (IP_NF_FILTER || IP_NF_NAT || IP_NF_MANGLE) && EXPERIMENTAL
>+       help
>+         `flow' matching allows you to match a packet when the specified
>+         number of known protocol connections from a original direction
>+         source IP address is exceeded.  The matches can be made against
>+         generic IP, ICMP, TCP or UDP flow counters.  This match can be
>+         used in all tables but raw.
>+
>+         For example, this match allows you to control the number and type
>+         of connections (flows) from hosts in a known local network routing
>+         through the machine.
>+
>+         To compile it as a module, choose M here.  If unsure, say N.
>+
> config IP_NF_MATCH_OWNER
> 	tristate "Owner match support"
> 	depends on IP_NF_IPTABLES
>diff -Pru linux-2.6.9/net/ipv4/netfilter/Makefile linux-2.6.9-flow/net/ipv4/netfilter/Makefile
>--- linux-2.6.9/net/ipv4/netfilter/Makefile	2004-10-18 16:53:43.000000000 -0500
>+++ linux-2.6.9-flow/net/ipv4/netfilter/Makefile	2004-11-02 10:58:21.000000000 -0600
>@@ -34,6 +34,9 @@
> obj-$(CONFIG_IP_NF_NAT_FTP) += ip_nat_ftp.o
> obj-$(CONFIG_IP_NF_NAT_IRC) += ip_nat_irc.o
> 
>+# connection tracking event objects
>+obj-$(CONFIG_IP_NF_CTE_FLOW) += ip_cte_flow.o
>+
> # generic IP tables 
> obj-$(CONFIG_IP_NF_IPTABLES) += ip_tables.o
> 
>@@ -67,6 +70,7 @@
> obj-$(CONFIG_IP_NF_MATCH_ADDRTYPE) += ipt_addrtype.o
> obj-$(CONFIG_IP_NF_MATCH_PHYSDEV) += ipt_physdev.o
> obj-$(CONFIG_IP_NF_MATCH_COMMENT) += ipt_comment.o
>+obj-$(CONFIG_IP_NF_CTE_MATCH_FLOW) += ipt_flow.o
> 
> # targets
> obj-$(CONFIG_IP_NF_TARGET_REJECT) += ipt_REJECT.o
>diff -Pru linux-2.6.9/net/ipv4/netfilter/ip_cte_flow.c linux-2.6.9-flow/net/ipv4/netfilter/ip_cte_flow.c
>--- linux-2.6.9/net/ipv4/netfilter/ip_cte_flow.c	1969-12-31 18:00:00.000000000 -0600
>+++ linux-2.6.9-flow/net/ipv4/netfilter/ip_cte_flow.c	2004-11-03 20:12:09.000000000 -0600
>@@ -0,0 +1,377 @@
>+/* Kernel module to track [IP|ICMP|TCP|UDP] flow counts. */
>+
>+/* (C) 2004 Josh Samuelson <josamue1@wsc.edu>
>+ *
>+ * This program is free software; you can redistribute it and/or modify
>+ * it under the terms of the GNU General Public License version 2 as
>+ * published by the Free Software Foundation.
>+ */
>+
>+#include <linux/module.h>
>+#include <linux/vmalloc.h>
>+#include <linux/jhash.h>
>+#include <linux/random.h>
>+#include <linux/in.h>
>+#include <linux/notifier.h>
>+#ifdef CONFIG_PROC_FS
>+#include <linux/proc_fs.h>
>+#include <linux/seq_file.h>
>+#endif
>+#include <linux/netfilter_ipv4/ip_conntrack.h>
>+#include <linux/netfilter_ipv4/ip_conntrack_core.h>
>+#include <linux/netfilter_ipv4/ip_cte_flow.h>
>+
>+MODULE_LICENSE("GPL");
>+MODULE_AUTHOR("Josh Samuelson <josamue1@wsc.edu>");
>+MODULE_DESCRIPTION("protocol flow count tracking via connection tracking events");
>+
>+DECLARE_RWLOCK(ip_cte_flow_lock);
>+struct list_head *ip_cte_flow_hash;
>+unsigned int ip_cte_flow_htable_size = 0;
>+/* static atomic_t ip_cte_flow_count = ATOMIC_INIT(0); */
>+static int ip_cte_flow_hash_rnd;
>+static kmem_cache_t *ip_cte_flow_cachep;
>+static struct notifier_block *ip_cte_flow_nb_chain;
>+
>+#ifdef CONFIG_PROC_FS
>+static void *ip_cte_flow_seq_start(struct seq_file *s, loff_t *pos)
>+{
>+	if (*pos >= ip_cte_flow_htable_size)
>+		return NULL;
>+	return &ip_cte_flow_hash[*pos];
>+}
>+  
>+static void ip_cte_flow_seq_stop(struct seq_file *s, void *v)
>+{
>+}
>+
>+static void *ip_cte_flow_seq_next(struct seq_file *s, void *v, loff_t *pos)
>+{
>+	(*pos)++;
>+	if (*pos >= ip_cte_flow_htable_size)
>+		return NULL;
>+	return &ip_cte_flow_hash[*pos];
>+}
>+
>+static int ip_cte_flow_seq_show(struct seq_file *s, void *v)
>+{
>+	int ret = 0;
>+	struct list_head *list;
>+	struct ip_cte_flow *flow;
>+
>+	list = (struct list_head *) v;
>+	list = list->next;
>+	READ_LOCK(&ip_cte_flow_lock);
>+	list_for_each(list, (struct list_head *) v) {
>+		flow = (struct ip_cte_flow *) list;
>+		if (seq_printf(s, "%u.%u.%u.%u IP: %u ICMP: %hu "
>+				  "TCP: %hu UDP: %hu\n",
>+				  NIPQUAD(flow->ip_ct_dir_original_ip),
>+				  flow->ip,
>+				  flow->icmp,
>+				  flow->tcp,
>+				  flow->udp)) {
>+			ret = -ENOSPC;
>+			break;
>+		}
>+	}
>+	READ_UNLOCK(&ip_cte_flow_lock);
>+	return ret;
>+}
>+  
>+static struct seq_operations ip_cte_flow_seq_ops = {
>+	.start = ip_cte_flow_seq_start,
>+	.next  = ip_cte_flow_seq_next,
>+	.stop  = ip_cte_flow_seq_stop,
>+	.show  = ip_cte_flow_seq_show
>+};
>+
>+static int ip_cte_flow_open(struct inode *inode, struct file *file)
>+{
>+	return seq_open(file, &ip_cte_flow_seq_ops);
>+}
>+
>+static struct file_operations ip_cte_flow_file_ops = {
>+	.owner   = THIS_MODULE,
>+	.open    = ip_cte_flow_open,
>+	.read    = seq_read,
>+	.llseek  = seq_lseek,
>+	.release = seq_release
>+};
>+#endif /* CONFIG_PROC_FS */
>+  
>+int
>+ip_cte_flow_register_notifier(struct notifier_block *nb)
>+{
>+	return notifier_chain_register(&ip_cte_flow_nb_chain, nb);
>+}
>+
>+int
>+ip_cte_flow_unregister_notifier(struct notifier_block *nb)
>+{
>+	return notifier_chain_unregister(&ip_cte_flow_nb_chain, nb);
>+}
>+
>+void
>+ip_cte_flow_event(enum ip_conntrack_events event, struct ip_conntrack *ct)
>+{
>+	notifier_call_chain(&ip_cte_flow_nb_chain, event, ct);
>+}
>+
>+u_int32_t
>+ip_cte_hash_flow_ip(u_int32_t ip)
>+{
>+	return(jhash_1word(ip, ip_cte_flow_hash_rnd) % ip_cte_flow_htable_size);
>+}
>+
>+static int
>+ip_cte_flow_inc(struct ip_conntrack_tuple_hash *hash)
>+{
>+	unsigned int flow_hash;
>+	u_int32_t ip;
>+	struct list_head *list;
>+	struct ip_cte_flow *flow = NULL;
>+
>+	ip = hash->tuple.src.ip;
>+	flow_hash = ip_cte_hash_flow_ip(ip);
>+	READ_LOCK(&ip_cte_flow_lock);
>+	list_for_each(list, &ip_cte_flow_hash[flow_hash]) {
>+		if (((struct ip_cte_flow *) list)->ip_ct_dir_original_ip ==
>+		    ip) {
>+			flow = (struct ip_cte_flow *) list;
>+			break;
>+		}
>+	}
>+	READ_UNLOCK(&ip_cte_flow_lock);
>  
>

"Code which grabs a read lock, searches a list, fails to find what it 
wants, drops the read lock, grabs a write lock and inserts the object has 
a race condition". --Rusty's unreliable guide to locking

So we have to fix this :)

>+	WRITE_LOCK(&ip_cte_flow_lock);
>+	if (!flow) {
>+		flow = kmem_cache_alloc(ip_cte_flow_cachep, GFP_ATOMIC);
>+		if (flow) {
>+			/* atomic_inc(&ip_cte_flow_count); */
>+			memset(flow, 0, sizeof(struct ip_cte_flow));
>+			flow->ip_ct_dir_original_ip = ip;
>+			list = (struct list_head *) flow;
>+			list_add(list, &ip_cte_flow_hash[flow_hash]);
>+		}
>+	}
>+	if (flow) {
>+		INCREMENT_WITHOUT_OVERFLOW(flow->ip);
>+		switch (hash->tuple.dst.protonum) {
>+			case IPPROTO_ICMP:
>+				INCREMENT_WITHOUT_OVERFLOW(flow->icmp);
>+				break;
>+			case IPPROTO_TCP:
>+				INCREMENT_WITHOUT_OVERFLOW(flow->tcp);
>+				break;
>+			case IPPROTO_UDP:
>+				INCREMENT_WITHOUT_OVERFLOW(flow->udp);
>+				break;
>+		}
>+	}
>+	WRITE_UNLOCK(&ip_cte_flow_lock);
>+	return(flow == NULL);
>+}
>+
>+static void
>+ip_cte_flow_dec(struct ip_conntrack_tuple_hash *hash)
>+{
>+	unsigned int flow_hash;
>+	u_int32_t ip;
>+	struct list_head *list;
>+	struct ip_cte_flow *flow = NULL;
>+
>+	ip = hash->tuple.src.ip;
>+	flow_hash = ip_cte_hash_flow_ip(ip);
>+	READ_LOCK(&ip_cte_flow_lock);
>+	list_for_each(list, &ip_cte_flow_hash[flow_hash]) {
>+		if (((struct ip_cte_flow *) list)->ip_ct_dir_original_ip ==
>+		    ip) {
>+			flow = (struct ip_cte_flow *) list;
>+			break;
>+		}
>+	}
>+	READ_UNLOCK(&ip_cte_flow_lock);
>  
>

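Same problem here: the flow entry is looked up under the read lock, but 
that lock is dropped before the write lock is taken, so the entry can be 
freed by someone else in between.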

>+	if (flow) {
>+		WRITE_LOCK(&ip_cte_flow_lock);
>+		if(flow->ip)
>+			flow->ip--;
>+		switch (hash->tuple.dst.protonum) {
>+			case IPPROTO_ICMP:
>+				if (flow->icmp)
>+					flow->icmp--;
>+				break;
>+			case IPPROTO_TCP:
>+				if (flow->tcp)
>+					flow->tcp--;
>+				break;
>+			case IPPROTO_UDP:
>+				if (flow->udp)
>+					flow->udp--;
>+				break;
>+		}
>+		if (flow->ip == 0) {
>+			list = (struct list_head *) flow;
>+			list_del(list);
>+			kmem_cache_free(ip_cte_flow_cachep, flow);
>+			/* atomic_dec(&ip_cte_flow_count); */
>+		}
>+		WRITE_UNLOCK(&ip_cte_flow_lock);
>+	} else {
>+		printk(KERN_WARNING "conntrack being destroyed, "
>+				    "yet not found on flow list\n"
>+				    "%u src: %u.%u.%u.%u dst: %u.%u.%u.%u\n",
>+				    hash->tuple.dst.protonum,
>+				    NIPQUAD(hash->tuple.src.ip),
>+				    NIPQUAD(hash->tuple.dst.ip));
>+	}
>+}
>+
>+static void
>+ip_cte_flow_existing(void)
>+{
>+	unsigned int i;
>+	struct list_head *list;
>+	struct ip_conntrack_tuple_hash *hash;
>+
>+	READ_LOCK(&ip_conntrack_lock);
>+	for (i = 0; i < ip_conntrack_htable_size; i++) {
>+		list_for_each(list, &ip_conntrack_hash[i]) {
>+			hash = (struct ip_conntrack_tuple_hash *) list;
>+			if (!DIRECTION(hash))
>+				ip_cte_flow_inc(hash);
>+		}
>+	}
>+	READ_UNLOCK(&ip_conntrack_lock);
>+}
>+
>+static void
>+ip_cte_flow_destroy(void)
>+{
>+	unsigned int i;
>+	struct list_head *list;
>+	struct ip_cte_flow *flow;
>+
>+	WRITE_LOCK(&ip_cte_flow_lock);
>+	for (i = 0; i < ip_cte_flow_htable_size; i++) {
>+		list = ip_cte_flow_hash[i].next;
>+		while (list != &ip_cte_flow_hash[i]) {
>+				flow = (struct ip_cte_flow *) list;
>+				list = list->next;
>+				list_del((struct list_head *) flow);
>+				kmem_cache_free(ip_cte_flow_cachep, flow);
>+		}
>+	}
>+	WRITE_UNLOCK(&ip_cte_flow_lock);
>+}
>+
>+static int ip_cte_flow_notifier(struct notifier_block *nb,
>+	unsigned long ips,
>+	void *v)
>+{
>+	struct ip_conntrack *ct = v;
>+
>+	switch (ips)
>+	{
>+		case BIT_SET(IPCT_NEW):
>+		case BIT_SET(IPCT_RELATED):
>+			ip_cte_flow_inc(&ct->tuplehash[IP_CT_DIR_ORIGINAL]);
>+			ip_cte_flow_event(ips, ct);
>+			break;
>+		case BIT_SET(IPCT_DESTROY):
>+			ip_cte_flow_dec(&ct->tuplehash[IP_CT_DIR_ORIGINAL]);
>+			ip_cte_flow_event(ips, ct);
>+			break;
>+	}
>+	return NOTIFY_OK;
>+}
>+
>+static struct notifier_block ip_cte_flow_nb = {
>+	.notifier_call = ip_cte_flow_notifier,
>+	.next = NULL,
>+	.priority = 0
>+};
>+
>+static int __init init(void)
>+{
>+	unsigned int i;
>+#ifdef CONFIG_PROC_FS
>+	struct proc_dir_entry *proc_flow;
>+#endif
>+
>+	need_ip_conntrack();
>+
>+	ip_cte_flow_htable_size = ip_conntrack_htable_size / 2;
>+
>+	get_random_bytes(&ip_cte_flow_hash_rnd, 4);
>+
>+	ip_cte_flow_hash = vmalloc(sizeof(struct list_head)
>+					* ip_cte_flow_htable_size);
>+	if (!ip_cte_flow_hash) {
>+		printk(KERN_ERR "Unable to create ip_cte_flow_hash\n");
>+		goto err;
>+	}
>+
>+	for (i = 0; i < ip_cte_flow_htable_size; i++)
>+		INIT_LIST_HEAD(&ip_cte_flow_hash[i]);
>+
>+	ip_cte_flow_cachep = kmem_cache_create("ip_cte_flow",
>+						sizeof(struct ip_cte_flow), 0,
>+						SLAB_HWCACHE_ALIGN, NULL, NULL);
>+	if (!ip_cte_flow_cachep) {
>+		printk(KERN_ERR "Unable to create ip_cte_flow slab cache\n");
>+		goto err_free_hash;
>+ 	}
>+
>+#ifdef CONFIG_PROC_FS
>+	proc_flow = proc_net_fops_create("ip_cte_flow", 0440, &ip_cte_flow_file_ops);
>+	if (!proc_flow) goto err_free_slab;
>+#endif
>+
>+	if (ip_conntrack_register_notifier(&ip_cte_flow_nb)) {
>+		goto cleanup_proc;
>+	}
>+
>+	ip_cte_flow_existing();
>+
>+	printk("connection tracking events: protocol flow counters "
>+	       "(%u buckets) - %Zd bytes per IP_CT_DIR_ORIGINAL source\n",
>+	       ip_cte_flow_htable_size,
>+	       sizeof(struct ip_cte_flow));
>+
>+	return 0;
>+
>+cleanup_proc:
>+#ifdef CONFIG_PROC_FS
>+	proc_net_remove("ip_cte_flow");
>+#endif
>+err_free_slab:
>+ 	kmem_cache_destroy(ip_cte_flow_cachep);
>+err_free_hash:
>+	vfree(ip_cte_flow_hash);
>+err:
>+	return -ENOMEM;
>+}
>+
>+static void __exit fini(void)
>+{
>+	ip_cte_flow_destroy();
>+
>+	if (ip_conntrack_unregister_notifier(&ip_cte_flow_nb))
>+		printk(KERN_ERR "ip_conntrack_unregister_notifier() "
>+				"failed, huh?\n");
>+#ifdef CONFIG_PROC_FS
>+	proc_net_remove("ip_cte_flow");
>+#endif
>+	kmem_cache_destroy(ip_cte_flow_cachep);
>+	vfree(ip_cte_flow_hash);
>+}
>+
>+module_init(init);
>+module_exit(fini);
>+
>+EXPORT_SYMBOL(ip_cte_flow_hash);
>+EXPORT_SYMBOL(ip_cte_flow_htable_size);
>+EXPORT_SYMBOL(ip_cte_flow_lock);
>+EXPORT_SYMBOL(ip_cte_flow_register_notifier);
>+EXPORT_SYMBOL(ip_cte_flow_unregister_notifier);
>+EXPORT_SYMBOL(ip_cte_hash_flow_ip);
>diff -Pru linux-2.6.9/net/ipv4/netfilter/ipt_flow.c linux-2.6.9-flow/net/ipv4/netfilter/ipt_flow.c
>--- linux-2.6.9/net/ipv4/netfilter/ipt_flow.c	1969-12-31 18:00:00.000000000 -0600
>+++ linux-2.6.9-flow/net/ipv4/netfilter/ipt_flow.c	2004-11-03 20:12:00.000000000 -0600
>@@ -0,0 +1,318 @@
>+/* Kernel module to match [IP|ICMP|TCP|UDP] flow counts. */
>+
>+/* (C) 2004 Josh Samuelson <josamue1@wsc.edu>
>+ *
>+ * This program is free software; you can redistribute it and/or modify
>+ * it under the terms of the GNU General Public License version 2 as
>+ * published by the Free Software Foundation.
>+ */
>+
>+#include <linux/module.h>
>+#include <linux/skbuff.h>
>+#include <linux/vmalloc.h>
>+#include <linux/notifier.h>
>+#include <linux/list.h>
>+#include <linux/netfilter_ipv4/ip_tables.h>
>+#include <linux/netfilter_ipv4/ip_conntrack.h>
>+#include <linux/netfilter_ipv4/ip_cte_flow.h>
>+#include <linux/netfilter_ipv4/ipt_flow.h>
>+
>+MODULE_LICENSE("GPL");
>+MODULE_AUTHOR("Josh Samuelson <josamue1@wsc.edu>");
>+MODULE_DESCRIPTION("protocol flow count match module");
>+
>+
>+DECLARE_RWLOCK(ipt_flow_lock);
>+LIST_HEAD(ipt_flow_notifier_list);
>+static atomic_t ipt_flow_notifier_list_count = ATOMIC_INIT(0);
>+static struct notifier_block ipt_flow_nb;
>+
>+static void
>+ipt_flow_nw_inc(struct ip_conntrack *ct,
>+	struct ipt_flow_info *finfo)
>+{
>+	INCREMENT_WITHOUT_OVERFLOW(finfo->nm->ip);
>+	switch (ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum) {
>+		case IPPROTO_ICMP:
>+			INCREMENT_WITHOUT_OVERFLOW(finfo->nm->icmp);
>+			break;
>+		case IPPROTO_TCP:
>+			INCREMENT_WITHOUT_OVERFLOW(finfo->nm->tcp);
>+			break;
>+		case IPPROTO_UDP:
>+			INCREMENT_WITHOUT_OVERFLOW(finfo->nm->udp);
>+			break;
>+	}
>+}
>+
>+static void
>+ipt_flow_nw_dec(struct ip_conntrack *ct,
>+	struct ipt_flow_info *finfo)
>+{
>+	if (finfo->nm->ip)
>+		finfo->nm->ip--;
>+	switch (ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum) {
>+		case IPPROTO_ICMP:
>+			if (finfo->nm->icmp)
>+				finfo->nm->icmp--;
>+			break;
>+		case IPPROTO_TCP:
>+			if (finfo->nm->tcp)
>+				finfo->nm->tcp--;
>+			break;
>+		case IPPROTO_UDP:
>+			if (finfo->nm->udp)
>+				finfo->nm->udp--;
>+			break;
>+	}
>+}
>+
>+static void
>+ipt_flow_existing(struct ipt_flow_info *finfo)
>+{
>+        unsigned int i;
>+        struct list_head *list;
>+        struct ip_cte_flow *flow;
>+
>+        READ_LOCK(&ip_cte_flow_lock);
>+        for (i = 0; i < ip_cte_flow_htable_size; i++) {
>+                list_for_each(list, &ip_cte_flow_hash[i]) {
>+                        flow = (struct ip_cte_flow *) list;
>+			if ((flow->ip_ct_dir_original_ip & finfo->mask) ==
>+			    finfo->network) {
>+				finfo->nm->ip += flow->ip;
>+				finfo->nm->icmp += flow->icmp;
>+				finfo->nm->tcp += flow->tcp;
>+				finfo->nm->udp += flow->udp;
>+			}
>+                }
>+        }
>+        READ_UNLOCK(&ip_cte_flow_lock);
>+}
>+
>+int
>+ipt_flow_nm_notifier(struct notifier_block *self,
>+	unsigned long event, void *vct)
>+{
>+	struct list_head *list;
>+	struct ipt_flow_info *finfo;
>+	struct ip_conntrack *ct = vct;
>+	u_int32_t ip;
>+
>+	READ_LOCK(&ipt_flow_lock);
>+	ip = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.ip;
>+	list_for_each(list, &ipt_flow_notifier_list) {
>+		finfo = (struct ipt_flow_info *) list;
>+		if ((ip & finfo->mask) == finfo->network) {
>+			switch (event) {
>+				case BIT_SET(IPCT_NEW):
>+				case BIT_SET(IPCT_RELATED):
>+					ipt_flow_nw_inc(ct, finfo);
>+					break;
>+				case BIT_SET(IPCT_DESTROY):
>+					ipt_flow_nw_dec(ct, finfo);
>+					break;
>+			}
>+		}
>+	}
>+	READ_UNLOCK(&ipt_flow_lock);
>+}
>+
>+static int
>+match(const struct sk_buff *skb,
>+	const struct net_device *in,
>+	const struct net_device *out,
>+	const void *matchinfo,
>+	int offset,
>+	int *hotdrop)
>+{
>+	const struct ipt_flow_info *finfo = matchinfo;
>+	struct list_head *list;
>+	struct ip_cte_flow *flow = NULL;
>+	u_int16_t proto;
>+	u_int32_t flow_hash,
>+		  ip;
>+	int ret = 0;
>+
>+	proto = skb->nh.iph->protocol;
>+	ip = skb->nh.iph->saddr;
>+	if ((finfo->proto & BIT_SET(IPFLOW_NETWORK_MASK)) &&
>+	    ((ip & finfo->mask) == finfo->network)) {
>+		if ((finfo->nm->ip >= finfo->max) &&
>+		    (finfo->proto & BIT_SET(IPFLOW_IP)))
>+			ret = 1;
>+		switch (proto) {
>+			case IPPROTO_ICMP:
>+				if ((finfo->nm->icmp >= finfo->max) &&
>+				    (finfo->proto & BIT_SET(IPFLOW_ICMP)))
>+					ret = 1;
>+				break;
>+			case IPPROTO_TCP:
>+				if ((finfo->nm->tcp >= finfo->max) &&
>+				    (finfo->proto & BIT_SET(IPFLOW_TCP)))
>+					ret = 1;
>+				break;
>+			case IPPROTO_UDP:
>+				if ((finfo->nm->udp >= finfo->max) &&
>+				    (finfo->proto & BIT_SET(IPFLOW_UDP)))
>+					ret = 1;
>+				break;
>+		}
>+	} else {
>+		flow_hash = ip_cte_hash_flow_ip(ip);
>+		READ_LOCK(&ip_cte_flow_lock);
>+		list_for_each(list, &ip_cte_flow_hash[flow_hash]) {
>+			if (((struct ip_cte_flow *)
>+			    list)->ip_ct_dir_original_ip == ip) {
>+				flow = (struct ip_cte_flow *) list;
>+				break;
>+			}
>+		}
>+		if (flow) {
>+			if ((flow->ip >= finfo->max) &&
>+			    (finfo->proto & BIT_SET(IPFLOW_IP)))
>+				ret = 1;
>+			switch (proto) {
>+				case IPPROTO_ICMP:
>+					if ((flow->icmp >= finfo->max) &&
>+					    (finfo->proto &
>+					    BIT_SET(IPFLOW_ICMP)))
>+						ret = 1;
>+					break;
>+				case IPPROTO_TCP:
>+					if ((flow->tcp >= finfo->max) &&
>+					    (finfo->proto &
>+					    BIT_SET(IPFLOW_TCP)))
>+						ret = 1;
>+					break;
>+				case IPPROTO_UDP:
>+					if ((flow->udp >= finfo->max) &&
>+					    (finfo->proto &
>+					    BIT_SET(IPFLOW_UDP)))
>+						ret = 1;
>+					break;
>+			}
>+		}
>+		READ_UNLOCK(&ip_cte_flow_lock);
>+	}
>+	return(ret);
>+}
>+
>+static int check(const char *tablename,
>+	const struct ipt_ip *ip,
>+	void *matchinfo,
>+	unsigned int matchsize,
>+	unsigned int hook_mask)
>+{
>+	struct list_head *list;
>+	struct ipt_flow_info *finfo = matchinfo;
>+
>+	if (matchsize != IPT_ALIGN(sizeof(struct ipt_flow_info)))
>+		return 0;
>+
>+	if (strcmp(tablename, "raw") == 0) {
>+		printk(KERN_WARNING "flow: can not by used in the \"raw\" table\n");
>+		return 0;
>+	}
>+
>+	switch (ip->proto & ~BIT_SET(IPFLOW_NETWORK_MASK)) {
>+		case IPPROTO_IP:
>+			if ((finfo->proto & BIT_SET(IPFLOW_IP)) &&
>+			    (finfo->proto &
>+			    ~BIT_SET(IPFLOW_NETWORK_MASK) &
>+			    ~BIT_SET(IPFLOW_IP)))
>+				return 0;
>+			break;
>+		case IPPROTO_ICMP:
>+			if ((finfo->proto & BIT_SET(IPFLOW_ICMP)) &&
>+			    (finfo->proto &
>+			    ~BIT_SET(IPFLOW_NETWORK_MASK) &
>+			    ~BIT_SET(IPFLOW_ICMP)))
>+				return 0;
>+			break;
>+		case IPPROTO_TCP:
>+			if ((finfo->proto & BIT_SET(IPFLOW_TCP)) &&
>+			    (finfo->proto &
>+			    ~BIT_SET(IPFLOW_NETWORK_MASK) &
>+			    ~BIT_SET(IPFLOW_TCP)))
>+				return 0;
>+			break;
>+		case IPPROTO_UDP:
>+			if ((finfo->proto & BIT_SET(IPFLOW_UDP)) &&
>+			    (finfo->proto &
>+			    ~BIT_SET(IPFLOW_NETWORK_MASK) &
>+			    ~BIT_SET(IPFLOW_UDP)))
>+				return 0;
>+			break;
>+		default:
>+			return 0;
>+	}
>+
>+	if ((finfo->proto & BIT_SET(IPFLOW_NETWORK_MASK))) {
>+		finfo->nm = vmalloc(sizeof(struct ipt_flow_nm));
>  
>

I think that we can use kmalloc here instead; vmalloc is meant for 
big allocations, like big arrays.

>+		if (!finfo->nm)
>+			return 0;
>+		memset(finfo->nm, 0, sizeof(struct ipt_flow_nm));
>+		WRITE_LOCK(&ipt_flow_lock);
>+		ipt_flow_existing(finfo);
>+		list = (struct list_head *) finfo;
>+		list_add(list, &ipt_flow_notifier_list);
>+		if (atomic_read(&ipt_flow_notifier_list_count) == 0) {
>+			ipt_flow_nb.notifier_call = ipt_flow_nm_notifier;
>+			if (ip_cte_flow_register_notifier(&ipt_flow_nb)) {
>+				vfree(finfo->nm);
>+				return 0;
>+			}
>+		}
>+		atomic_inc(&ipt_flow_notifier_list_count);
>+		WRITE_UNLOCK(&ipt_flow_lock);
>+	}
>+	return 1;
>+}
>+
>+void destroy(void *matchinfo,
>+	unsigned int matchsize)
>+{
>+	struct list_head *list;
>+	struct ipt_flow_info *finfo = matchinfo;
>+
>+	if (matchsize != IPT_ALIGN(sizeof(struct ipt_flow_info)))
>+		return;
>+
>+	if ((finfo->proto & BIT_SET(IPFLOW_NETWORK_MASK)) && finfo->nm) {
>+		WRITE_LOCK(&ipt_flow_lock);
>+		atomic_dec(&ipt_flow_notifier_list_count);
>+		if (atomic_read(&ipt_flow_notifier_list_count) == 0) {
>+			if (ip_cte_flow_unregister_notifier(&ipt_flow_nb))
>+				printk(KERN_ERR
>+				       "ip_cte_flow_unregister_notifier failed"
>+				       ", huh?\n");
>+		}
>+		list = (struct list_head *) finfo;
>+		list_del(list);
>+		WRITE_UNLOCK(&ipt_flow_lock);
>+		vfree(finfo->nm);
>+	}
>+}
>+
>+static struct ipt_match flow_match = {
>+	.name		= "flow",
>+	.match		= &match,
>+	.checkentry	= &check,
>+	.destroy	= &destroy,
>+	.me		= THIS_MODULE,
>+};
>+
>+static int __init init(void)
>+{
>+	need_ip_conntrack();
>+	return ipt_register_match(&flow_match);
>+}
>+
>+static void __exit fini(void)
>+{
>+	ipt_unregister_match(&flow_match);
>+}
>+
>+module_init(init);
>+module_exit(fini);
>  
>

--Pablo

     prev parent reply	other threads:[~2004-11-13 22:12 UTC|newest]

Thread overview: 10+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2004-10-28  2:05 [PATCH] new match extension `flow' Josh Samuelson
2004-10-28 20:15 ` Josh Samuelson
2004-10-29 19:32 ` Pablo Neira
2004-10-31  6:38   ` Josh Samuelson
2004-10-31 14:41     ` Pablo Neira
2004-11-04  2:20       ` Josh Samuelson
2004-11-06 15:19         ` Pablo Neira
2004-11-08  2:52           ` Josh Samuelson
2004-11-10 18:12             ` Pablo Neira
2004-11-13 22:12         ` Pablo Neira [this message]

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=419686D3.3070600@eurodev.net \
    --to=pablo@eurodev.net \
    --cc=josamue1@wsc.edu \
    --cc=netfilter-devel@lists.netfilter.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.