From mboxrd@z Thu Jan 1 00:00:00 1970 From: Pablo Neira Subject: Re: [PATCH] new match extension `flow' Date: Sat, 13 Nov 2004 23:12:35 +0100 Message-ID: <419686D3.3070600@eurodev.net> References: <20041028020550.GA10097@wsc.edu> <41829ADC.2090708@eurodev.net> <20041031063813.GA29402@wsc.edu> <4184F9AD.9040902@eurodev.net> <20041104022039.GA25749@wsc.edu> Mime-Version: 1.0 Content-Type: text/plain; charset=us-ascii; format=flowed Content-Transfer-Encoding: 7bit Cc: netfilter-devel@lists.netfilter.org Return-path: To: Josh Samuelson In-Reply-To: <20041104022039.GA25749@wsc.edu> List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Sender: netfilter-devel-bounces@lists.netfilter.org Errors-To: netfilter-devel-bounces@lists.netfilter.org List-Id: netfilter-devel.vger.kernel.org Josh, Some minor comments about the code that I had in mind. Josh Samuelson wrote: >diff -Pru linux-2.6.9/include/linux/netfilter_ipv4/ip_cte_flow.h linux-2.6.9-flow/include/linux/netfilter_ipv4/ip_cte_flow.h >--- linux-2.6.9/include/linux/netfilter_ipv4/ip_cte_flow.h 1969-12-31 18:00:00.000000000 -0600 >+++ linux-2.6.9-flow/include/linux/netfilter_ipv4/ip_cte_flow.h 2004-11-03 19:10:13.000000000 -0600 >@@ -0,0 +1,28 @@ >+#ifndef _IP_CTE_FLOW_H >+#define _IP_CTE_FLOW_H >+ >+#define BIT_SET(b) (1 << (b)) >+#define INCREMENT_WITHOUT_OVERFLOW(c) if (c < ~(1 << ((sizeof(c) * 8) - 1))) c++ >+ >+/* A `ip_cte_flow' is a structure containing the IP connection count >+ on various IP protocols. 
>+*/ >+ >+struct ip_cte_flow >+{ >+ struct list_head list; >+ u_int32_t ip_ct_dir_original_ip, >+ ip, >+ icmp, >+ tcp, >+ udp; >+}; >+ >+extern struct list_head *ip_cte_flow_hash; >+extern unsigned int ip_cte_flow_htable_size; >+DECLARE_RWLOCK_EXTERN(ip_cte_flow_lock); >+extern int ip_cte_flow_register_notifier(struct notifier_block *nb); >+extern int ip_cte_flow_unregister_notifier(struct notifier_block *nb); >+extern u_int32_t ip_cte_hash_flow_ip(u_int32_t ip); >+ >+#endif /* _IP_CTE_FLOW_H */ >diff -Pru linux-2.6.9/include/linux/netfilter_ipv4/ipt_flow.h linux-2.6.9-flow/include/linux/netfilter_ipv4/ipt_flow.h >--- linux-2.6.9/include/linux/netfilter_ipv4/ipt_flow.h 1969-12-31 18:00:00.000000000 -0600 >+++ linux-2.6.9-flow/include/linux/netfilter_ipv4/ipt_flow.h 2004-11-03 17:02:37.000000000 -0600 >@@ -0,0 +1,41 @@ >+#ifndef _IPT_FLOW_H >+#define _IPT_FLOW_H >+ >+#define BIT_SET(b) (1 << (b)) > > What if we use test_bit instead? >+ >+typedef enum >+{ >+ IPFLOW_IP, >+ IPFLOW_ICMP, >+ IPFLOW_TCP, >+ IPFLOW_UDP, >+ IPFLOW_NETWORK_MASK = 31 >+} ipflow_bits_t; >+ >+struct ipt_flow_nm >+{ >+ u_int32_t ip, >+ icmp, >+ tcp, >+ udp; >+}; >+ >+struct ipt_flow_info >+{ >+#ifdef __KERNEL__ >+ struct list_head list; >+#else >+ struct >+ { >+ void *next, >+ *prev; >+ } list; >+#endif >+ u_int32_t proto, >+ network, >+ mask, >+ max; >+ struct ipt_flow_nm *nm; >+}; >+ >+#endif /* IPT_FLOW_H */ >diff -Pru linux-2.6.9/net/ipv4/netfilter/Kconfig linux-2.6.9-flow/net/ipv4/netfilter/Kconfig >--- linux-2.6.9/net/ipv4/netfilter/Kconfig 2004-10-18 16:54:55.000000000 -0500 >+++ linux-2.6.9-flow/net/ipv4/netfilter/Kconfig 2004-11-02 11:01:58.000000000 -0600 >@@ -32,6 +32,28 @@ > > If unsure, say `N'. > >+config IP_NF_CONNTRACK_EVENTS >+ bool "Connection tracking events" >+ depends on IP_NF_CONNTRACK >+ help >+ If unsure, say `N'. 
>+ >+config IP_NF_CTE_FLOW >+ tristate "Connection tracking events: protocol flow counters (EXPERIMENTAL)" >+ depends on IP_NF_CONNTRACK_EVENTS && EXPERIMENTAL >+ help >+ This option uses the connection tracking event notifiers to >+ keep protocol flow counters indexed by the original direction >+ source IP address. The protocol counters include generic IP, >+ ICMP, TCP and UDP. >+ >+ These counters can be read from "/proc/net/ip_cte_flow". >+ >+ These counters can be used in the "protocol flow counters match >+ support", see below. >+ >+ To compile it as a module, choose M here. If unsure, say N. >+ > config IP_NF_CT_PROTO_SCTP > tristate 'SCTP protocol connection tracking support (EXPERIMENTAL)' > depends on IP_NF_CONNTRACK && EXPERIMENTAL >@@ -279,6 +301,22 @@ > > To compile it as a module, choose M here. If unsure, say N. > >+config IP_NF_CTE_MATCH_FLOW >+ tristate 'protocol flow counters match support (EXPERIMENTAL)' >+ depends on IP_NF_IPTABLES && IP_NF_CTE_FLOW && (IP_NF_FILTER || IP_NF_NAT || IP_NF_MANGLE) && EXPERIMENTAL >+ help >+ `flow' matching allows you to match a packet when the specified >+ number of known protocol connections from a original direction >+ source IP address is exceeded. The matches can be made against >+ generic IP, ICMP, TCP or UDP flow counters. This match can be >+ used in all tables but raw. >+ >+ For example, this match allows you to control the number and type >+ of connections (flows) from hosts in a known local network routing >+ through the machine. >+ >+ To compile it as a module, choose M here. If unsure, say N. 
>+ > config IP_NF_MATCH_OWNER > tristate "Owner match support" > depends on IP_NF_IPTABLES >diff -Pru linux-2.6.9/net/ipv4/netfilter/Makefile linux-2.6.9-flow/net/ipv4/netfilter/Makefile >--- linux-2.6.9/net/ipv4/netfilter/Makefile 2004-10-18 16:53:43.000000000 -0500 >+++ linux-2.6.9-flow/net/ipv4/netfilter/Makefile 2004-11-02 10:58:21.000000000 -0600 >@@ -34,6 +34,9 @@ > obj-$(CONFIG_IP_NF_NAT_FTP) += ip_nat_ftp.o > obj-$(CONFIG_IP_NF_NAT_IRC) += ip_nat_irc.o > >+# connection tracking event objects >+obj-$(CONFIG_IP_NF_CTE_FLOW) += ip_cte_flow.o >+ > # generic IP tables > obj-$(CONFIG_IP_NF_IPTABLES) += ip_tables.o > >@@ -67,6 +70,7 @@ > obj-$(CONFIG_IP_NF_MATCH_ADDRTYPE) += ipt_addrtype.o > obj-$(CONFIG_IP_NF_MATCH_PHYSDEV) += ipt_physdev.o > obj-$(CONFIG_IP_NF_MATCH_COMMENT) += ipt_comment.o >+obj-$(CONFIG_IP_NF_CTE_MATCH_FLOW) += ipt_flow.o > > # targets > obj-$(CONFIG_IP_NF_TARGET_REJECT) += ipt_REJECT.o >diff -Pru linux-2.6.9/net/ipv4/netfilter/ip_cte_flow.c linux-2.6.9-flow/net/ipv4/netfilter/ip_cte_flow.c >--- linux-2.6.9/net/ipv4/netfilter/ip_cte_flow.c 1969-12-31 18:00:00.000000000 -0600 >+++ linux-2.6.9-flow/net/ipv4/netfilter/ip_cte_flow.c 2004-11-03 20:12:09.000000000 -0600 >@@ -0,0 +1,377 @@ >+/* Kernel module to track [IP|ICMP|TCP|UDP] flow counts. */ >+ >+/* (C) 2004 Josh Samuelson >+ * >+ * This program is free software; you can redistribute it and/or modify >+ * it under the terms of the GNU General Public License version 2 as >+ * published by the Free Software Foundation. 
>+ */ >+ >+#include >+#include >+#include >+#include >+#include >+#include >+#ifdef CONFIG_PROC_FS >+#include >+#include >+#endif >+#include >+#include >+#include >+ >+MODULE_LICENSE("GPL"); >+MODULE_AUTHOR("Josh Samuelson "); >+MODULE_DESCRIPTION("protocol flow count tracking via connection tracking events"); >+ >+DECLARE_RWLOCK(ip_cte_flow_lock); >+struct list_head *ip_cte_flow_hash; >+unsigned int ip_cte_flow_htable_size = 0; >+/* static atomic_t ip_cte_flow_count = ATOMIC_INIT(0); */ >+static int ip_cte_flow_hash_rnd; >+static kmem_cache_t *ip_cte_flow_cachep; >+static struct notifier_block *ip_cte_flow_nb_chain; >+ >+#ifdef CONFIG_PROC_FS >+static void *ip_cte_flow_seq_start(struct seq_file *s, loff_t *pos) >+{ >+ if (*pos >= ip_cte_flow_htable_size) >+ return NULL; >+ return &ip_cte_flow_hash[*pos]; >+} >+ >+static void ip_cte_flow_seq_stop(struct seq_file *s, void *v) >+{ >+} >+ >+static void *ip_cte_flow_seq_next(struct seq_file *s, void *v, loff_t *pos) >+{ >+ (*pos)++; >+ if (*pos >= ip_cte_flow_htable_size) >+ return NULL; >+ return &ip_cte_flow_hash[*pos]; >+} >+ >+static int ip_cte_flow_seq_show(struct seq_file *s, void *v) >+{ >+ int ret = 0; >+ struct list_head *list; >+ struct ip_cte_flow *flow; >+ >+ list = (struct list_head *) v; >+ list = list->next; >+ READ_LOCK(&ip_cte_flow_lock); >+ list_for_each(list, (struct list_head *) v) { >+ flow = (struct ip_cte_flow *) list; >+ if (seq_printf(s, "%u.%u.%u.%u IP: %u ICMP: %hu " >+ "TCP: %hu UDP: %hu\n", >+ NIPQUAD(flow->ip_ct_dir_original_ip), >+ flow->ip, >+ flow->icmp, >+ flow->tcp, >+ flow->udp)) { >+ ret = -ENOSPC; >+ break; >+ } >+ } >+ READ_UNLOCK(&ip_cte_flow_lock); >+ return ret; >+} >+ >+static struct seq_operations ip_cte_flow_seq_ops = { >+ .start = ip_cte_flow_seq_start, >+ .next = ip_cte_flow_seq_next, >+ .stop = ip_cte_flow_seq_stop, >+ .show = ip_cte_flow_seq_show >+}; >+ >+static int ip_cte_flow_open(struct inode *inode, struct file *file) >+{ >+ return seq_open(file, 
&ip_cte_flow_seq_ops); >+} >+ >+static struct file_operations ip_cte_flow_file_ops = { >+ .owner = THIS_MODULE, >+ .open = ip_cte_flow_open, >+ .read = seq_read, >+ .llseek = seq_lseek, >+ .release = seq_release >+}; >+#endif /* CONFIG_PROC_FS */ >+ >+int >+ip_cte_flow_register_notifier(struct notifier_block *nb) >+{ >+ return notifier_chain_register(&ip_cte_flow_nb_chain, nb); >+} >+ >+int >+ip_cte_flow_unregister_notifier(struct notifier_block *nb) >+{ >+ return notifier_chain_unregister(&ip_cte_flow_nb_chain, nb); >+} >+ >+void >+ip_cte_flow_event(enum ip_conntrack_events event, struct ip_conntrack *ct) >+{ >+ notifier_call_chain(&ip_cte_flow_nb_chain, event, ct); >+} >+ >+u_int32_t >+ip_cte_hash_flow_ip(u_int32_t ip) >+{ >+ return(jhash_1word(ip, ip_cte_flow_hash_rnd) % ip_cte_flow_htable_size); >+} >+ >+static int >+ip_cte_flow_inc(struct ip_conntrack_tuple_hash *hash) >+{ >+ unsigned int flow_hash; >+ u_int32_t ip; >+ struct list_head *list; >+ struct ip_cte_flow *flow = NULL; >+ >+ ip = hash->tuple.src.ip; >+ flow_hash = ip_cte_hash_flow_ip(ip); >+ READ_LOCK(&ip_cte_flow_lock); >+ list_for_each(list, &ip_cte_flow_hash[flow_hash]) { >+ if (((struct ip_cte_flow *) list)->ip_ct_dir_original_ip == >+ ip) { >+ flow = (struct ip_cte_flow *) list; >+ break; >+ } >+ } >+ READ_UNLOCK(&ip_cte_flow_lock); > > "Code which grabs a read lock, searches a list, fails to find what it wants, drops the read lock, grabs a write lock and inserts the object has a race condition". 
-- Rusty's Unreliable Guide to Locking. So we have to fix this :) >+ WRITE_LOCK(&ip_cte_flow_lock); >+ if (!flow) { >+ flow = kmem_cache_alloc(ip_cte_flow_cachep, GFP_ATOMIC); >+ if (flow) { >+ /* atomic_inc(&ip_cte_flow_count); */ >+ memset(flow, 0, sizeof(struct ip_cte_flow)); >+ flow->ip_ct_dir_original_ip = ip; >+ list = (struct list_head *) flow; >+ list_add(list, &ip_cte_flow_hash[flow_hash]); >+ } >+ } >+ if (flow) { >+ INCREMENT_WITHOUT_OVERFLOW(flow->ip); >+ switch (hash->tuple.dst.protonum) { >+ case IPPROTO_ICMP: >+ INCREMENT_WITHOUT_OVERFLOW(flow->icmp); >+ break; >+ case IPPROTO_TCP: >+ INCREMENT_WITHOUT_OVERFLOW(flow->tcp); >+ break; >+ case IPPROTO_UDP: >+ INCREMENT_WITHOUT_OVERFLOW(flow->udp); >+ break; >+ } >+ } >+ WRITE_UNLOCK(&ip_cte_flow_lock); >+ return(flow == NULL); >+} >+ >+static void >+ip_cte_flow_dec(struct ip_conntrack_tuple_hash *hash) >+{ >+ unsigned int flow_hash; >+ u_int32_t ip; >+ struct list_head *list; >+ struct ip_cte_flow *flow = NULL; >+ >+ ip = hash->tuple.src.ip; >+ flow_hash = ip_cte_hash_flow_ip(ip); >+ READ_LOCK(&ip_cte_flow_lock); >+ list_for_each(list, &ip_cte_flow_hash[flow_hash]) { >+ if (((struct ip_cte_flow *) list)->ip_ct_dir_original_ip == >+ ip) { >+ flow = (struct ip_cte_flow *) list; >+ break; >+ } >+ } >+ READ_UNLOCK(&ip_cte_flow_lock); > > Same problem here: the entry is looked up under the read lock, which is dropped before the write lock is taken, so the entry may already have been freed by the time it is used. >+ if (flow) { >+ WRITE_LOCK(&ip_cte_flow_lock); >+ if(flow->ip) >+ flow->ip--; >+ switch (hash->tuple.dst.protonum) { >+ case IPPROTO_ICMP: >+ if (flow->icmp) >+ flow->icmp--; >+ break; >+ case IPPROTO_TCP: >+ if (flow->tcp) >+ flow->tcp--; >+ break; >+ case IPPROTO_UDP: >+ if (flow->udp) >+ flow->udp--; >+ break; >+ } >+ if (flow->ip == 0) { >+ list = (struct list_head *) flow; >+ list_del(list); >+ kmem_cache_free(ip_cte_flow_cachep, flow); >+ /* atomic_dec(&ip_cte_flow_count); */ >+ } >+ WRITE_UNLOCK(&ip_cte_flow_lock); >+ } else { >+ printk(KERN_WARNING "conntrack being destroyed, " >+ "yet not found on flow list\n" >+ "%u src: %u.%u.%u.%u dst: 
%u.%u.%u.%u\n", >+ hash->tuple.dst.protonum, >+ NIPQUAD(hash->tuple.src.ip), >+ NIPQUAD(hash->tuple.dst.ip)); >+ } >+} >+ >+static void >+ip_cte_flow_existing(void) >+{ >+ unsigned int i; >+ struct list_head *list; >+ struct ip_conntrack_tuple_hash *hash; >+ >+ READ_LOCK(&ip_conntrack_lock); >+ for (i = 0; i < ip_conntrack_htable_size; i++) { >+ list_for_each(list, &ip_conntrack_hash[i]) { >+ hash = (struct ip_conntrack_tuple_hash *) list; >+ if (!DIRECTION(hash)) >+ ip_cte_flow_inc(hash); >+ } >+ } >+ READ_UNLOCK(&ip_conntrack_lock); >+} >+ >+static void >+ip_cte_flow_destroy(void) >+{ >+ unsigned int i; >+ struct list_head *list; >+ struct ip_cte_flow *flow; >+ >+ WRITE_LOCK(&ip_cte_flow_lock); >+ for (i = 0; i < ip_cte_flow_htable_size; i++) { >+ list = ip_cte_flow_hash[i].next; >+ while (list != &ip_cte_flow_hash[i]) { >+ flow = (struct ip_cte_flow *) list; >+ list = list->next; >+ list_del((struct list_head *) flow); >+ kmem_cache_free(ip_cte_flow_cachep, flow); >+ } >+ } >+ WRITE_UNLOCK(&ip_cte_flow_lock); >+} >+ >+static int ip_cte_flow_notifier(struct notifier_block *nb, >+ unsigned long ips, >+ void *v) >+{ >+ struct ip_conntrack *ct = v; >+ >+ switch (ips) >+ { >+ case BIT_SET(IPCT_NEW): >+ case BIT_SET(IPCT_RELATED): >+ ip_cte_flow_inc(&ct->tuplehash[IP_CT_DIR_ORIGINAL]); >+ ip_cte_flow_event(ips, ct); >+ break; >+ case BIT_SET(IPCT_DESTROY): >+ ip_cte_flow_dec(&ct->tuplehash[IP_CT_DIR_ORIGINAL]); >+ ip_cte_flow_event(ips, ct); >+ break; >+ } >+ return NOTIFY_OK; >+} >+ >+static struct notifier_block ip_cte_flow_nb = { >+ .notifier_call = ip_cte_flow_notifier, >+ .next = NULL, >+ .priority = 0 >+}; >+ >+static int __init init(void) >+{ >+ unsigned int i; >+#ifdef CONFIG_PROC_FS >+ struct proc_dir_entry *proc_flow; >+#endif >+ >+ need_ip_conntrack(); >+ >+ ip_cte_flow_htable_size = ip_conntrack_htable_size / 2; >+ >+ get_random_bytes(&ip_cte_flow_hash_rnd, 4); >+ >+ ip_cte_flow_hash = vmalloc(sizeof(struct list_head) >+ * ip_cte_flow_htable_size); >+ if 
(!ip_cte_flow_hash) { >+ printk(KERN_ERR "Unable to create ip_cte_flow_hash\n"); >+ goto err; >+ } >+ >+ for (i = 0; i < ip_cte_flow_htable_size; i++) >+ INIT_LIST_HEAD(&ip_cte_flow_hash[i]); >+ >+ ip_cte_flow_cachep = kmem_cache_create("ip_cte_flow", >+ sizeof(struct ip_cte_flow), 0, >+ SLAB_HWCACHE_ALIGN, NULL, NULL); >+ if (!ip_cte_flow_cachep) { >+ printk(KERN_ERR "Unable to create ip_cte_flow slab cache\n"); >+ goto err_free_hash; >+ } >+ >+#ifdef CONFIG_PROC_FS >+ proc_flow = proc_net_fops_create("ip_cte_flow", 0440, &ip_cte_flow_file_ops); >+ if (!proc_flow) goto err_free_slab; >+#endif >+ >+ if (ip_conntrack_register_notifier(&ip_cte_flow_nb)) { >+ goto cleanup_proc; >+ } >+ >+ ip_cte_flow_existing(); >+ >+ printk("connection tracking events: protocol flow counters " >+ "(%u buckets) - %Zd bytes per IP_CT_DIR_ORIGINAL source\n", >+ ip_cte_flow_htable_size, >+ sizeof(struct ip_cte_flow)); >+ >+ return 0; >+ >+cleanup_proc: >+#ifdef CONFIG_PROC_FS >+ proc_net_remove("ip_cte_flow"); >+#endif >+err_free_slab: >+ kmem_cache_destroy(ip_cte_flow_cachep); >+err_free_hash: >+ vfree(ip_cte_flow_hash); >+err: >+ return -ENOMEM; >+} >+ >+static void __exit fini(void) >+{ >+ ip_cte_flow_destroy(); >+ >+ if (ip_conntrack_unregister_notifier(&ip_cte_flow_nb)) >+ printk(KERN_ERR "ip_conntrack_unregister_notifier() " >+ "failed, huh?\n"); >+#ifdef CONFIG_PROC_FS >+ proc_net_remove("ip_cte_flow"); >+#endif >+ kmem_cache_destroy(ip_cte_flow_cachep); >+ vfree(ip_cte_flow_hash); >+} >+ >+module_init(init); >+module_exit(fini); >+ >+EXPORT_SYMBOL(ip_cte_flow_hash); >+EXPORT_SYMBOL(ip_cte_flow_htable_size); >+EXPORT_SYMBOL(ip_cte_flow_lock); >+EXPORT_SYMBOL(ip_cte_flow_register_notifier); >+EXPORT_SYMBOL(ip_cte_flow_unregister_notifier); >+EXPORT_SYMBOL(ip_cte_hash_flow_ip); >diff -Pru linux-2.6.9/net/ipv4/netfilter/ipt_flow.c linux-2.6.9-flow/net/ipv4/netfilter/ipt_flow.c >--- linux-2.6.9/net/ipv4/netfilter/ipt_flow.c 1969-12-31 18:00:00.000000000 -0600 >+++ 
linux-2.6.9-flow/net/ipv4/netfilter/ipt_flow.c 2004-11-03 20:12:00.000000000 -0600 >@@ -0,0 +1,318 @@ >+/* Kernel module to match [IP|ICMP|TCP|UDP] flow counts. */ >+ >+/* (C) 2004 Josh Samuelson >+ * >+ * This program is free software; you can redistribute it and/or modify >+ * it under the terms of the GNU General Public License version 2 as >+ * published by the Free Software Foundation. >+ */ >+ >+#include >+#include >+#include >+#include >+#include >+#include >+#include >+#include >+#include >+ >+MODULE_LICENSE("GPL"); >+MODULE_AUTHOR("Josh Samuelson "); >+MODULE_DESCRIPTION("protocol flow count match module"); >+ >+ >+DECLARE_RWLOCK(ipt_flow_lock); >+LIST_HEAD(ipt_flow_notifier_list); >+static atomic_t ipt_flow_notifier_list_count = ATOMIC_INIT(0); >+static struct notifier_block ipt_flow_nb; >+ >+static void >+ipt_flow_nw_inc(struct ip_conntrack *ct, >+ struct ipt_flow_info *finfo) >+{ >+ INCREMENT_WITHOUT_OVERFLOW(finfo->nm->ip); >+ switch (ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum) { >+ case IPPROTO_ICMP: >+ INCREMENT_WITHOUT_OVERFLOW(finfo->nm->icmp); >+ break; >+ case IPPROTO_TCP: >+ INCREMENT_WITHOUT_OVERFLOW(finfo->nm->tcp); >+ break; >+ case IPPROTO_UDP: >+ INCREMENT_WITHOUT_OVERFLOW(finfo->nm->udp); >+ break; >+ } >+} >+ >+static void >+ipt_flow_nw_dec(struct ip_conntrack *ct, >+ struct ipt_flow_info *finfo) >+{ >+ if (finfo->nm->ip) >+ finfo->nm->ip--; >+ switch (ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum) { >+ case IPPROTO_ICMP: >+ if (finfo->nm->icmp) >+ finfo->nm->icmp--; >+ break; >+ case IPPROTO_TCP: >+ if (finfo->nm->tcp) >+ finfo->nm->tcp--; >+ break; >+ case IPPROTO_UDP: >+ if (finfo->nm->udp) >+ finfo->nm->udp--; >+ break; >+ } >+} >+ >+static void >+ipt_flow_existing(struct ipt_flow_info *finfo) >+{ >+ unsigned int i; >+ struct list_head *list; >+ struct ip_cte_flow *flow; >+ >+ READ_LOCK(&ip_cte_flow_lock); >+ for (i = 0; i < ip_cte_flow_htable_size; i++) { >+ list_for_each(list, &ip_cte_flow_hash[i]) { >+ flow = 
(struct ip_cte_flow *) list; >+ if ((flow->ip_ct_dir_original_ip & finfo->mask) == >+ finfo->network) { >+ finfo->nm->ip += flow->ip; >+ finfo->nm->icmp += flow->icmp; >+ finfo->nm->tcp += flow->tcp; >+ finfo->nm->udp += flow->udp; >+ } >+ } >+ } >+ READ_UNLOCK(&ip_cte_flow_lock); >+} >+ >+int >+ipt_flow_nm_notifier(struct notifier_block *self, >+ unsigned long event, void *vct) >+{ >+ struct list_head *list; >+ struct ipt_flow_info *finfo; >+ struct ip_conntrack *ct = vct; >+ u_int32_t ip; >+ >+ READ_LOCK(&ipt_flow_lock); >+ ip = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.ip; >+ list_for_each(list, &ipt_flow_notifier_list) { >+ finfo = (struct ipt_flow_info *) list; >+ if ((ip & finfo->mask) == finfo->network) { >+ switch (event) { >+ case BIT_SET(IPCT_NEW): >+ case BIT_SET(IPCT_RELATED): >+ ipt_flow_nw_inc(ct, finfo); >+ break; >+ case BIT_SET(IPCT_DESTROY): >+ ipt_flow_nw_dec(ct, finfo); >+ break; >+ } >+ } >+ } >+ READ_UNLOCK(&ipt_flow_lock); >+} >+ >+static int >+match(const struct sk_buff *skb, >+ const struct net_device *in, >+ const struct net_device *out, >+ const void *matchinfo, >+ int offset, >+ int *hotdrop) >+{ >+ const struct ipt_flow_info *finfo = matchinfo; >+ struct list_head *list; >+ struct ip_cte_flow *flow = NULL; >+ u_int16_t proto; >+ u_int32_t flow_hash, >+ ip; >+ int ret = 0; >+ >+ proto = skb->nh.iph->protocol; >+ ip = skb->nh.iph->saddr; >+ if ((finfo->proto & BIT_SET(IPFLOW_NETWORK_MASK)) && >+ ((ip & finfo->mask) == finfo->network)) { >+ if ((finfo->nm->ip >= finfo->max) && >+ (finfo->proto & BIT_SET(IPFLOW_IP))) >+ ret = 1; >+ switch (proto) { >+ case IPPROTO_ICMP: >+ if ((finfo->nm->icmp >= finfo->max) && >+ (finfo->proto & BIT_SET(IPFLOW_ICMP))) >+ ret = 1; >+ break; >+ case IPPROTO_TCP: >+ if ((finfo->nm->tcp >= finfo->max) && >+ (finfo->proto & BIT_SET(IPFLOW_TCP))) >+ ret = 1; >+ break; >+ case IPPROTO_UDP: >+ if ((finfo->nm->udp >= finfo->max) && >+ (finfo->proto & BIT_SET(IPFLOW_UDP))) >+ ret = 1; >+ break; >+ } >+ } else { >+ 
flow_hash = ip_cte_hash_flow_ip(ip); >+ READ_LOCK(&ip_cte_flow_lock); >+ list_for_each(list, &ip_cte_flow_hash[flow_hash]) { >+ if (((struct ip_cte_flow *) >+ list)->ip_ct_dir_original_ip == ip) { >+ flow = (struct ip_cte_flow *) list; >+ break; >+ } >+ } >+ if (flow) { >+ if ((flow->ip >= finfo->max) && >+ (finfo->proto & BIT_SET(IPFLOW_IP))) >+ ret = 1; >+ switch (proto) { >+ case IPPROTO_ICMP: >+ if ((flow->icmp >= finfo->max) && >+ (finfo->proto & >+ BIT_SET(IPFLOW_ICMP))) >+ ret = 1; >+ break; >+ case IPPROTO_TCP: >+ if ((flow->tcp >= finfo->max) && >+ (finfo->proto & >+ BIT_SET(IPFLOW_TCP))) >+ ret = 1; >+ break; >+ case IPPROTO_UDP: >+ if ((flow->udp >= finfo->max) && >+ (finfo->proto & >+ BIT_SET(IPFLOW_UDP))) >+ ret = 1; >+ break; >+ } >+ } >+ READ_UNLOCK(&ip_cte_flow_lock); >+ } >+ return(ret); >+} >+ >+static int check(const char *tablename, >+ const struct ipt_ip *ip, >+ void *matchinfo, >+ unsigned int matchsize, >+ unsigned int hook_mask) >+{ >+ struct list_head *list; >+ struct ipt_flow_info *finfo = matchinfo; >+ >+ if (matchsize != IPT_ALIGN(sizeof(struct ipt_flow_info))) >+ return 0; >+ >+ if (strcmp(tablename, "raw") == 0) { >+ printk(KERN_WARNING "flow: can not by used in the \"raw\" table\n"); >+ return 0; >+ } >+ >+ switch (ip->proto & ~BIT_SET(IPFLOW_NETWORK_MASK)) { >+ case IPPROTO_IP: >+ if ((finfo->proto & BIT_SET(IPFLOW_IP)) && >+ (finfo->proto & >+ ~BIT_SET(IPFLOW_NETWORK_MASK) & >+ ~BIT_SET(IPFLOW_IP))) >+ return 0; >+ break; >+ case IPPROTO_ICMP: >+ if ((finfo->proto & BIT_SET(IPFLOW_ICMP)) && >+ (finfo->proto & >+ ~BIT_SET(IPFLOW_NETWORK_MASK) & >+ ~BIT_SET(IPFLOW_ICMP))) >+ return 0; >+ break; >+ case IPPROTO_TCP: >+ if ((finfo->proto & BIT_SET(IPFLOW_TCP)) && >+ (finfo->proto & >+ ~BIT_SET(IPFLOW_NETWORK_MASK) & >+ ~BIT_SET(IPFLOW_TCP))) >+ return 0; >+ break; >+ case IPPROTO_UDP: >+ if ((finfo->proto & BIT_SET(IPFLOW_UDP)) && >+ (finfo->proto & >+ ~BIT_SET(IPFLOW_NETWORK_MASK) & >+ ~BIT_SET(IPFLOW_UDP))) >+ return 0; >+ break; >+ 
default: >+ return 0; >+ } >+ >+ if ((finfo->proto & BIT_SET(IPFLOW_NETWORK_MASK))) { >+ finfo->nm = vmalloc(sizeof(struct ipt_flow_nm)); > > I think that we can use kmalloc here instead; vmalloc should be reserved for big allocations, such as large arrays. >+ if (!finfo->nm) >+ return 0; >+ memset(finfo->nm, 0, sizeof(struct ipt_flow_nm)); >+ WRITE_LOCK(&ipt_flow_lock); >+ ipt_flow_existing(finfo); >+ list = (struct list_head *) finfo; >+ list_add(list, &ipt_flow_notifier_list); >+ if (atomic_read(&ipt_flow_notifier_list_count) == 0) { >+ ipt_flow_nb.notifier_call = ipt_flow_nm_notifier; >+ if (ip_cte_flow_register_notifier(&ipt_flow_nb)) { >+ vfree(finfo->nm); >+ return 0; >+ } >+ } >+ atomic_inc(&ipt_flow_notifier_list_count); >+ WRITE_UNLOCK(&ipt_flow_lock); >+ } >+ return 1; >+} >+ >+void destroy(void *matchinfo, >+ unsigned int matchsize) >+{ >+ struct list_head *list; >+ struct ipt_flow_info *finfo = matchinfo; >+ >+ if (matchsize != IPT_ALIGN(sizeof(struct ipt_flow_info))) >+ return; >+ >+ if ((finfo->proto & BIT_SET(IPFLOW_NETWORK_MASK)) && finfo->nm) { >+ WRITE_LOCK(&ipt_flow_lock); >+ atomic_dec(&ipt_flow_notifier_list_count); >+ if (atomic_read(&ipt_flow_notifier_list_count) == 0) { >+ if (ip_cte_flow_unregister_notifier(&ipt_flow_nb)) >+ printk(KERN_ERR >+ "ip_cte_flow_unregister_notifier failed" >+ ", huh?\n"); >+ } >+ list = (struct list_head *) finfo; >+ list_del(list); >+ WRITE_UNLOCK(&ipt_flow_lock); >+ vfree(finfo->nm); >+ } >+} >+ >+static struct ipt_match flow_match = { >+ .name = "flow", >+ .match = &match, >+ .checkentry = &check, >+ .destroy = &destroy, >+ .me = THIS_MODULE, >+}; >+ >+static int __init init(void) >+{ >+ need_ip_conntrack(); >+ return ipt_register_match(&flow_match); >+} >+ >+static void __exit fini(void) >+{ >+ ipt_unregister_match(&flow_match); >+} >+ >+module_init(init); >+module_exit(fini); > > --Pablo