diff -X dontdiff -Nurp linux-2.6.8-rc3/include/linux/netfilter.h linux-2.6.8-rc3-linearize/include/linux/netfilter.h
--- linux-2.6.8-rc3/include/linux/netfilter.h	2004-08-04 20:25:13.000000000 +0900
+++ linux-2.6.8-rc3-linearize/include/linux/netfilter.h	2004-08-08 02:01:07.973534136 +0900
@@ -46,6 +46,9 @@ typedef unsigned int nf_hookfn(unsigned 
 struct nf_hook_ops
 {
 	struct list_head list;
+	/* Highest layer that must be linear before this hook runs; derived
+	   from lin_layer/lin_flags of the hooks on the same chain. */
+	unsigned int max_lin_layer;
 
 	/* User fills in from here down. */
 	nf_hookfn *hook;
@@ -54,8 +57,14 @@ struct nf_hook_ops
 	int hooknum;
 	/* Hooks are ordered in ascending priority. */
 	int priority;
+	/* Layer this hook needs linearized (0: no requirement) */
+	unsigned int lin_layer;
+	unsigned int lin_flags;
 };
 
+/* Bits for nf_hook_ops.lin_flags */
+#define NF_LIN_MAY_FRAG 0x0001 /* skb may be fragmented after executing hook */
+
 struct nf_sockopt_ops
 {
 	struct list_head list;
@@ -187,6 +196,8 @@ extern void nf_dump_skb(int pf, struct s
 /* FIXME: Before cache is ever used, this must be implemented for real. */
 extern void nf_invalidate_cache(int pf);
 
+extern int skb_make_readable(struct sk_buff **pskb, unsigned int readable_len);
+
 #else /* !CONFIG_NETFILTER */
 #define NF_HOOK(pf, hook, skb, indev, outdev, okfn) (okfn)(skb)
 #endif /*CONFIG_NETFILTER*/
diff -X dontdiff -Nurp linux-2.6.8-rc3/include/linux/netfilter_ipv4.h linux-2.6.8-rc3-linearize/include/linux/netfilter_ipv4.h
--- linux-2.6.8-rc3/include/linux/netfilter_ipv4.h	2004-06-16 14:19:52.000000000 +0900
+++ linux-2.6.8-rc3-linearize/include/linux/netfilter_ipv4.h	2004-08-08 02:01:07.973534136 +0900
@@ -85,6 +85,15 @@ extern int ip_route_me_harder(struct sk_
    Returns true or false.
*/ extern int skb_ip_make_writable(struct sk_buff **pskb, unsigned int writable_len); + +/* Header required to linearize */ +/* Network protocol header */ +#define NF_IP_LIN_NET 100 +/* Transport protocol header */ +#define NF_IP_LIN_TRANS 200 +/* Whole of packet */ +#define NF_IP_LIN_ALL UINT_MAX + #endif /*__KERNEL__*/ #endif /*__LINUX_IP_NETFILTER_H*/ diff -X dontdiff -Nurp linux-2.6.8-rc3/net/core/netfilter.c linux-2.6.8-rc3-linearize/net/core/netfilter.c --- linux-2.6.8-rc3/net/core/netfilter.c 2004-06-16 14:19:22.000000000 +0900 +++ linux-2.6.8-rc3-linearize/net/core/netfilter.c 2004-08-08 02:01:07.974533984 +0900 @@ -49,6 +49,8 @@ struct list_head nf_hooks[NPROTO][NF_MAX static LIST_HEAD(nf_sockopts); static spinlock_t nf_hook_lock = SPIN_LOCK_UNLOCKED; +int (*nf_linearize[NPROTO])(struct sk_buff **pskb, unsigned int layer); + /* * A queue handler may be registered for each protocol. Each is protected by * long term mutex. The handler must provide an an outfn() to accept packets @@ -60,16 +62,42 @@ static struct nf_queue_handler_t { } queue_handler[NPROTO]; static rwlock_t queue_handler_lock = RW_LOCK_UNLOCKED; +/* Calculate highest protocol header required to linearize before executing + hook functions. locking is needed. */ +static void __calc_lin_layer(int pf, int hooknum) +{ + struct nf_hook_ops *elem = NULL; + unsigned int max_lin_layer = 0; + + list_for_each_entry_reverse(elem, &nf_hooks[pf][hooknum], list) { + /* The 1st condition means that skb may be rearranged by this + element. In this case, linearizing is needed one more after + this. 
Then it's not needed to linearize the higher layer + this element doesn't require */ + if ((elem->lin_flags & NF_LIN_MAY_FRAG) || + (max_lin_layer < elem->lin_layer)) + max_lin_layer = elem->lin_layer; + + elem->max_lin_layer = max_lin_layer; + } +} + int nf_register_hook(struct nf_hook_ops *reg) { struct list_head *i; + if (reg->lin_layer != 0 && nf_linearize[reg->pf] == NULL) + return -1; + + reg->max_lin_layer = reg->lin_layer; + spin_lock_bh(&nf_hook_lock); list_for_each(i, &nf_hooks[reg->pf][reg->hooknum]) { if (reg->priority < ((struct nf_hook_ops *)i)->priority) break; } list_add_rcu(®->list, i->prev); + __calc_lin_layer(reg->pf, reg->hooknum); spin_unlock_bh(&nf_hook_lock); synchronize_net(); @@ -80,6 +108,7 @@ void nf_unregister_hook(struct nf_hook_o { spin_lock_bh(&nf_hook_lock); list_del_rcu(®->list); + __calc_lin_layer(reg->pf, reg->hooknum); spin_unlock_bh(&nf_hook_lock); synchronize_net(); @@ -349,6 +378,8 @@ static unsigned int nf_iterate(struct li int (*okfn)(struct sk_buff *), int hook_thresh) { + unsigned int max_lin_layer = 0; + /* * The caller must not block between calls to this * function because of risk of continuing from deleted element. @@ -359,6 +390,23 @@ static unsigned int nf_iterate(struct li if (hook_thresh > elem->priority) continue; + /* Ordinarily linearizing is required only once. But may be + required if a element is added/deleted during the iteration + or the previous element rearranges skb. */ + if (max_lin_layer < elem->max_lin_layer) { + max_lin_layer = elem->max_lin_layer; + if(!nf_linearize[elem->pf](skb, max_lin_layer)) { + if(net_ratelimit()) + printk("failed to partially linearize " + "skb. dropping...\n"); + + return NF_DROP; + } + + if (elem->lin_flags & NF_LIN_MAY_FRAG) + max_lin_layer = 0; + } + /* Optimization: we don't need to hold module reference here, since function can't sleep. 
--RR */ switch (elem->hook(hook, skb, indev, outdev, okfn)) { @@ -735,6 +783,27 @@ pull_skb: EXPORT_SYMBOL(skb_ip_make_writable); #endif /*CONFIG_INET*/ +int skb_make_readable(struct sk_buff **pskb, unsigned int readable_len) +{ + if (likely(readable_len <= skb_headlen(*pskb))) + return 1; + + if (unlikely(readable_len > (*pskb)->len)) + return 0; + + if (skb_shared(*pskb)) { + struct sk_buff *n; + + n = skb_copy(*pskb, GFP_ATOMIC); + if (!n) + return 0; + *pskb = n; + } else if (!pskb_may_pull(*pskb, readable_len)) + return 0; + + return 1; +} + /* Internal logging interface, which relies on the real LOG target modules */ @@ -808,10 +877,17 @@ EXPORT_SYMBOL(nf_log_packet); with it. */ void (*ip_ct_attach)(struct sk_buff *, struct nf_ct_info *); +#ifdef CONFIG_INET +extern int ip_linearize_headers(struct sk_buff **pskb, unsigned int layer); +#endif + void __init netfilter_init(void) { int i, h; +#ifdef CONFIG_INET + nf_linearize[PF_INET] = ip_linearize_headers; +#endif for (i = 0; i < NPROTO; i++) { for (h = 0; h < NF_MAX_HOOKS; h++) INIT_LIST_HEAD(&nf_hooks[i][h]); diff -X dontdiff -Nurp linux-2.6.8-rc3/net/ipv4/netfilter/Makefile linux-2.6.8-rc3-linearize/net/ipv4/netfilter/Makefile --- linux-2.6.8-rc3/net/ipv4/netfilter/Makefile 2004-08-04 20:25:16.000000000 +0900 +++ linux-2.6.8-rc3-linearize/net/ipv4/netfilter/Makefile 2004-08-08 02:01:07.974533984 +0900 @@ -98,3 +98,5 @@ obj-$(CONFIG_IP_NF_COMPAT_IPCHAINS) += i obj-$(CONFIG_IP_NF_COMPAT_IPFWADM) += ipfwadm.o obj-$(CONFIG_IP_NF_QUEUE) += ip_queue.o + +obj-y += ip_linearize.o diff -X dontdiff -Nurp linux-2.6.8-rc3/net/ipv4/netfilter/ip_linearize.c linux-2.6.8-rc3-linearize/net/ipv4/netfilter/ip_linearize.c --- linux-2.6.8-rc3/net/ipv4/netfilter/ip_linearize.c 1970-01-01 09:00:00.000000000 +0900 +++ linux-2.6.8-rc3-linearize/net/ipv4/netfilter/ip_linearize.c 2004-08-08 02:01:07.975533832 +0900 @@ -0,0 +1,101 @@ +/* + * Copyright (C)2004 USAGI/WIDE Project + * + * This program is free software; you can 
+ * redistribute it and/or modify it under the terms of the GNU General Public
+ * License as published by the Free Software Foundation; either version 2 of
+ * the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ * Authors:
+ *	Yasuyuki Kozakai
+ */
+/* NOTE(review): #include targets were lost; rebuilt from usage -- confirm. */
+#include <linux/module.h>		/* EXPORT_SYMBOL */
+#include <linux/skbuff.h>		/* struct sk_buff, skb_copy_bits */
+#include <linux/in.h>			/* IPPROTO_* */
+#include <linux/ip.h>			/* struct iphdr */
+#include <linux/tcp.h>			/* struct tcphdr */
+#include <linux/udp.h>			/* struct udphdr */
+#include <linux/icmp.h>			/* struct icmphdr */
+#include <linux/netfilter_ipv4.h>	/* NF_IP_LIN_* */
+
+#include <net/ip.h>			/* IP_OFFSET */
+
+#if 0
+#define DEBUGP printk
+#else
+#define DEBUGP(format, args...)
+#endif
+
+/*
+ * Linearize skb up to the specified layer. If the packet is too short, the
+ * whole skb is linearized. NOTICE: the skb is readable afterwards but may not
+ * be writable because it can be shared or cloned. If you want to mangle the
+ * contents of the skb, use skb_ip_make_writable().
+ */
+int ip_linearize_headers(struct sk_buff **pskb, unsigned int layer)
+{
+	unsigned int totlen;
+
+	if (layer <= NF_IP_LIN_NET)	/* no pull is done for this layer */
+		return 1;
+
+	totlen = (*pskb)->nh.iph->ihl*4;	/* IP header length in bytes */
+
+	if (layer == NF_IP_LIN_TRANS) {
+		if (ntohs((*pskb)->nh.iph->frag_off) & IP_OFFSET)
+			return 1;	/* non-zero fragment offset */
+
+		switch ((*pskb)->nh.iph->protocol) {
+		case IPPROTO_TCP: {
+			struct tcphdr hdr;
+			int ret;
+
+			/* truncated (sum form: len - totlen could wrap) */
+			if ((*pskb)->len < totlen + sizeof(hdr)) {
+				totlen = (*pskb)->len;
+				break;
+			}
+
+			ret = skb_copy_bits(*pskb, totlen,
+					    &hdr, sizeof(hdr));
+			if (ret) {
+				DEBUGP("ip_linearize: failed to copy bits.\n");
+				return 0;
+			}
+
+			totlen += max_t(unsigned int, sizeof(hdr), hdr.doff*4);
+			break;
+		}
+		case IPPROTO_UDP:
+			totlen += sizeof(struct udphdr);
+			break;
+		case IPPROTO_ICMP:
+			totlen += sizeof(struct icmphdr);
+			break;
+		/* Insert other cases here as desired */
+		}
+	} else if (layer == NF_IP_LIN_ALL)
+		totlen = (*pskb)->len;
+	else {
+		/* unknown layer */
+		DEBUGP("ip_linearize: unknown layer\n");
+		return 0;
+	}
+
+	if (totlen > (*pskb)->len)	/* never ask for more than exists */
+		totlen = (*pskb)->len;
+
+	return skb_make_readable(pskb, totlen);
+}
+
+EXPORT_SYMBOL(ip_linearize_headers);