* Re: ROUTE patch
2009-02-24 15:33 ` Patrick McHardy
@ 2009-02-25 3:14 ` Abhishek Singh
2009-02-25 10:00 ` TEE patch [was: ROUTE patch] Jan Engelhardt
1 sibling, 0 replies; 13+ messages in thread
From: Abhishek Singh @ 2009-02-25 3:14 UTC (permalink / raw)
To: Patrick McHardy; +Cc: Jan Engelhardt, netfilter-devel
[-- Attachment #1: Type: text/plain, Size: 13263 bytes --]
The current code for the patch is like this:
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/skbuff.h>
#include <linux/netdevice.h>
#include <linux/route.h>
#include <linux/version.h>
#include <linux/if_arp.h>
#include <linux/ip.h>
#include <linux/netfilter_ipv4/ip_tables.h>
#include <linux/netfilter_ipv4.h>
#include <linux/netfilter/nf_conntrack_common.h>
//#include <linux/netfilter_ipv4/ipt_ROUTE.h>
#include <net/netfilter/nf_conntrack.h>
#include <net/ip.h>
#include <net/route.h>
#include <net/icmp.h>
#include <net/checksum.h>
#include "ipt_ROUTE.h"
#if 0
#define DEBUGP printk
#else
#define DEBUGP(format, args...)
#endif
;
/* Try to route the packet according to the routing keys specified in
* route_info. Keys are :
* - ifindex :
* 0 if no oif preferred,
* otherwise set to the index of the desired oif
* - route_info->gw :
* 0 if no gateway specified,
* otherwise set to the next host to which the pkt must be routed
* If success, skb->dev is the output device to which the packet must
* be sent and skb->dst is not NULL
*
* RETURN: -1 if an error occured
* 1 if the packet was succesfully routed to the
* destination desired
* 0 if the kernel routing table could not route the packet
* according to the keys specified
*/
static int route(struct sk_buff *skb,
unsigned int ifindex,
const struct ipt_route_target_info *route_info)
{
int err;
struct rtable *rt;
struct iphdr *iph = ip_hdr(skb);
struct flowi fl = {
.oif = ifindex,
.nl_u = {
.ip4_u = {
.daddr = iph->daddr,
.saddr = 0,
.tos = RT_TOS(iph->tos),
.scope = RT_SCOPE_UNIVERSE,
}
}
};
/* The destination address may be overloaded by the target */
if (route_info->gw)
fl.fl4_dst = route_info->gw;
/* Trying to route the packet using the standard routing table. */
if ((err = ip_route_output_key(&init_net, &rt, &fl))) {
if (net_ratelimit())
DEBUGP("ipt_ROUTE: couldn't route pkt (err: %i)",err);
return -1;
}
/* Drop old route. */
dst_release(skb->dst);
skb->dst = NULL;
/* Success if no oif specified or if the oif correspond to the
* one desired */
if (!ifindex || rt->u.dst.dev->ifindex == ifindex) {
skb->dst = &rt->u.dst;
skb->dev = skb->dst->dev;
skb->protocol = htons(ETH_P_IP);
return 1;
}
/* The interface selected by the routing table is not the one
* specified by the user. This may happen because the dst address
* is one of our own addresses.
*/
if (net_ratelimit())
DEBUGP("ipt_ROUTE: failed to route as desired gw=%u.%u.%u.%u oif=%i
(got oif=%i)\n",
NIPQUAD(route_info->gw), ifindex, rt->u.dst.dev->ifindex);
return 0;
}
/* Stolen from ip_finish_output2
* PRE : skb->dev is set to the device we are leaving by
* skb->dst is not NULL
* POST: the packet is sent with the link layer header pushed
* the packet is destroyed
*/
static void ip_direct_send(struct sk_buff *skb)
{
struct dst_entry *dst = skb->dst;
struct rtable *rt = (struct rtable *)dst;
struct net_device *dev = dst->dev;
unsigned int hh_len = LL_RESERVED_SPACE(dev);
if (rt->rt_type == RTN_MULTICAST)
IP_INC_STATS(IPSTATS_MIB_OUTMCASTPKTS);
else if (rt->rt_type == RTN_BROADCAST)
IP_INC_STATS(IPSTATS_MIB_OUTBCASTPKTS);
/* Be paranoid, rather than too clever. */
if (unlikely(skb_headroom(skb) < hh_len && dev->header_ops)) {
struct sk_buff *skb2;
skb2 = skb_realloc_headroom(skb, LL_RESERVED_SPACE(dev));
if (skb2 == NULL) {
kfree_skb(skb);
return;
}
if (skb->sk)
skb_set_owner_w(skb2, skb->sk);
kfree_skb(skb);
skb = skb2;
}
if (dst->hh){
neigh_hh_output(dst->hh, skb);
return;
}
else if (dst->neighbour){
dst->neighbour->output(skb);
return;
}
if (net_ratelimit())
printk(KERN_DEBUG "ip_finish_output2: No header cache
and no neighbour!\n");
kfree_skb(skb);
return;
}
/* PRE : skb->dev is set to the device we are leaving by
* POST: - the packet is directly sent to the skb->dev device, without
* pushing the link layer header.
* - the packet is destroyed
*/
static inline int dev_direct_send(struct sk_buff *skb)
{
return dev_queue_xmit(skb);
}
static unsigned int route_oif(const struct ipt_route_target_info *route_info,
struct sk_buff *skb)
{
unsigned int ifindex = 0;
struct net_device *dev_out = NULL;
/* The user set the interface name to use.
* Getting the current interface index.
*/
if ((dev_out = dev_get_by_name(&init_net, route_info->oif))) {
ifindex = dev_out->ifindex;
} else {
/* Unknown interface name : packet dropped */
if (net_ratelimit())
DEBUGP("ipt_ROUTE: oif interface %s not found\n", route_info->oif);
return NF_DROP;
}
/* Trying the standard way of routing packets */
switch (route(skb, ifindex, route_info)) {
case 1:
dev_put(dev_out);
if (route_info->flags & IPT_ROUTE_CONTINUE)
return IPT_CONTINUE;
ip_direct_send(skb);
return NF_STOLEN;
case 0:
/* Failed to send to oif. Trying the hard way */
if (route_info->flags & IPT_ROUTE_CONTINUE)
return NF_DROP;
if (net_ratelimit())
DEBUGP("ipt_ROUTE: forcing the use of %i\n",
ifindex);
/* We have to force the use of an interface.
* This interface must be a tunnel interface since
* otherwise we can't guess the hw address for
* the packet. For a tunnel interface, no hw address
* is needed.
*/
if ((dev_out->type != ARPHRD_TUNNEL)
&& (dev_out->type != ARPHRD_IPGRE)) {
if (net_ratelimit())
DEBUGP("ipt_ROUTE: can't guess the hw addr !\n");
dev_put(dev_out);
return NF_DROP;
}
/* Send the packet. This will also free skb
* Do not go through the POST_ROUTING hook because
* skb->dst is not set and because it will probably
* get confused by the destination IP address.
*/
skb->dev = dev_out;
dev_direct_send(skb);
dev_put(dev_out);
return NF_STOLEN;
default:
/* Unexpected error */
dev_put(dev_out);
return NF_DROP;
}
}
static unsigned int route_iif(const struct ipt_route_target_info *route_info,
struct sk_buff *skb)
{
struct net_device *dev_in = NULL;
/* Getting the current interface index. */
if (!(dev_in = dev_get_by_name(&init_net, route_info->iif))) {
if (net_ratelimit())
DEBUGP("ipt_ROUTE: iif interface %s not found\n", route_info->iif);
return NF_DROP;
}
skb->dev = dev_in;
dst_release(skb->dst);
skb->dst = NULL;
netif_rx(skb);
dev_put(dev_in);
return NF_STOLEN;
}
static unsigned int route_gw(const struct ipt_route_target_info *route_info,
struct sk_buff *skb)
{
if (route(skb, 0, route_info)!=1)
return NF_DROP;
if (route_info->flags & IPT_ROUTE_CONTINUE)
return IPT_CONTINUE;
ip_direct_send(skb);
return NF_STOLEN;
}
/* To detect and deter routed packet loopback when using the --tee option,
* we take a page out of the raw.patch book: on the copied skb, we set up
* a fake ->nfct entry, pointing to the local &route_tee_track. We skip
* routing packets when we see they already have that ->nfct.
*/
static struct nf_conn route_tee_track;
static unsigned int route_tg(struct sk_buff *pskb,
const struct net_device *in,
const struct net_device *out,
unsigned int hooknum,
const struct xt_target *target,
const void *targinfo)
{
const struct ipt_route_target_info *route_info = targinfo;
struct sk_buff *skb;
unsigned int res;
skb = pskb;
if (skb->nfct == &route_tee_track.ct_general) {
/* Loopback - a packet we already routed, is to be
* routed another time. Avoid that, now.
*/
if (net_ratelimit())
DEBUGP(KERN_DEBUG "ipt_ROUTE: loopback - DROP!\n");
return NF_DROP;
}
/* If we are at PREROUTING or INPUT hook
* the TTL isn't decreased by the IP stack
*/
if (hooknum == NF_INET_PRE_ROUTING ||
hooknum == NF_INET_LOCAL_IN) {
struct iphdr *iph = ip_hdr(skb);
if (iph->ttl <= 1) {
struct rtable *rt;
struct flowi fl = {
.oif = 0,
.nl_u = {
.ip4_u = {
.daddr = iph->daddr,
.saddr = iph->saddr,
.tos = RT_TOS(iph->tos),
.scope = ((iph->tos & RTO_ONLINK) ?
RT_SCOPE_LINK :
RT_SCOPE_UNIVERSE)
}
}
};
if (ip_route_output_key(&init_net, &rt, &fl)) {
return NF_DROP;
}
if (skb->dev == rt->u.dst.dev) {
/* Drop old route. */
dst_release(skb->dst);
skb->dst = &rt->u.dst;
/* this will traverse normal stack, and
* thus call conntrack on the icmp packet */
icmp_send(skb, ICMP_TIME_EXCEEDED,
ICMP_EXC_TTL, 0);
}
return NF_DROP;
}
/*
* If we are at INPUT the checksum must be recalculated since
* the length could change as the result of a defragmentation.
*/
if(hooknum == NF_INET_LOCAL_IN) {
iph->ttl = iph->ttl - 1;
iph->check = 0;
iph->check = ip_fast_csum((unsigned char *)iph, iph->ihl);
} else {
ip_decrease_ttl(iph);
}
}
if ((route_info->flags & IPT_ROUTE_TEE)) {
/*
* Copy the *pskb, and route the copy. Will later return
* IPT_CONTINUE for the original skb, which should continue
* on its way as if nothing happened. The copy should be
* independantly delivered to the ROUTE --gw.
*/
skb = skb_copy(pskb, GFP_ATOMIC);
if (!skb) {
if (net_ratelimit())
DEBUGP(KERN_DEBUG "ipt_ROUTE: copy failed!\n");
return IPT_CONTINUE;
}
}
/* Tell conntrack to forget this packet since it may get confused
* when a packet is leaving with dst address == our address.
* Good idea ? Dunno. Need advice.
*
* NEW: mark the skb with our &route_tee_track, so we avoid looping
* on any already routed packet.
*/
if (!(route_info->flags & IPT_ROUTE_CONTINUE)) {
nf_conntrack_put(skb->nfct);
skb->nfct = &route_tee_track.ct_general;
skb->nfctinfo = IP_CT_NEW;
nf_conntrack_get(skb->nfct);
}
if (route_info->oif[0] != '\0') {
res = route_oif(route_info, skb);
} else if (route_info->iif[0] != '\0') {
res = route_iif(route_info, skb);
} else if (route_info->gw) {
res = route_gw(route_info, skb);
} else {
if (net_ratelimit())
DEBUGP(KERN_DEBUG "ipt_ROUTE: no parameter !\n");
res = IPT_CONTINUE;
}
if ((route_info->flags & IPT_ROUTE_TEE))
res = IPT_CONTINUE;
return res;
}
static bool
route_tg_checkentry(const char *tablename, const void *e_void,
const struct xt_target *target, void *targinfo,
unsigned int hook_mask)
{
if (strcmp(tablename, "mangle") != 0) {
printk("ipt_ROUTE: bad table `%s', use the `mangle' table.\n",
tablename);
return false;
}
/* No need of comparing the hook, they don't exist in kernel space anymore.
* Only exist in userspace. The check above for mangle table is also redundant
*/
if (hook_mask & ~( (1 << NF_INET_PRE_ROUTING)
| (1 << NF_INET_LOCAL_IN)
| (1 << NF_INET_FORWARD)
| (1 << NF_INET_LOCAL_OUT)
| (1 << NF_INET_POST_ROUTING))) {
printk("ipt_ROUTE: bad hook\n");
return false;
}
return true;
}
static struct xt_target route_tg_reg __read_mostly = {
.name = "ROUTE",
.family = AF_INET,
.target = route_tg,
.targetsize = sizeof(struct ipt_route_target_info),
.table = "mangle",
.checkentry = route_tg_checkentry,
.me = THIS_MODULE,
};
static int __init init(void)
{
/* Set up fake conntrack (stolen from raw.patch):
- to never be deleted, not in any hashes */
atomic_set(&route_tee_track.ct_general.use, 1);
/* - and look it like as a confirmed connection */
set_bit(IPS_CONFIRMED_BIT, &route_tee_track.status);
/* Initialize fake conntrack so that NAT will skip it */
route_tee_track.status |= IPS_NAT_DONE_MASK;
return xt_register_target(&route_tg_reg);
}
static void __exit fini(void)
{
xt_unregister_target(&route_tg_reg);
}
module_init(init);
module_exit(fini);
Regards
Abhishek
On Tue, Feb 24, 2009 at 9:03 PM, Patrick McHardy <kaber@trash.net> wrote:
> Jan Engelhardt wrote:
>>
>> On Tuesday 2009-02-24 14:41, Patrick McHardy wrote:
>>>
>>> Jan Engelhardt wrote:
>>>>
>>>> On Tuesday 2009-02-24 09:59, Abhishek Singh wrote:
>>>>>
>>>>> What I would like to know is that if someone would like to add it to
>>>>> the main iptables tree and the patchomatic repository. I am not sure
>>>>> how to go about it. If someone is interested, please let me know. I
>>>>> shall contribute the code and if would be happy to incorporate review
>>>>> comments by other developers.
>>>>
>>>> [omg timeline!]
>>>>
>>>> Short answer, no. There is iproute2 and xt_TEE which replace it,
>>>> and even patchomatic is gone.
>>>>
>>>> [/me takes a leap forward]
>>>
>>> Perhaps we can finally get this merged. IIRC the only reason against
>>> it is the IP layer duplication instead of simply using dst_output().
>>>
>> It cannot use dst_output because that would cause reentrancy into
>> iptablse.
>> Want a patch, though?
>
> I would like to have a look at the current patch, yes. Don't
> bother fixing anything though, I mainly want to have a look
> at the routing part.
>
[-- Attachment #2: ipt_ROUTE.c --]
[-- Type: application/octet-stream, Size: 12689 bytes --]
/*
* This implements the ROUTE target, which enables you to setup unusual
* routes not supported by the standard kernel routing table.
*
* Copyright (C) 2008 Abhishek Kumar Singh <abhishek1.singh@hsc.com>
*
* v 1.11 2008/12/12
*
* This software is distributed under GNU GPL v2, 1991
*/
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/skbuff.h>
#include <linux/netdevice.h>
#include <linux/route.h>
#include <linux/version.h>
#include <linux/if_arp.h>
#include <linux/ip.h>
#include <linux/netfilter_ipv4/ip_tables.h>
#include <linux/netfilter_ipv4.h>
#include <linux/netfilter/nf_conntrack_common.h>
//#include <linux/netfilter_ipv4/ipt_ROUTE.h>
#include <net/netfilter/nf_conntrack.h>
#include <net/ip.h>
#include <net/route.h>
#include <net/icmp.h>
#include <net/checksum.h>
#include "ipt_ROUTE.h"
#if 0
#define DEBUGP printk
#else
#define DEBUGP(format, args...)
#endif
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Abhishek Kumar Singh <abhishek1.singh@hsc.com>");
MODULE_DESCRIPTION("iptables ROUTE target module");
/* Try to route the packet according to the routing keys specified in
* route_info. Keys are :
* - ifindex :
* 0 if no oif preferred,
* otherwise set to the index of the desired oif
* - route_info->gw :
* 0 if no gateway specified,
* otherwise set to the next host to which the pkt must be routed
* If success, skb->dev is the output device to which the packet must
* be sent and skb->dst is not NULL
*
* RETURN: -1 if an error occured
* 1 if the packet was succesfully routed to the
* destination desired
* 0 if the kernel routing table could not route the packet
* according to the keys specified
*/
static int route(struct sk_buff *skb,
unsigned int ifindex,
const struct ipt_route_target_info *route_info)
{
int err;
struct rtable *rt;
struct iphdr *iph = ip_hdr(skb);
struct flowi fl = {
.oif = ifindex,
.nl_u = {
.ip4_u = {
.daddr = iph->daddr,
.saddr = 0,
.tos = RT_TOS(iph->tos),
.scope = RT_SCOPE_UNIVERSE,
}
}
};
/* The destination address may be overloaded by the target */
if (route_info->gw)
fl.fl4_dst = route_info->gw;
/* Trying to route the packet using the standard routing table. */
if ((err = ip_route_output_key(&init_net, &rt, &fl))) {
if (net_ratelimit())
DEBUGP("ipt_ROUTE: couldn't route pkt (err: %i)",err);
return -1;
}
/* Drop old route. */
dst_release(skb->dst);
skb->dst = NULL;
/* Success if no oif specified or if the oif correspond to the
* one desired */
if (!ifindex || rt->u.dst.dev->ifindex == ifindex) {
skb->dst = &rt->u.dst;
skb->dev = skb->dst->dev;
skb->protocol = htons(ETH_P_IP);
return 1;
}
/* The interface selected by the routing table is not the one
* specified by the user. This may happen because the dst address
* is one of our own addresses.
*/
if (net_ratelimit())
DEBUGP("ipt_ROUTE: failed to route as desired gw=%u.%u.%u.%u oif=%i (got oif=%i)\n",
NIPQUAD(route_info->gw), ifindex, rt->u.dst.dev->ifindex);
return 0;
}
/* Stolen from ip_finish_output2
* PRE : skb->dev is set to the device we are leaving by
* skb->dst is not NULL
* POST: the packet is sent with the link layer header pushed
* the packet is destroyed
*/
static void ip_direct_send(struct sk_buff *skb)
{
struct dst_entry *dst = skb->dst;
struct rtable *rt = (struct rtable *)dst;
struct net_device *dev = dst->dev;
unsigned int hh_len = LL_RESERVED_SPACE(dev);
if (rt->rt_type == RTN_MULTICAST)
IP_INC_STATS(IPSTATS_MIB_OUTMCASTPKTS);
else if (rt->rt_type == RTN_BROADCAST)
IP_INC_STATS(IPSTATS_MIB_OUTBCASTPKTS);
/* Be paranoid, rather than too clever. */
if (unlikely(skb_headroom(skb) < hh_len && dev->header_ops)) {
struct sk_buff *skb2;
skb2 = skb_realloc_headroom(skb, LL_RESERVED_SPACE(dev));
if (skb2 == NULL) {
kfree_skb(skb);
return;
}
if (skb->sk)
skb_set_owner_w(skb2, skb->sk);
kfree_skb(skb);
skb = skb2;
}
if (dst->hh){
neigh_hh_output(dst->hh, skb);
return;
}
else if (dst->neighbour){
dst->neighbour->output(skb);
return;
}
if (net_ratelimit())
printk(KERN_DEBUG "ip_finish_output2: No header cache and no neighbour!\n");
kfree_skb(skb);
return;
}
/* PRE : skb->dev is set to the device we are leaving by
* POST: - the packet is directly sent to the skb->dev device, without
* pushing the link layer header.
* - the packet is destroyed
*/
static inline int dev_direct_send(struct sk_buff *skb)
{
return dev_queue_xmit(skb);
}
static unsigned int route_oif(const struct ipt_route_target_info *route_info,
struct sk_buff *skb)
{
unsigned int ifindex = 0;
struct net_device *dev_out = NULL;
/* The user set the interface name to use.
* Getting the current interface index.
*/
if ((dev_out = dev_get_by_name(&init_net, route_info->oif))) {
ifindex = dev_out->ifindex;
} else {
/* Unknown interface name : packet dropped */
if (net_ratelimit())
DEBUGP("ipt_ROUTE: oif interface %s not found\n", route_info->oif);
return NF_DROP;
}
/* Trying the standard way of routing packets */
switch (route(skb, ifindex, route_info)) {
case 1:
dev_put(dev_out);
if (route_info->flags & IPT_ROUTE_CONTINUE)
return IPT_CONTINUE;
ip_direct_send(skb);
return NF_STOLEN;
case 0:
/* Failed to send to oif. Trying the hard way */
if (route_info->flags & IPT_ROUTE_CONTINUE)
return NF_DROP;
if (net_ratelimit())
DEBUGP("ipt_ROUTE: forcing the use of %i\n",
ifindex);
/* We have to force the use of an interface.
* This interface must be a tunnel interface since
* otherwise we can't guess the hw address for
* the packet. For a tunnel interface, no hw address
* is needed.
*/
if ((dev_out->type != ARPHRD_TUNNEL)
&& (dev_out->type != ARPHRD_IPGRE)) {
if (net_ratelimit())
DEBUGP("ipt_ROUTE: can't guess the hw addr !\n");
dev_put(dev_out);
return NF_DROP;
}
/* Send the packet. This will also free skb
* Do not go through the POST_ROUTING hook because
* skb->dst is not set and because it will probably
* get confused by the destination IP address.
*/
skb->dev = dev_out;
dev_direct_send(skb);
dev_put(dev_out);
return NF_STOLEN;
default:
/* Unexpected error */
dev_put(dev_out);
return NF_DROP;
}
}
static unsigned int route_iif(const struct ipt_route_target_info *route_info,
struct sk_buff *skb)
{
struct net_device *dev_in = NULL;
/* Getting the current interface index. */
if (!(dev_in = dev_get_by_name(&init_net, route_info->iif))) {
if (net_ratelimit())
DEBUGP("ipt_ROUTE: iif interface %s not found\n", route_info->iif);
return NF_DROP;
}
skb->dev = dev_in;
dst_release(skb->dst);
skb->dst = NULL;
netif_rx(skb);
dev_put(dev_in);
return NF_STOLEN;
}
static unsigned int route_gw(const struct ipt_route_target_info *route_info,
struct sk_buff *skb)
{
if (route(skb, 0, route_info)!=1)
return NF_DROP;
if (route_info->flags & IPT_ROUTE_CONTINUE)
return IPT_CONTINUE;
ip_direct_send(skb);
return NF_STOLEN;
}
/* To detect and deter routed packet loopback when using the --tee option,
* we take a page out of the raw.patch book: on the copied skb, we set up
* a fake ->nfct entry, pointing to the local &route_tee_track. We skip
* routing packets when we see they already have that ->nfct.
*/
static struct nf_conn route_tee_track;
static unsigned int route_tg(struct sk_buff *pskb,
const struct net_device *in,
const struct net_device *out,
unsigned int hooknum,
const struct xt_target *target,
const void *targinfo)
{
const struct ipt_route_target_info *route_info = targinfo;
struct sk_buff *skb;
unsigned int res;
skb = pskb;
if (skb->nfct == &route_tee_track.ct_general) {
/* Loopback - a packet we already routed, is to be
* routed another time. Avoid that, now.
*/
if (net_ratelimit())
DEBUGP(KERN_DEBUG "ipt_ROUTE: loopback - DROP!\n");
return NF_DROP;
}
/* If we are at PREROUTING or INPUT hook
* the TTL isn't decreased by the IP stack
*/
if (hooknum == NF_INET_PRE_ROUTING ||
hooknum == NF_INET_LOCAL_IN) {
struct iphdr *iph = ip_hdr(skb);
if (iph->ttl <= 1) {
struct rtable *rt;
struct flowi fl = {
.oif = 0,
.nl_u = {
.ip4_u = {
.daddr = iph->daddr,
.saddr = iph->saddr,
.tos = RT_TOS(iph->tos),
.scope = ((iph->tos & RTO_ONLINK) ?
RT_SCOPE_LINK :
RT_SCOPE_UNIVERSE)
}
}
};
if (ip_route_output_key(&init_net, &rt, &fl)) {
return NF_DROP;
}
if (skb->dev == rt->u.dst.dev) {
/* Drop old route. */
dst_release(skb->dst);
skb->dst = &rt->u.dst;
/* this will traverse normal stack, and
* thus call conntrack on the icmp packet */
icmp_send(skb, ICMP_TIME_EXCEEDED,
ICMP_EXC_TTL, 0);
}
return NF_DROP;
}
/*
* If we are at INPUT the checksum must be recalculated since
* the length could change as the result of a defragmentation.
*/
if(hooknum == NF_INET_LOCAL_IN) {
iph->ttl = iph->ttl - 1;
iph->check = 0;
iph->check = ip_fast_csum((unsigned char *)iph, iph->ihl);
} else {
ip_decrease_ttl(iph);
}
}
if ((route_info->flags & IPT_ROUTE_TEE)) {
/*
* Copy the *pskb, and route the copy. Will later return
* IPT_CONTINUE for the original skb, which should continue
* on its way as if nothing happened. The copy should be
* independantly delivered to the ROUTE --gw.
*/
skb = skb_copy(pskb, GFP_ATOMIC);
if (!skb) {
if (net_ratelimit())
DEBUGP(KERN_DEBUG "ipt_ROUTE: copy failed!\n");
return IPT_CONTINUE;
}
}
/* Tell conntrack to forget this packet since it may get confused
* when a packet is leaving with dst address == our address.
* Good idea ? Dunno. Need advice.
*
* NEW: mark the skb with our &route_tee_track, so we avoid looping
* on any already routed packet.
*/
if (!(route_info->flags & IPT_ROUTE_CONTINUE)) {
nf_conntrack_put(skb->nfct);
skb->nfct = &route_tee_track.ct_general;
skb->nfctinfo = IP_CT_NEW;
nf_conntrack_get(skb->nfct);
}
if (route_info->oif[0] != '\0') {
res = route_oif(route_info, skb);
} else if (route_info->iif[0] != '\0') {
res = route_iif(route_info, skb);
} else if (route_info->gw) {
res = route_gw(route_info, skb);
} else {
if (net_ratelimit())
DEBUGP(KERN_DEBUG "ipt_ROUTE: no parameter !\n");
res = IPT_CONTINUE;
}
if ((route_info->flags & IPT_ROUTE_TEE))
res = IPT_CONTINUE;
return res;
}
/*
* We are using the mangle table but we are actually not modifying the packet,
* but modifying its output device. Hence we don't really need to check
* anything. For us, all packets are welcome.
*/
static bool
route_tg_checkentry(const char *tablename, const void *e_void,
const struct xt_target *target, void *targinfo,
unsigned int hook_mask)
{
if (strcmp(tablename, "mangle") != 0) {
printk("ipt_ROUTE: bad table `%s', use the `mangle' table.\n",
tablename);
return false;
}
/* No need of comparing the hook, they don't exist in kernel space anymore.
* Only exist in userspace. The check above for mangle table is also redundant
*/
if (hook_mask & ~( (1 << NF_INET_PRE_ROUTING)
| (1 << NF_INET_LOCAL_IN)
| (1 << NF_INET_FORWARD)
| (1 << NF_INET_LOCAL_OUT)
| (1 << NF_INET_POST_ROUTING))) {
printk("ipt_ROUTE: bad hook\n");
return false;
}
return true;
}
static struct xt_target route_tg_reg __read_mostly = {
.name = "ROUTE",
.family = AF_INET,
.target = route_tg,
.targetsize = sizeof(struct ipt_route_target_info),
.table = "mangle",
.checkentry = route_tg_checkentry,
.me = THIS_MODULE,
};
static int __init init(void)
{
/* Set up fake conntrack (stolen from raw.patch):
- to never be deleted, not in any hashes */
atomic_set(&route_tee_track.ct_general.use, 1);
/* - and look it like as a confirmed connection */
set_bit(IPS_CONFIRMED_BIT, &route_tee_track.status);
/* Initialize fake conntrack so that NAT will skip it */
route_tee_track.status |= IPS_NAT_DONE_MASK;
return xt_register_target(&route_tg_reg);
}
static void __exit fini(void)
{
xt_unregister_target(&route_tg_reg);
}
module_init(init);
module_exit(fini);
^ permalink raw reply [flat|nested] 13+ messages in thread* Re: TEE patch [was: ROUTE patch]
2009-02-24 15:33 ` Patrick McHardy
2009-02-25 3:14 ` Abhishek Singh
@ 2009-02-25 10:00 ` Jan Engelhardt
2009-02-25 10:19 ` Patrick McHardy
1 sibling, 1 reply; 13+ messages in thread
From: Jan Engelhardt @ 2009-02-25 10:00 UTC (permalink / raw)
To: Patrick McHardy; +Cc: Abhishek Singh, netfilter-devel
On Tuesday 2009-02-24 16:33, Patrick McHardy wrote:
>>> Perhaps we can finally get this merged. IIRC the only reason against
>>> it is the IP layer duplication instead of simply using dst_output().
>>>
>> It cannot use dst_output because that would cause reentrancy into iptablse.
>> Want a patch, though?
>
> I would like to have a look at the current patch, yes. Don't
> bother fixing anything though, I mainly want to have a look
> at the routing part.
>
parent 499c9627ecb75566801d96f0e91c9da356b6a8c8 (v2.6.29-rc4-31-g499c962)
commit 26f785f037f6fae3c1dad8644561e62e79cfe414
Author: Jan Engelhardt <jengelh@medozas.de>
Date: Wed Feb 25 11:00:17 2009 +0100
netfilter: xtables: import xt_TEE target
The target can be used to duplicate packets and reroute them to
another destination on the local Ethernet segment.
TODO: Still has some issues: no neighbor discovery is done for IPv6
(meaning packets go nowhere if there is no neighbor entry) when TEE
sends a packet and the neighbor has not been looked up yet. I have no
idea why IPv6 won't do that, because it does work for IPv4/ARP.
Signed-off-by: Jan Engelhardt <jengelh@medozas.de>
---
include/linux/netfilter/xt_TEE.h | 8 +
net/netfilter/Kconfig | 10 +
net/netfilter/Makefile | 1 +
net/netfilter/xt_TEE.c | 319 ++++++++++++++++++++++++++++++
4 files changed, 338 insertions(+), 0 deletions(-)
create mode 100644 include/linux/netfilter/xt_TEE.h
create mode 100644 net/netfilter/xt_TEE.c
diff --git a/include/linux/netfilter/xt_TEE.h b/include/linux/netfilter/xt_TEE.h
new file mode 100644
index 0000000..83fa768
--- /dev/null
+++ b/include/linux/netfilter/xt_TEE.h
@@ -0,0 +1,8 @@
+#ifndef _XT_TEE_TARGET_H
+#define _XT_TEE_TARGET_H
+
+struct xt_tee_tginfo {
+ union nf_inet_addr gw;
+};
+
+#endif /* _XT_TEE_TARGET_H */
diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
index 0eb98b4..eeb8258 100644
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -430,6 +430,16 @@ config NETFILTER_XT_TARGET_RATEEST
To compile it as a module, choose M here. If unsure, say N.
+config NETFILTER_XT_TARGET_TEE
+ tristate '"TEE" target support'
+ depends on NETFILTER_XTABLES
+ depends on NETFILTER_ADVANCED
+ depends on IP_NF_MANGLE || IP6_NF_MANGLE
+ ---help---
+ This option adds a "TEE" target, which enables you to duplicate
+ packets and route those duplicates to a different gateway.
+ The target has to be used inside the mangle table.
+
config NETFILTER_XT_TARGET_TPROXY
tristate '"TPROXY" target support (EXPERIMENTAL)'
depends on EXPERIMENTAL
diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile
index da73ed2..ad2950c 100644
--- a/net/netfilter/Makefile
+++ b/net/netfilter/Makefile
@@ -52,6 +52,7 @@ obj-$(CONFIG_NETFILTER_XT_TARGET_NFQUEUE) += xt_NFQUEUE.o
obj-$(CONFIG_NETFILTER_XT_TARGET_NOTRACK) += xt_NOTRACK.o
obj-$(CONFIG_NETFILTER_XT_TARGET_RATEEST) += xt_RATEEST.o
obj-$(CONFIG_NETFILTER_XT_TARGET_SECMARK) += xt_SECMARK.o
+obj-$(CONFIG_NETFILTER_XT_TARGET_TEE) += xt_TEE.o
obj-$(CONFIG_NETFILTER_XT_TARGET_TPROXY) += xt_TPROXY.o
obj-$(CONFIG_NETFILTER_XT_TARGET_TCPMSS) += xt_TCPMSS.o
obj-$(CONFIG_NETFILTER_XT_TARGET_TCPOPTSTRIP) += xt_TCPOPTSTRIP.o
diff --git a/net/netfilter/xt_TEE.c b/net/netfilter/xt_TEE.c
new file mode 100644
index 0000000..e7958a9
--- /dev/null
+++ b/net/netfilter/xt_TEE.c
@@ -0,0 +1,319 @@
+/*
+ * "TEE" target extension for Xtables
+ * Copyright © Sebastian Claßen <sebastian.classen [at] freenet de>, 2007
+ * Jan Engelhardt <jengelh [at] medozas de>, 2007 - 2008
+ *
+ * based on ipt_ROUTE.c from Cédric de Launois
+ * <delaunois [at] info ucl ac be>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * version 2, as published by the Free Software Foundation.
+ */
+#include <linux/ip.h>
+#include <linux/module.h>
+#include <linux/route.h>
+#include <linux/skbuff.h>
+#include <net/checksum.h>
+#include <net/icmp.h>
+#include <net/ip.h>
+#include <net/ip6_route.h>
+#include <net/route.h>
+#include <linux/netfilter/x_tables.h>
+
+#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
+# define WITH_CONNTRACK 1
+# include <net/netfilter/nf_conntrack.h>
+static struct nf_conn tee_track;
+#endif
+
+#include <linux/netfilter/xt_TEE.h>
+
+static const union nf_inet_addr tee_zero_address;
+
+/*
+ * Try to route the packet according to the routing keys specified in
+ * route_info. Keys are :
+ * - ifindex :
+ * 0 if no oif preferred,
+ * otherwise set to the index of the desired oif
+ * - route_info->gateway :
+ * 0 if no gateway specified,
+ * otherwise set to the next host to which the pkt must be routed
+ * If success, skb->dev is the output device to which the packet must
+ * be sent and skb->dst is not NULL
+ *
+ * RETURN: false - if an error occured
+ * true - if the packet was succesfully routed to the
+ * destination desired
+ */
+static bool
+tee_tg_route4(struct sk_buff *skb, const struct xt_tee_tginfo *info)
+{
+ int err;
+ struct rtable *rt;
+ struct flowi fl;
+
+ memset(&fl, 0, sizeof(fl));
+ fl.nl_u.ip4_u.daddr = info->gw.ip;
+ fl.nl_u.ip4_u.scope = RT_SCOPE_UNIVERSE;
+
+ /* Trying to route the packet using the standard routing table. */
+ err = ip_route_output_key(&init_net, &rt, &fl);
+ if (err != 0) {
+ if (net_ratelimit())
+ pr_debug(KBUILD_MODNAME
+ ": could not route packet (%d)", err);
+ return false;
+ }
+
+ dst_release(skb->dst);
+ skb->dst = &rt->u.dst;
+ skb->dev = skb->dst->dev;
+ skb->protocol = htons(ETH_P_IP);
+ return true;
+}
+
+static inline bool dev_hh_avail(const struct net_device *dev)
+{
+ return dev->header_ops != NULL;
+}
+
+/*
+ * Stolen from ip_finish_output2
+ * PRE : skb->dev is set to the device we are leaving by
+ * skb->dst is not NULL
+ * POST: the packet is sent with the link layer header pushed
+ * the packet is destroyed
+ */
+static void tee_tg_send(struct sk_buff *skb)
+{
+ const struct dst_entry *dst = skb->dst;
+ const struct net_device *dev = dst->dev;
+ unsigned int hh_len = LL_RESERVED_SPACE(dev);
+
+ /* Be paranoid, rather than too clever. */
+ if (unlikely(skb_headroom(skb) < hh_len && dev_hh_avail(dev))) {
+ struct sk_buff *skb2;
+
+ skb2 = skb_realloc_headroom(skb, LL_RESERVED_SPACE(dev));
+ if (skb2 == NULL) {
+ kfree_skb(skb);
+ return;
+ }
+ if (skb->sk != NULL)
+ skb_set_owner_w(skb2, skb->sk);
+ kfree_skb(skb);
+ skb = skb2;
+ }
+
+ if (dst->hh != NULL) {
+ neigh_hh_output(dst->hh, skb);
+ } else if (dst->neighbour != NULL) {
+ dst->neighbour->output(skb);
+ } else {
+ if (net_ratelimit())
+ pr_debug(KBUILD_MODNAME "no hdr & no neighbour cache!\n");
+ kfree_skb(skb);
+ }
+}
+
+/*
+ * To detect and deter routed packet loopback when using the --tee option, we
+ * take a page out of the raw.patch book: on the copied skb, we set up a fake
+ * ->nfct entry, pointing to the local &route_tee_track. We skip routing
+ * packets when we see they already have that ->nfct.
+ */
+static unsigned int
+tee_tg4(struct sk_buff *skb, const struct xt_target_param *par)
+{
+ const struct xt_tee_tginfo *info = par->targinfo;
+
+#ifdef WITH_CONNTRACK
+ if (skb->nfct == &tee_track.ct_general) {
+ /*
+ * Loopback - a packet we already routed, is to be
+ * routed another time. Avoid that, now.
+ */
+ if (net_ratelimit())
+ pr_debug(KBUILD_MODNAME "loopback - DROP!\n");
+ return NF_DROP;
+ }
+#endif
+
+ if (!skb_make_writable(skb, sizeof(struct iphdr)))
+ return NF_DROP;
+
+ /*
+ * If we are in INPUT, the checksum must be recalculated since
+ * the length could have changed as a result of defragmentation.
+ */
+ if (par->hooknum == NF_INET_LOCAL_IN) {
+ struct iphdr *iph = ip_hdr(skb);
+ iph->check = 0;
+ iph->check = ip_fast_csum((unsigned char *)iph, iph->ihl);
+ }
+
+ /*
+ * Copy the skb, and route the copy. Will later return %XT_CONTINUE for
+ * the original skb, which should continue on its way as if nothing has
+ * happened. The copy should be independantly delivered to the TEE --gw.
+ */
+ skb = skb_copy(skb, GFP_ATOMIC);
+ if (skb == NULL) {
+ if (net_ratelimit())
+ pr_debug(KBUILD_MODNAME "copy failed!\n");
+ return XT_CONTINUE;
+ }
+
+#ifdef WITH_CONNTRACK
+ /*
+ * Tell conntrack to forget this packet since it may get confused
+ * when a packet is leaving with dst address == our address.
+ * Good idea? Dunno. Need advice.
+ *
+ * NEW: mark the skb with our &tee_track, so we avoid looping
+ * on any already routed packet.
+ */
+ nf_conntrack_put(skb->nfct);
+ skb->nfct = &tee_track.ct_general;
+ skb->nfctinfo = IP_CT_NEW;
+ nf_conntrack_get(skb->nfct);
+#endif
+
+ /*
+ * Normally, we would just use ip_local_out. Because iph->check is
+ * already correct, we could take a shortcut and call dst_output
+ * [forwards to ip_output] directly. ip_output however will invoke
+ * Netfilter hooks and cause reentrancy. So we skip that too and go
+ * directly to ip_finish_output. Since we should not do XFRM, control
+ * passes to ip_finish_output2. That function is not exported, so it is
+ * copied here as tee_ip_direct_send.
+ *
+ * We do no XFRM on the cloned packet on purpose! The choice of
+ * iptables match options will control whether the raw packet or the
+ * transformed version is cloned.
+ *
+ * Also on purpose, no fragmentation is done, to preserve the
+ * packet as best as possible.
+ */
+ if (tee_tg_route4(skb, info))
+ tee_tg_send(skb);
+
+ return XT_CONTINUE;
+}
+
+static bool
+tee_tg_route6(struct sk_buff *skb, const struct xt_tee_tginfo *info)
+{
+ struct dst_entry *dst;
+ struct flowi fl;
+
+ memset(&fl, 0, sizeof(fl));
+ fl.nl_u.ip6_u.daddr = info->gw.in6;
+
+ dst = ip6_route_output(dev_net(skb->dev), NULL, &fl);
+ if (dst == NULL) {
+ if (net_ratelimit())
+ printk(KERN_ERR "ip6_route_output failed for tee\n");
+ return false;
+ }
+
+ dst_release(skb->dst);
+ skb->dst = dst;
+ skb->dev = skb->dst->dev;
+ skb->protocol = htons(ETH_P_IPV6);
+ return true;
+}
+
+static unsigned int
+tee_tg6(struct sk_buff *skb, const struct xt_target_param *par)
+{
+ const struct xt_tee_tginfo *info = par->targinfo;
+
+ /* Try silence. */
+#ifdef WITH_CONNTRACK
+ if (skb->nfct == &tee_track.ct_general)
+ return NF_DROP;
+#endif
+
+ if ((skb = skb_copy(skb, GFP_ATOMIC)) == NULL)
+ return XT_CONTINUE;
+
+#ifdef WITH_CONNTRACK
+ nf_conntrack_put(skb->nfct);
+ skb->nfct = &tee_track.ct_general;
+ skb->nfctinfo = IP_CT_NEW;
+ nf_conntrack_get(skb->nfct);
+#endif
+ if (tee_tg_route6(skb, info))
+ tee_tg_send(skb);
+
+ return XT_CONTINUE;
+}
+
+static bool tee_tg_check(const struct xt_tgchk_param *par)
+{
+ const struct xt_tee_tginfo *info = par->targinfo;
+
+ /* 0.0.0.0 and :: not allowed */
+ return memcmp(&info->gw, &tee_zero_address,
+ sizeof(tee_zero_address)) != 0;
+}
+
+static struct xt_target tee_tg_reg[] __read_mostly = {
+ {
+ .name = "TEE",
+ .revision = 0,
+ .family = NFPROTO_IPV4,
+ .table = "mangle",
+ .target = tee_tg4,
+ .targetsize = sizeof(struct xt_tee_tginfo),
+ .checkentry = tee_tg_check,
+ .me = THIS_MODULE,
+ },
+ {
+ .name = "TEE",
+ .revision = 0,
+ .family = NFPROTO_IPV6,
+ .table = "mangle",
+ .target = tee_tg6,
+ .targetsize = sizeof(struct xt_tee_tginfo),
+ .checkentry = tee_tg_check,
+ .me = THIS_MODULE,
+ },
+};
+
+static int __init tee_tg_init(void)
+{
+#ifdef WITH_CONNTRACK
+ /*
+ * Set up fake conntrack (stolen from raw.patch):
+ * - to never be deleted, not in any hashes
+ */
+ atomic_set(&tee_track.ct_general.use, 1);
+
+ /* - and look it like as a confirmed connection */
+ set_bit(IPS_CONFIRMED_BIT, &tee_track.status);
+
+ /* Initialize fake conntrack so that NAT will skip it */
+ tee_track.status |= IPS_NAT_DONE_MASK;
+#endif
+
+ return xt_register_targets(tee_tg_reg, ARRAY_SIZE(tee_tg_reg));
+}
+
+static void __exit tee_tg_exit(void)
+{
+ xt_unregister_targets(tee_tg_reg, ARRAY_SIZE(tee_tg_reg));
+ /* [SC]: shoud not we cleanup tee_track here? */
+}
+
+module_init(tee_tg_init);
+module_exit(tee_tg_exit);
+MODULE_AUTHOR("Sebastian Claßen <sebastian.classen@freenet.ag>");
+MODULE_AUTHOR("Jan Engelhardt <jengelh@medozas.de>");
+MODULE_DESCRIPTION("Xtables: Reroute packet copy");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS("ipt_TEE");
+MODULE_ALIAS("ip6t_TEE");
--
# Created with git-export-patch
--
To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
^ permalink raw reply related [flat|nested] 13+ messages in thread