nf-next: reentrancy, new modules

netfilter-devel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed

* nf-next: reentrancy, new modules
@ 2010-04-13 12:37 Jan Engelhardt
  2010-04-13 12:37 ` [PATCH 1/8] netfilter: ipv6: move POSTROUTING invocation before fragmentation Jan Engelhardt
                   ` (8 more replies)
  0 siblings, 9 replies; 22+ messages in thread
From: Jan Engelhardt @ 2010-04-13 12:37 UTC (permalink / raw)
  To: kaber; +Cc: netfilter-devel


Hi,


per request, the series reposted with fixes applied. I also tacked
sysrq and condition on.


The following changes since commit 9f93ff5be54108066372d1c4100c515d9d9acc1b:
  Alexey Dobriyan (1):
        Restore __ALIGN_MASK()

are available in the git repository at:

  git://dev.medozas.de/linux master

Jan Engelhardt (8):
      netfilter: ipv6: move POSTROUTING invocation before fragmentation
      netfilter: ipv6: add IPSKB_REROUTED exclusion to NF_HOOK/POSTROUTING invocation
      netfilter: xtables: inclusion of xt_TEE
      netfilter: xtables2: make ip_tables reentrant
      netfilter: xt_TEE: have cloned packet travel through Xtables too
      netfilter: xtables: remove old comments about reentrancy
      netfilter: xtables: inclusion of xt_SYSRQ
      netfilter: xtables: inclusion of xt_condition

 include/linux/netfilter/Kbuild         |    2 +
 include/linux/netfilter/x_tables.h     |    7 +
 include/linux/netfilter/xt_TEE.h       |    8 +
 include/linux/netfilter/xt_condition.h |   14 ++
 net/ipv4/netfilter/arp_tables.c        |    6 +-
 net/ipv4/netfilter/ip_tables.c         |   67 +++---
 net/ipv4/netfilter/ipt_REJECT.c        |    3 -
 net/ipv6/ip6_output.c                  |   50 +++---
 net/ipv6/netfilter/ip6_tables.c        |   58 ++----
 net/ipv6/netfilter/ip6t_REJECT.c       |    3 -
 net/netfilter/Kconfig                  |   27 +++
 net/netfilter/Makefile                 |    3 +
 net/netfilter/x_tables.c               |   77 +++++++
 net/netfilter/xt_SYSRQ.c               |  354 ++++++++++++++++++++++++++++++++
 net/netfilter/xt_TEE.c                 |  228 ++++++++++++++++++++
 net/netfilter/xt_condition.c           |  243 ++++++++++++++++++++++
 16 files changed, 1048 insertions(+), 102 deletions(-)
 create mode 100644 include/linux/netfilter/xt_TEE.h
 create mode 100644 include/linux/netfilter/xt_condition.h
 create mode 100644 net/netfilter/xt_SYSRQ.c
 create mode 100644 net/netfilter/xt_TEE.c
 create mode 100644 net/netfilter/xt_condition.c

^ permalink raw reply	[flat|nested] 22+ messages in thread

* [PATCH 1/8] netfilter: ipv6: move POSTROUTING invocation before fragmentation
  2010-04-13 12:37 nf-next: reentrancy, new modules Jan Engelhardt
@ 2010-04-13 12:37 ` Jan Engelhardt
  2010-04-13 13:30   ` Patrick McHardy
  2010-04-13 12:37 ` [PATCH 2/8] netfilter: ipv6: add IPSKB_REROUTED exclusion to NF_HOOK/POSTROUTING invocation Jan Engelhardt
                   ` (7 subsequent siblings)
  8 siblings, 1 reply; 22+ messages in thread
From: Jan Engelhardt @ 2010-04-13 12:37 UTC (permalink / raw)
  To: kaber; +Cc: netfilter-devel

Patrick McHardy notes: "We used to invoke IPv4 POST_ROUTING after
fragmentation as well just to defragment the packets in conntrack
immediately afterwards, but that got changed during the
netfilter-ipsec integration. Ideally IPv6 would behave like IPv4."

This patch makes it so. Sending an oversized frame (e.g. `ping6
-s64000 -c1 ::1`) will now show up in POSTROUTING as a single skb
rather than multiple ones.

Signed-off-by: Jan Engelhardt <jengelh@medozas.de>
---
 net/ipv6/ip6_output.c |   49 +++++++++++++++++++++++--------------------------
 1 files changed, 23 insertions(+), 26 deletions(-)

diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 4535b7a..f314ba4 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -82,22 +82,6 @@ int ip6_local_out(struct sk_buff *skb)
 }
 EXPORT_SYMBOL_GPL(ip6_local_out);
 
-static int ip6_output_finish(struct sk_buff *skb)
-{
-	struct dst_entry *dst = skb_dst(skb);
-
-	if (dst->hh)
-		return neigh_hh_output(dst->hh, skb);
-	else if (dst->neighbour)
-		return dst->neighbour->output(skb);
-
-	IP6_INC_STATS_BH(dev_net(dst->dev),
-			 ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);
-	kfree_skb(skb);
-	return -EINVAL;
-
-}
-
 /* dev_loopback_xmit for use with netfilter. */
 static int ip6_dev_loopback_xmit(struct sk_buff *newskb)
 {
@@ -111,8 +95,7 @@ static int ip6_dev_loopback_xmit(struct sk_buff *newskb)
 	return 0;
 }
 
-
-static int ip6_output2(struct sk_buff *skb)
+static int ip6_finish_output2(struct sk_buff *skb)
 {
 	struct dst_entry *dst = skb_dst(skb);
 	struct net_device *dev = dst->dev;
@@ -150,8 +133,15 @@ static int ip6_output2(struct sk_buff *skb)
 				skb->len);
 	}
 
-	return NF_HOOK(NFPROTO_IPV6, NF_INET_POST_ROUTING, skb, NULL, skb->dev,
-		       ip6_output_finish);
+	if (dst->hh)
+		return neigh_hh_output(dst->hh, skb);
+	else if (dst->neighbour)
+		return dst->neighbour->output(skb);
+
+	IP6_INC_STATS_BH(dev_net(dst->dev),
+			 ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);
+	kfree_skb(skb);
+	return -EINVAL;
 }
 
 static inline int ip6_skb_dst_mtu(struct sk_buff *skb)
@@ -162,21 +152,28 @@ static inline int ip6_skb_dst_mtu(struct sk_buff *skb)
 	       skb_dst(skb)->dev->mtu : dst_mtu(skb_dst(skb));
 }
 
+static int ip6_finish_output(struct sk_buff *skb)
+{
+	if ((skb->len > ip6_skb_dst_mtu(skb) && !skb_is_gso(skb)) ||
+				dst_allfrag(skb_dst(skb)))
+		return ip6_fragment(skb, ip6_finish_output2);
+	else
+		return ip6_finish_output2(skb);
+}
+
 int ip6_output(struct sk_buff *skb)
 {
+	struct net_device *dev = skb_dst(skb)->dev;
 	struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
 	if (unlikely(idev->cnf.disable_ipv6)) {
-		IP6_INC_STATS(dev_net(skb_dst(skb)->dev), idev,
+		IP6_INC_STATS(dev_net(dev), idev,
 			      IPSTATS_MIB_OUTDISCARDS);
 		kfree_skb(skb);
 		return 0;
 	}
 
-	if ((skb->len > ip6_skb_dst_mtu(skb) && !skb_is_gso(skb)) ||
-				dst_allfrag(skb_dst(skb)))
-		return ip6_fragment(skb, ip6_output2);
-	else
-		return ip6_output2(skb);
+	return NF_HOOK(NFPROTO_IPV6, NF_INET_POST_ROUTING, skb, NULL, dev,
+		       ip6_finish_output);
 }
 
 /*
-- 
1.7.0.4


^ permalink raw reply related	[flat|nested] 22+ messages in thread

* [PATCH 2/8] netfilter: ipv6: add IPSKB_REROUTED exclusion to NF_HOOK/POSTROUTING invocation
  2010-04-13 12:37 nf-next: reentrancy, new modules Jan Engelhardt
  2010-04-13 12:37 ` [PATCH 1/8] netfilter: ipv6: move POSTROUTING invocation before fragmentation Jan Engelhardt
@ 2010-04-13 12:37 ` Jan Engelhardt
  2010-04-13 13:33   ` Patrick McHardy
  2010-04-13 12:37 ` [PATCH 3/8] netfilter: xtables: inclusion of xt_TEE Jan Engelhardt
                   ` (6 subsequent siblings)
  8 siblings, 1 reply; 22+ messages in thread
From: Jan Engelhardt @ 2010-04-13 12:37 UTC (permalink / raw)
  To: kaber; +Cc: netfilter-devel

Similar to how IPv4's ip_output.c works, have ip6_output also check
the IPSKB_REROUTED flag. It will be set from xt_TEE for cloned packets
since Xtables can currently only deal with a single packet in flight
at a time.

Signed-off-by: Jan Engelhardt <jengelh@medozas.de>
Acked-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/ip6_output.c |    5 +++--
 1 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index f314ba4..7e10f62 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -172,8 +172,9 @@ int ip6_output(struct sk_buff *skb)
 		return 0;
 	}
 
-	return NF_HOOK(NFPROTO_IPV6, NF_INET_POST_ROUTING, skb, NULL, dev,
-		       ip6_finish_output);
+	return NF_HOOK_COND(NFPROTO_IPV6, NF_INET_POST_ROUTING, skb, NULL, dev,
+			    ip6_finish_output,
+			    !(IP6CB(skb)->flags & IPSKB_REROUTED));
 }
 
 /*
-- 
1.7.0.4


^ permalink raw reply related	[flat|nested] 22+ messages in thread

* [PATCH 3/8] netfilter: xtables: inclusion of xt_TEE
  2010-04-13 12:37 nf-next: reentrancy, new modules Jan Engelhardt
  2010-04-13 12:37 ` [PATCH 1/8] netfilter: ipv6: move POSTROUTING invocation before fragmentation Jan Engelhardt
  2010-04-13 12:37 ` [PATCH 2/8] netfilter: ipv6: add IPSKB_REROUTED exclusion to NF_HOOK/POSTROUTING invocation Jan Engelhardt
@ 2010-04-13 12:37 ` Jan Engelhardt
  2010-04-13 13:37   ` Patrick McHardy
  2010-04-13 13:45   ` Patrick McHardy
  2010-04-13 12:37 ` [PATCH 4/8] netfilter: xtables2: make ip_tables reentrant Jan Engelhardt
                   ` (5 subsequent siblings)
  8 siblings, 2 replies; 22+ messages in thread
From: Jan Engelhardt @ 2010-04-13 12:37 UTC (permalink / raw)
  To: kaber; +Cc: netfilter-devel

xt_TEE can be used to clone and reroute a packet. This can for
example be used to copy traffic at a router for logging purposes
to another dedicated machine.

References: http://www.gossamer-threads.com/lists/iptables/devel/68781
Signed-off-by: Jan Engelhardt <jengelh@medozas.de>
---
 include/linux/netfilter/Kbuild   |    1 +
 include/linux/netfilter/xt_TEE.h |    8 ++
 net/ipv4/ip_output.c             |    1 +
 net/ipv6/ip6_output.c            |    1 +
 net/netfilter/Kconfig            |    7 +
 net/netfilter/Makefile           |    1 +
 net/netfilter/xt_TEE.c           |  232 ++++++++++++++++++++++++++++++++++++++
 7 files changed, 251 insertions(+), 0 deletions(-)
 create mode 100644 include/linux/netfilter/xt_TEE.h
 create mode 100644 net/netfilter/xt_TEE.c

diff --git a/include/linux/netfilter/Kbuild b/include/linux/netfilter/Kbuild
index a5a63e4..48767cd 100644
--- a/include/linux/netfilter/Kbuild
+++ b/include/linux/netfilter/Kbuild
@@ -16,6 +16,7 @@ header-y += xt_RATEEST.h
 header-y += xt_SECMARK.h
 header-y += xt_TCPMSS.h
 header-y += xt_TCPOPTSTRIP.h
+header-y += xt_TEE.h
 header-y += xt_TPROXY.h
 header-y += xt_comment.h
 header-y += xt_connbytes.h
diff --git a/include/linux/netfilter/xt_TEE.h b/include/linux/netfilter/xt_TEE.h
new file mode 100644
index 0000000..83fa768
--- /dev/null
+++ b/include/linux/netfilter/xt_TEE.h
@@ -0,0 +1,8 @@
+#ifndef _XT_TEE_TARGET_H
+#define _XT_TEE_TARGET_H
+
+struct xt_tee_tginfo {
+	union nf_inet_addr gw;
+};
+
+#endif /* _XT_TEE_TARGET_H */
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index f09135e..0abfdde 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -309,6 +309,7 @@ int ip_output(struct sk_buff *skb)
 			    ip_finish_output,
 			    !(IPCB(skb)->flags & IPSKB_REROUTED));
 }
+EXPORT_SYMBOL_GPL(ip_output);
 
 int ip_queue_xmit(struct sk_buff *skb, int ipfragok)
 {
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 7e10f62..307d8bf 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -176,6 +176,7 @@ int ip6_output(struct sk_buff *skb)
 			    ip6_finish_output,
 			    !(IP6CB(skb)->flags & IPSKB_REROUTED));
 }
+EXPORT_SYMBOL_GPL(ip6_output);
 
 /*
  *	xmit an sk_buff (used by TCP)
diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
index 8055786..673a6c8 100644
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -502,6 +502,13 @@ config NETFILTER_XT_TARGET_RATEEST
 
 	  To compile it as a module, choose M here.  If unsure, say N.
 
+config NETFILTER_XT_TARGET_TEE
+	tristate '"TEE" - packet cloning to alternate destiantion'
+	depends on NETFILTER_ADVANCED
+	---help---
+	This option adds a "TEE" target with which a packet can be cloned and
+	this clone be rerouted to another nexthop.
+
 config NETFILTER_XT_TARGET_TPROXY
 	tristate '"TPROXY" target support (EXPERIMENTAL)'
 	depends on EXPERIMENTAL
diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile
index cd31afe..14e3a8f 100644
--- a/net/netfilter/Makefile
+++ b/net/netfilter/Makefile
@@ -59,6 +59,7 @@ obj-$(CONFIG_NETFILTER_XT_TARGET_SECMARK) += xt_SECMARK.o
 obj-$(CONFIG_NETFILTER_XT_TARGET_TPROXY) += xt_TPROXY.o
 obj-$(CONFIG_NETFILTER_XT_TARGET_TCPMSS) += xt_TCPMSS.o
 obj-$(CONFIG_NETFILTER_XT_TARGET_TCPOPTSTRIP) += xt_TCPOPTSTRIP.o
+obj-$(CONFIG_NETFILTER_XT_TARGET_TEE) += xt_TEE.o
 obj-$(CONFIG_NETFILTER_XT_TARGET_TRACE) += xt_TRACE.o
 
 # matches
diff --git a/net/netfilter/xt_TEE.c b/net/netfilter/xt_TEE.c
new file mode 100644
index 0000000..79c5bf9
--- /dev/null
+++ b/net/netfilter/xt_TEE.c
@@ -0,0 +1,232 @@
+/*
+ *	"TEE" target extension for Xtables
+ *	Copyright Â© Sebastian ClaÃŸen <sebastian.classen [at] freenet de>, 2007
+ *	Jan Engelhardt <jengelh [at] medozas de>, 2007 - 2010
+ *
+ *	based on ipt_ROUTE.c from CÃ©dric de Launois
+ *	<delaunois@info.ucl.be>
+ *
+ *	This program is free software; you can redistribute it and/or
+ *	modify it under the terms of the GNU General Public License
+ *	version 2 or later, as published by the Free Software Foundation.
+ */
+#include <linux/ip.h>
+#include <linux/module.h>
+#include <linux/route.h>
+#include <linux/skbuff.h>
+#include <net/checksum.h>
+#include <net/icmp.h>
+#include <net/ip.h>
+#include <net/ipv6.h>
+#include <net/ip6_route.h>
+#include <net/route.h>
+#include <linux/netfilter/x_tables.h>
+#include <linux/netfilter/xt_TEE.h>
+
+#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
+#	define WITH_CONNTRACK 1
+#	include <net/netfilter/nf_conntrack.h>
+static struct nf_conn tee_track;
+#endif
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#	define WITH_IPV6 1
+#endif
+
+static const union nf_inet_addr tee_zero_address;
+
+static bool
+tee_tg_route4(struct sk_buff *skb, const struct xt_tee_tginfo *info)
+{
+	const struct iphdr *iph = ip_hdr(skb);
+	struct rtable *rt;
+	struct flowi fl;
+
+	memset(&fl, 0, sizeof(fl));
+	fl.nl_u.ip4_u.daddr = info->gw.ip;
+	fl.nl_u.ip4_u.tos   = RT_TOS(iph->tos);
+	fl.nl_u.ip4_u.scope = RT_SCOPE_UNIVERSE;
+
+	if (ip_route_output_key(dev_net(skb->dev), &rt, &fl) != 0) {
+		kfree_skb(skb);
+		return false;
+	}
+	dst_release(skb_dst(skb));
+	skb_dst_set(skb, &rt->u.dst);
+	skb->dev      = rt->u.dst.dev;
+	skb->protocol = htons(ETH_P_IP);
+	return true;
+}
+
+static unsigned int
+tee_tg4(struct sk_buff *skb, const struct xt_target_param *par)
+{
+	const struct xt_tee_tginfo *info = par->targinfo;
+	struct iphdr *iph;
+
+	/*
+	 * Copy the skb, and route the copy. Will later return %XT_CONTINUE for
+	 * the original skb, which should continue on its way as if nothing has
+	 * happened. The copy should be independently delivered to the TEE
+	 * --gateway.
+	 */
+	skb = skb_copy(skb, GFP_ATOMIC);
+	if (skb == NULL)
+		return XT_CONTINUE;
+
+#ifdef WITH_CONNTRACK
+	nf_conntrack_put(skb->nfct);
+	skb->nfct     = &tee_track.ct_general;
+	skb->nfctinfo = IP_CT_NEW;
+	nf_conntrack_get(skb->nfct);
+#endif
+	/*
+	 * If we are in PREROUTING/INPUT, the checksum must be recalculated
+	 * since the length could have changed as a result of defragmentation.
+	 *
+	 * We also decrease the TTL to mitigate potential TEE loops
+	 * between two hosts.
+	 *
+	 * Set %IP_DF so that the original source is notified of a potentially
+	 * decreased MTU on the clone route. IPv6 does this too.
+	 */
+	iph = ip_hdr(skb);
+	iph->frag_off |= htons(IP_DF);
+	if (par->hooknum == NF_INET_PRE_ROUTING ||
+	    par->hooknum == NF_INET_LOCAL_IN)
+		--iph->ttl;
+	ip_send_check(iph);
+
+	/*
+	 * Xtables is not reentrant currently, so a choice has to be made:
+	 * 1. return absolute verdict for the original and let the cloned
+	 *    packet travel through the chains
+	 * 2. let the original continue travelling and not pass the clone
+	 *    to Xtables.
+	 * #2 is chosen. Normally, we would use ip_local_out for the clone.
+	 * Because iph->check is already correct and we don't pass it to
+	 * Xtables anyway, a shortcut to dst_output [forwards to ip_output] can
+	 * be taken. %IPSKB_REROUTED needs to be set so that ip_output does not
+	 * invoke POSTROUTING on the cloned packet.
+	 */
+	IPCB(skb)->flags |= IPSKB_REROUTED;
+	if (tee_tg_route4(skb, info))
+		ip_output(skb);
+
+	return XT_CONTINUE;
+}
+
+#ifdef WITH_IPV6
+static bool
+tee_tg_route6(struct sk_buff *skb, const struct xt_tee_tginfo *info)
+{
+	const struct ipv6hdr *iph = ipv6_hdr(skb);
+	struct dst_entry *dst;
+	struct flowi fl;
+
+	memset(&fl, 0, sizeof(fl));
+	fl.nl_u.ip6_u.daddr = info->gw.in6;
+	fl.nl_u.ip6_u.flowlabel = ((iph->flow_lbl[0] & 0xF) << 16) |
+				  (iph->flow_lbl[1] << 8) | iph->flow_lbl[2];
+
+	dst = ip6_route_output(dev_net(skb->dev), NULL, &fl);
+	if (dst == NULL) {
+		kfree_skb(skb);
+		return false;
+	}
+	dst_release(skb_dst(skb));
+	skb_dst_set(skb, dst);
+	skb->dev      = dst->dev;
+	skb->protocol = htons(ETH_P_IPV6);
+	return true;
+}
+
+static unsigned int
+tee_tg6(struct sk_buff *skb, const struct xt_target_param *par)
+{
+	const struct xt_tee_tginfo *info = par->targinfo;
+
+	if ((skb = skb_copy(skb, GFP_ATOMIC)) == NULL)
+		return XT_CONTINUE;
+
+#ifdef WITH_CONNTRACK
+	nf_conntrack_put(skb->nfct);
+	skb->nfct     = &tee_track.ct_general;
+	skb->nfctinfo = IP_CT_NEW;
+	nf_conntrack_get(skb->nfct);
+#endif
+	if (par->hooknum == NF_INET_PRE_ROUTING ||
+	    par->hooknum == NF_INET_LOCAL_IN) {
+		struct ipv6hdr *iph = ipv6_hdr(skb);
+		--iph->hop_limit;
+	}
+	IP6CB(skb)->flags |= IPSKB_REROUTED;
+	if (tee_tg_route6(skb, info))
+		ip6_output(skb);
+
+	return XT_CONTINUE;
+}
+#endif /* WITH_IPV6 */
+
+static int tee_tg_check(const struct xt_tgchk_param *par)
+{
+	const struct xt_tee_tginfo *info = par->targinfo;
+
+	/* 0.0.0.0 and :: not allowed */
+	return (memcmp(&info->gw, &tee_zero_address,
+	       sizeof(tee_zero_address)) == 0) ? -EINVAL : 0;
+}
+
+static struct xt_target tee_tg_reg[] __read_mostly = {
+	{
+		.name       = "TEE",
+		.revision   = 0,
+		.family     = NFPROTO_IPV4,
+		.target     = tee_tg4,
+		.targetsize = sizeof(struct xt_tee_tginfo),
+		.checkentry = tee_tg_check,
+		.me         = THIS_MODULE,
+	},
+#ifdef WITH_IPV6
+	{
+		.name       = "TEE",
+		.revision   = 0,
+		.family     = NFPROTO_IPV6,
+		.target     = tee_tg6,
+		.targetsize = sizeof(struct xt_tee_tginfo),
+		.checkentry = tee_tg_check,
+		.me         = THIS_MODULE,
+	},
+#endif
+};
+
+static int __init tee_tg_init(void)
+{
+#ifdef WITH_CONNTRACK
+	/*
+	 * Set up fake conntrack (stolen from raw.patch):
+	 * - to never be deleted, not in any hashes
+	 */
+	atomic_set(&tee_track.ct_general.use, 1);
+
+	/* - and look it like as a confirmed connection */
+	set_bit(IPS_CONFIRMED_BIT, &tee_track.status);
+
+	/* Initialize fake conntrack so that NAT will skip it */
+	tee_track.status |= IPS_NAT_DONE_MASK;
+#endif
+	return xt_register_targets(tee_tg_reg, ARRAY_SIZE(tee_tg_reg));
+}
+
+static void __exit tee_tg_exit(void)
+{
+	xt_unregister_targets(tee_tg_reg, ARRAY_SIZE(tee_tg_reg));
+}
+
+module_init(tee_tg_init);
+module_exit(tee_tg_exit);
+MODULE_AUTHOR("Sebastian ClaÃŸen <sebastian.classen@freenet.ag>");
+MODULE_AUTHOR("Jan Engelhardt <jengelh@medozas.de>");
+MODULE_DESCRIPTION("Xtables: Reroute packet copy");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS("ipt_TEE");
+MODULE_ALIAS("ip6t_TEE");
-- 
1.7.0.4

--
To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply related	[flat|nested] 22+ messages in thread

* [PATCH 4/8] netfilter: xtables2: make ip_tables reentrant
  2010-04-13 12:37 nf-next: reentrancy, new modules Jan Engelhardt
                   ` (2 preceding siblings ...)
  2010-04-13 12:37 ` [PATCH 3/8] netfilter: xtables: inclusion of xt_TEE Jan Engelhardt
@ 2010-04-13 12:37 ` Jan Engelhardt
  2010-04-13 12:37 ` [PATCH 5/8] netfilter: xt_TEE: have cloned packet travel through Xtables too Jan Engelhardt
                   ` (4 subsequent siblings)
  8 siblings, 0 replies; 22+ messages in thread
From: Jan Engelhardt @ 2010-04-13 12:37 UTC (permalink / raw)
  To: kaber; +Cc: netfilter-devel

Currently, the table traverser stores return addresses in the ruleset
itself (struct ip6t_entry->comefrom). This has a well-known drawback:
the jumpstack is overwritten on reentry, making it necessary for
targets to return absolute verdicts. Also, the ruleset (which might
be heavy memory-wise) needs to be replicated for each CPU that can
possibly invoke ip6t_do_table.

This patch decouples the jumpstack from struct ip6t_entry and instead
puts it into xt_table_info. Not being restricted by 'comefrom'
anymore, we can set up a stack as needed. By default, there is room
allocated for two entries into the traverser. The setting is
configurable at runtime through sysfs and will take effect when a
table is replaced by a new one.

arp_tables is not touched though, because there is just one/two
modules and further patches seek to collapse the table traverser
anyhow.

Signed-off-by: Jan Engelhardt <jengelh@medozas.de>
---
 include/linux/netfilter/x_tables.h |    7 +++
 net/ipv4/netfilter/arp_tables.c    |    6 ++-
 net/ipv4/netfilter/ip_tables.c     |   65 ++++++++++++++++--------------
 net/ipv6/netfilter/ip6_tables.c    |   56 ++++++++++----------------
 net/netfilter/x_tables.c           |   77 ++++++++++++++++++++++++++++++++++++
 5 files changed, 145 insertions(+), 66 deletions(-)

diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h
index 26ced0c..50c8672 100644
--- a/include/linux/netfilter/x_tables.h
+++ b/include/linux/netfilter/x_tables.h
@@ -401,6 +401,13 @@ struct xt_table_info {
 	unsigned int hook_entry[NF_INET_NUMHOOKS];
 	unsigned int underflow[NF_INET_NUMHOOKS];
 
+	/*
+	 * Number of user chains. Since tables cannot have loops, at most
+	 * @stacksize jumps (number of user chains) can possibly be made.
+	 */
+	unsigned int stacksize;
+	unsigned int *stackptr;
+	void ***jumpstack;
 	/* ipt_entry tables: one per CPU */
 	/* Note : this field MUST be the last one, see XT_TABLE_INFO_SZ */
 	void *entries[1];
diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c
index e8e363d..07a6990 100644
--- a/net/ipv4/netfilter/arp_tables.c
+++ b/net/ipv4/netfilter/arp_tables.c
@@ -649,6 +649,9 @@ static int translate_table(struct xt_table_info *newinfo, void *entry0,
 		if (ret != 0)
 			break;
 		++i;
+		if (strcmp(arpt_get_target(iter)->u.user.name,
+		    XT_ERROR_TARGET) == 0)
+			++newinfo->stacksize;
 	}
 	duprintf("translate_table: ARPT_ENTRY_ITERATE gives %d\n", ret);
 	if (ret != 0)
@@ -1774,8 +1777,7 @@ struct xt_table *arpt_register_table(struct net *net,
 {
 	int ret;
 	struct xt_table_info *newinfo;
-	struct xt_table_info bootstrap
-		= { 0, 0, 0, { 0 }, { 0 }, { } };
+	struct xt_table_info bootstrap = {0};
 	void *loc_cpu_entry;
 	struct xt_table *new_table;
 
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index 18c5b15..70900ec 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -321,8 +321,6 @@ ipt_do_table(struct sk_buff *skb,
 	     const struct net_device *out,
 	     struct xt_table *table)
 {
-#define tb_comefrom ((struct ipt_entry *)table_base)->comefrom
-
 	static const char nulldevname[IFNAMSIZ] __attribute__((aligned(sizeof(long))));
 	const struct iphdr *ip;
 	bool hotdrop = false;
@@ -330,7 +328,8 @@ ipt_do_table(struct sk_buff *skb,
 	unsigned int verdict = NF_DROP;
 	const char *indev, *outdev;
 	const void *table_base;
-	struct ipt_entry *e, *back;
+	struct ipt_entry *e, **jumpstack;
+	unsigned int *stackptr, origptr, cpu;
 	const struct xt_table_info *private;
 	struct xt_match_param mtpar;
 	struct xt_target_param tgpar;
@@ -356,19 +355,23 @@ ipt_do_table(struct sk_buff *skb,
 	IP_NF_ASSERT(table->valid_hooks & (1 << hook));
 	xt_info_rdlock_bh();
 	private = table->private;
-	table_base = private->entries[smp_processor_id()];
+	cpu        = smp_processor_id();
+	table_base = private->entries[cpu];
+	jumpstack  = (struct ipt_entry **)private->jumpstack[cpu];
+	stackptr   = &private->stackptr[cpu];
+	origptr    = *stackptr;
 
 	e = get_entry(table_base, private->hook_entry[hook]);
 
-	/* For return from builtin chain */
-	back = get_entry(table_base, private->underflow[hook]);
+	pr_devel("Entering %s(hook %u); sp at %u (UF %p)\n",
+		 table->name, hook, origptr,
+		 get_entry(table_base, private->underflow[hook]));
 
 	do {
 		const struct ipt_entry_target *t;
 		const struct xt_entry_match *ematch;
 
 		IP_NF_ASSERT(e);
-		IP_NF_ASSERT(back);
 		if (!ip_packet_match(ip, indev, outdev,
 		    &e->ip, mtpar.fragoff)) {
  no_match:
@@ -403,17 +406,28 @@ ipt_do_table(struct sk_buff *skb,
 					verdict = (unsigned)(-v) - 1;
 					break;
 				}
-				e = back;
-				back = get_entry(table_base, back->comefrom);
+				if (*stackptr == 0) {
+					e = get_entry(table_base,
+					    private->underflow[hook]);
+					pr_devel("Underflow (this is normal) "
+						 "to %p\n", e);
+				} else {
+					e = jumpstack[--*stackptr];
+					pr_devel("Pulled %p out from pos %u\n",
+						 e, *stackptr);
+					e = ipt_next_entry(e);
+				}
 				continue;
 			}
 			if (table_base + v != ipt_next_entry(e) &&
 			    !(e->ip.flags & IPT_F_GOTO)) {
-				/* Save old back ptr in next entry */
-				struct ipt_entry *next = ipt_next_entry(e);
-				next->comefrom = (void *)back - table_base;
-				/* set back pointer to next entry */
-				back = next;
+				if (*stackptr >= private->stacksize) {
+					verdict = NF_DROP;
+					break;
+				}
+				jumpstack[(*stackptr)++] = e;
+				pr_devel("Pushed %p into pos %u\n",
+					 e, *stackptr - 1);
 			}
 
 			e = get_entry(table_base, v);
@@ -426,18 +440,7 @@ ipt_do_table(struct sk_buff *skb,
 		tgpar.targinfo = t->data;
 
 
-#ifdef CONFIG_NETFILTER_DEBUG
-		tb_comefrom = 0xeeeeeeec;
-#endif
 		verdict = t->u.kernel.target->target(skb, &tgpar);
-#ifdef CONFIG_NETFILTER_DEBUG
-		if (tb_comefrom != 0xeeeeeeec && verdict == IPT_CONTINUE) {
-			printk("Target %s reentered!\n",
-			       t->u.kernel.target->name);
-			verdict = NF_DROP;
-		}
-		tb_comefrom = 0x57acc001;
-#endif
 		/* Target might have changed stuff. */
 		ip = ip_hdr(skb);
 		if (verdict == IPT_CONTINUE)
@@ -447,7 +450,9 @@ ipt_do_table(struct sk_buff *skb,
 			break;
 	} while (!hotdrop);
 	xt_info_rdunlock_bh();
-
+	pr_devel("Exiting %s; resetting sp from %u to %u\n",
+		 __func__, *stackptr, origptr);
+	*stackptr = origptr;
 #ifdef DEBUG_ALLOW_ALL
 	return NF_ACCEPT;
 #else
@@ -455,8 +460,6 @@ ipt_do_table(struct sk_buff *skb,
 		return NF_DROP;
 	else return verdict;
 #endif
-
-#undef tb_comefrom
 }
 
 /* Figures out from what hook each rule can be called: returns 0 if
@@ -838,6 +841,9 @@ translate_table(struct net *net, struct xt_table_info *newinfo, void *entry0,
 		if (ret != 0)
 			return ret;
 		++i;
+		if (strcmp(ipt_get_target(iter)->u.user.name,
+		    XT_ERROR_TARGET) == 0)
+			++newinfo->stacksize;
 	}
 
 	if (i != repl->num_entries) {
@@ -2086,8 +2092,7 @@ struct xt_table *ipt_register_table(struct net *net,
 {
 	int ret;
 	struct xt_table_info *newinfo;
-	struct xt_table_info bootstrap
-		= { 0, 0, 0, { 0 }, { 0 }, { } };
+	struct xt_table_info bootstrap = {0};
 	void *loc_cpu_entry;
 	struct xt_table *new_table;
 
diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c
index f2b815e..2a2770b 100644
--- a/net/ipv6/netfilter/ip6_tables.c
+++ b/net/ipv6/netfilter/ip6_tables.c
@@ -351,15 +351,14 @@ ip6t_do_table(struct sk_buff *skb,
 	      const struct net_device *out,
 	      struct xt_table *table)
 {
-#define tb_comefrom ((struct ip6t_entry *)table_base)->comefrom
-
 	static const char nulldevname[IFNAMSIZ] __attribute__((aligned(sizeof(long))));
 	bool hotdrop = false;
 	/* Initializing verdict to NF_DROP keeps gcc happy. */
 	unsigned int verdict = NF_DROP;
 	const char *indev, *outdev;
 	const void *table_base;
-	struct ip6t_entry *e, *back;
+	struct ip6t_entry *e, **jumpstack;
+	unsigned int *stackptr, origptr, cpu;
 	const struct xt_table_info *private;
 	struct xt_match_param mtpar;
 	struct xt_target_param tgpar;
@@ -383,19 +382,19 @@ ip6t_do_table(struct sk_buff *skb,
 
 	xt_info_rdlock_bh();
 	private = table->private;
-	table_base = private->entries[smp_processor_id()];
+	cpu        = smp_processor_id();
+	table_base = private->entries[cpu];
+	jumpstack  = (struct ip6t_entry **)private->jumpstack[cpu];
+	stackptr   = &private->stackptr[cpu];
+	origptr    = *stackptr;
 
 	e = get_entry(table_base, private->hook_entry[hook]);
 
-	/* For return from builtin chain */
-	back = get_entry(table_base, private->underflow[hook]);
-
 	do {
 		const struct ip6t_entry_target *t;
 		const struct xt_entry_match *ematch;
 
 		IP_NF_ASSERT(e);
-		IP_NF_ASSERT(back);
 		if (!ip6_packet_match(skb, indev, outdev, &e->ipv6,
 		    &mtpar.thoff, &mtpar.fragoff, &hotdrop)) {
  no_match:
@@ -432,17 +431,20 @@ ip6t_do_table(struct sk_buff *skb,
 					verdict = (unsigned)(-v) - 1;
 					break;
 				}
-				e = back;
-				back = get_entry(table_base, back->comefrom);
+				if (*stackptr == 0)
+					e = get_entry(table_base,
+					    private->underflow[hook]);
+				else
+					e = ip6t_next_entry(jumpstack[--*stackptr]);
 				continue;
 			}
 			if (table_base + v != ip6t_next_entry(e) &&
 			    !(e->ipv6.flags & IP6T_F_GOTO)) {
-				/* Save old back ptr in next entry */
-				struct ip6t_entry *next = ip6t_next_entry(e);
-				next->comefrom = (void *)back - table_base;
-				/* set back pointer to next entry */
-				back = next;
+				if (*stackptr >= private->stacksize) {
+					verdict = NF_DROP;
+					break;
+				}
+				jumpstack[(*stackptr)++] = e;
 			}
 
 			e = get_entry(table_base, v);
@@ -454,19 +456,7 @@ ip6t_do_table(struct sk_buff *skb,
 		tgpar.target   = t->u.kernel.target;
 		tgpar.targinfo = t->data;
 
-#ifdef CONFIG_NETFILTER_DEBUG
-		tb_comefrom = 0xeeeeeeec;
-#endif
 		verdict = t->u.kernel.target->target(skb, &tgpar);
-
-#ifdef CONFIG_NETFILTER_DEBUG
-		if (tb_comefrom != 0xeeeeeeec && verdict == IP6T_CONTINUE) {
-			printk("Target %s reentered!\n",
-			       t->u.kernel.target->name);
-			verdict = NF_DROP;
-		}
-		tb_comefrom = 0x57acc001;
-#endif
 		if (verdict == IP6T_CONTINUE)
 			e = ip6t_next_entry(e);
 		else
@@ -474,10 +464,8 @@ ip6t_do_table(struct sk_buff *skb,
 			break;
 	} while (!hotdrop);
 
-#ifdef CONFIG_NETFILTER_DEBUG
-	tb_comefrom = NETFILTER_LINK_POISON;
-#endif
 	xt_info_rdunlock_bh();
+	*stackptr = origptr;
 
 #ifdef DEBUG_ALLOW_ALL
 	return NF_ACCEPT;
@@ -486,8 +474,6 @@ ip6t_do_table(struct sk_buff *skb,
 		return NF_DROP;
 	else return verdict;
 #endif
-
-#undef tb_comefrom
 }
 
 /* Figures out from what hook each rule can be called: returns 0 if
@@ -869,6 +855,9 @@ translate_table(struct net *net, struct xt_table_info *newinfo, void *entry0,
 		if (ret != 0)
 			return ret;
 		++i;
+		if (strcmp(ip6t_get_target(iter)->u.user.name,
+		    XT_ERROR_TARGET) == 0)
+			++newinfo->stacksize;
 	}
 
 	if (i != repl->num_entries) {
@@ -2120,8 +2109,7 @@ struct xt_table *ip6t_register_table(struct net *net,
 {
 	int ret;
 	struct xt_table_info *newinfo;
-	struct xt_table_info bootstrap
-		= { 0, 0, 0, { 0 }, { 0 }, { } };
+	struct xt_table_info bootstrap = {0};
 	void *loc_cpu_entry;
 	struct xt_table *new_table;
 
diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c
index 8e23d8f..edde5c6 100644
--- a/net/netfilter/x_tables.c
+++ b/net/netfilter/x_tables.c
@@ -62,6 +62,9 @@ static const char *const xt_prefix[NFPROTO_NUMPROTO] = {
 	[NFPROTO_IPV6]   = "ip6",
 };
 
+/* Allow this many total (re)entries. */
+static const unsigned int xt_jumpstack_multiplier = 2;
+
 /* Registration hooks for targets. */
 int
 xt_register_target(struct xt_target *target)
@@ -680,6 +683,26 @@ void xt_free_table_info(struct xt_table_info *info)
 		else
 			vfree(info->entries[cpu]);
 	}
+
+	if (info->jumpstack != NULL) {
+		if (sizeof(void *) * info->stacksize > PAGE_SIZE) {
+			for_each_possible_cpu(cpu)
+				vfree(info->jumpstack[cpu]);
+		} else {
+			for_each_possible_cpu(cpu)
+				kfree(info->jumpstack[cpu]);
+		}
+	}
+
+	if (sizeof(void **) * nr_cpu_ids > PAGE_SIZE)
+		vfree(info->jumpstack);
+	else
+		kfree(info->jumpstack);
+	if (sizeof(unsigned int) * nr_cpu_ids > PAGE_SIZE)
+		vfree(info->stackptr);
+	else
+		kfree(info->stackptr);
+
 	kfree(info);
 }
 EXPORT_SYMBOL(xt_free_table_info);
@@ -724,6 +747,49 @@ EXPORT_SYMBOL_GPL(xt_compat_unlock);
 DEFINE_PER_CPU(struct xt_info_lock, xt_info_locks);
 EXPORT_PER_CPU_SYMBOL_GPL(xt_info_locks);
 
+static int xt_jumpstack_alloc(struct xt_table_info *i)
+{
+	unsigned int size;
+	int cpu;
+
+	size = sizeof(unsigned int) * nr_cpu_ids;
+	if (size > PAGE_SIZE)
+		i->stackptr = vmalloc(size);
+	else
+		i->stackptr = kmalloc(size, GFP_KERNEL);
+	if (i->stackptr == NULL)
+		return -ENOMEM;
+	memset(i->stackptr, 0, size);
+
+	size = sizeof(void **) * nr_cpu_ids;
+	if (size > PAGE_SIZE)
+		i->jumpstack = vmalloc(size);
+	else
+		i->jumpstack = kmalloc(size, GFP_KERNEL);
+	if (i->jumpstack == NULL)
+		return -ENOMEM;
+	memset(i->jumpstack, 0, size);
+
+	i->stacksize *= xt_jumpstack_multiplier;
+	size = sizeof(void *) * i->stacksize;
+	for_each_possible_cpu(cpu) {
+		if (size > PAGE_SIZE)
+			i->jumpstack[cpu] = vmalloc_node(size,
+				cpu_to_node(cpu));
+		else
+			i->jumpstack[cpu] = kmalloc_node(size,
+				GFP_KERNEL, cpu_to_node(cpu));
+		if (i->jumpstack[cpu] == NULL)
+			/*
+			 * Freeing will be done later on by the callers. The
+			 * chain is: xt_replace_table -> __do_replace ->
+			 * do_replace -> xt_free_table_info.
+			 */
+			return -ENOMEM;
+	}
+
+	return 0;
+}
 
 struct xt_table_info *
 xt_replace_table(struct xt_table *table,
@@ -732,6 +798,7 @@ xt_replace_table(struct xt_table *table,
 	      int *error)
 {
 	struct xt_table_info *private;
+	int ret;
 
 	/* Do the substitution. */
 	local_bh_disable();
@@ -746,6 +813,12 @@ xt_replace_table(struct xt_table *table,
 		return NULL;
 	}
 
+	ret = xt_jumpstack_alloc(newinfo);
+	if (ret < 0) {
+		*error = ret;
+		return NULL;
+	}
+
 	table->private = newinfo;
 	newinfo->initial_entries = private->initial_entries;
 
@@ -770,6 +843,10 @@ struct xt_table *xt_register_table(struct net *net,
 	struct xt_table_info *private;
 	struct xt_table *t, *table;
 
+	ret = xt_jumpstack_alloc(newinfo);
+	if (ret < 0)
+		return ERR_PTR(ret);
+
 	/* Don't add one object to multiple lists. */
 	table = kmemdup(input_table, sizeof(struct xt_table), GFP_KERNEL);
 	if (!table) {
-- 
1.7.0.4


^ permalink raw reply related	[flat|nested] 22+ messages in thread

* [PATCH 5/8] netfilter: xt_TEE: have cloned packet travel through Xtables too
  2010-04-13 12:37 nf-next: reentrancy, new modules Jan Engelhardt
                   ` (3 preceding siblings ...)
  2010-04-13 12:37 ` [PATCH 4/8] netfilter: xtables2: make ip_tables reentrant Jan Engelhardt
@ 2010-04-13 12:37 ` Jan Engelhardt
  2010-04-13 12:37 ` [PATCH 6/8] netfilter: xtables: remove old comments about reentrancy Jan Engelhardt
                   ` (3 subsequent siblings)
  8 siblings, 0 replies; 22+ messages in thread
From: Jan Engelhardt @ 2010-04-13 12:37 UTC (permalink / raw)
  To: kaber; +Cc: netfilter-devel

Since Xtables is now reentrant/nestable, the cloned packet can also go
through Xtables and be subject to rules itself.

Signed-off-by: Jan Engelhardt <jengelh@medozas.de>
---
 net/ipv4/ip_output.c   |    1 -
 net/ipv6/ip6_output.c  |    1 -
 net/netfilter/xt_TEE.c |   38 +++++++++++++++++---------------------
 3 files changed, 17 insertions(+), 23 deletions(-)

diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 0abfdde..f09135e 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -309,7 +309,6 @@ int ip_output(struct sk_buff *skb)
 			    ip_finish_output,
 			    !(IPCB(skb)->flags & IPSKB_REROUTED));
 }
-EXPORT_SYMBOL_GPL(ip_output);
 
 int ip_queue_xmit(struct sk_buff *skb, int ipfragok)
 {
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 307d8bf..7e10f62 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -176,7 +176,6 @@ int ip6_output(struct sk_buff *skb)
 			    ip6_finish_output,
 			    !(IP6CB(skb)->flags & IPSKB_REROUTED));
 }
-EXPORT_SYMBOL_GPL(ip6_output);
 
 /*
  *	xmit an sk_buff (used by TCP)
diff --git a/net/netfilter/xt_TEE.c b/net/netfilter/xt_TEE.c
index 79c5bf9..8ff8db4 100644
--- a/net/netfilter/xt_TEE.c
+++ b/net/netfilter/xt_TEE.c
@@ -12,6 +12,7 @@
  */
 #include <linux/ip.h>
 #include <linux/module.h>
+#include <linux/percpu.h>
 #include <linux/route.h>
 #include <linux/skbuff.h>
 #include <net/checksum.h>
@@ -33,6 +34,7 @@ static struct nf_conn tee_track;
 #endif
 
 static const union nf_inet_addr tee_zero_address;
+static DEFINE_PER_CPU(bool, tee_active);
 
 static bool
 tee_tg_route4(struct sk_buff *skb, const struct xt_tee_tginfo *info)
@@ -63,13 +65,15 @@ tee_tg4(struct sk_buff *skb, const struct xt_target_param *par)
 	const struct xt_tee_tginfo *info = par->targinfo;
 	struct iphdr *iph;
 
+	if (percpu_read(tee_active))
+		return XT_CONTINUE;
 	/*
 	 * Copy the skb, and route the copy. Will later return %XT_CONTINUE for
 	 * the original skb, which should continue on its way as if nothing has
 	 * happened. The copy should be independently delivered to the TEE
 	 * --gateway.
 	 */
-	skb = skb_copy(skb, GFP_ATOMIC);
+	skb = pskb_copy(skb, GFP_ATOMIC);
 	if (skb == NULL)
 		return XT_CONTINUE;
 
@@ -96,22 +100,11 @@ tee_tg4(struct sk_buff *skb, const struct xt_target_param *par)
 		--iph->ttl;
 	ip_send_check(iph);
 
-	/*
-	 * Xtables is not reentrant currently, so a choice has to be made:
-	 * 1. return absolute verdict for the original and let the cloned
-	 *    packet travel through the chains
-	 * 2. let the original continue travelling and not pass the clone
-	 *    to Xtables.
-	 * #2 is chosen. Normally, we would use ip_local_out for the clone.
-	 * Because iph->check is already correct and we don't pass it to
-	 * Xtables anyway, a shortcut to dst_output [forwards to ip_output] can
-	 * be taken. %IPSKB_REROUTED needs to be set so that ip_output does not
-	 * invoke POSTROUTING on the cloned packet.
-	 */
-	IPCB(skb)->flags |= IPSKB_REROUTED;
-	if (tee_tg_route4(skb, info))
-		ip_output(skb);
-
+	if (tee_tg_route4(skb, info)) {
+		percpu_write(tee_active, true);
+		ip_local_out(skb);
+		percpu_write(tee_active, false);
+	}
 	return XT_CONTINUE;
 }
 
@@ -145,6 +138,8 @@ tee_tg6(struct sk_buff *skb, const struct xt_target_param *par)
 {
 	const struct xt_tee_tginfo *info = par->targinfo;
 
+	if (percpu_read(tee_active))
+		return XT_CONTINUE;
 	if ((skb = skb_copy(skb, GFP_ATOMIC)) == NULL)
 		return XT_CONTINUE;
 
@@ -159,10 +154,11 @@ tee_tg6(struct sk_buff *skb, const struct xt_target_param *par)
 		struct ipv6hdr *iph = ipv6_hdr(skb);
 		--iph->hop_limit;
 	}
-	IP6CB(skb)->flags |= IPSKB_REROUTED;
-	if (tee_tg_route6(skb, info))
-		ip6_output(skb);
-
+	if (tee_tg_route6(skb, info)) {
+		percpu_write(tee_active, true);
+		ip6_local_out(skb);
+		percpu_write(tee_active, false);
+	}
 	return XT_CONTINUE;
 }
 #endif /* WITH_IPV6 */
-- 
1.7.0.4


^ permalink raw reply related	[flat|nested] 22+ messages in thread

* [PATCH 6/8] netfilter: xtables: remove old comments about reentrancy
  2010-04-13 12:37 nf-next: reentrancy, new modules Jan Engelhardt
                   ` (4 preceding siblings ...)
  2010-04-13 12:37 ` [PATCH 5/8] netfilter: xt_TEE: have cloned packet travel through Xtables too Jan Engelhardt
@ 2010-04-13 12:37 ` Jan Engelhardt
  2010-04-13 12:37 ` [PATCH 7/8] netfilter: xtables: inclusion of xt_SYSRQ Jan Engelhardt
                   ` (2 subsequent siblings)
  8 siblings, 0 replies; 22+ messages in thread
From: Jan Engelhardt @ 2010-04-13 12:37 UTC (permalink / raw)
  To: kaber; +Cc: netfilter-devel

Signed-off-by: Jan Engelhardt <jengelh@medozas.de>
---
 net/ipv4/netfilter/ip_tables.c   |    2 --
 net/ipv4/netfilter/ipt_REJECT.c  |    3 ---
 net/ipv6/netfilter/ip6_tables.c  |    2 --
 net/ipv6/netfilter/ip6t_REJECT.c |    3 ---
 4 files changed, 0 insertions(+), 10 deletions(-)

diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index 70900ec..bb5e0d9 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -434,8 +434,6 @@ ipt_do_table(struct sk_buff *skb,
 			continue;
 		}
 
-		/* Targets which reenter must return
-		   abs. verdicts */
 		tgpar.target   = t->u.kernel.target;
 		tgpar.targinfo = t->data;
 
diff --git a/net/ipv4/netfilter/ipt_REJECT.c b/net/ipv4/netfilter/ipt_REJECT.c
index b026014..038fa0b 100644
--- a/net/ipv4/netfilter/ipt_REJECT.c
+++ b/net/ipv4/netfilter/ipt_REJECT.c
@@ -139,9 +139,6 @@ reject_tg(struct sk_buff *skb, const struct xt_target_param *par)
 {
 	const struct ipt_reject_info *reject = par->targinfo;
 
-	/* WARNING: This code causes reentry within iptables.
-	   This means that the iptables jump stack is now crap.  We
-	   must return an absolute verdict. --RR */
 	switch (reject->with) {
 	case IPT_ICMP_NET_UNREACHABLE:
 		send_unreach(skb, ICMP_NET_UNREACH);
diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c
index 2a2770b..7afa117 100644
--- a/net/ipv6/netfilter/ip6_tables.c
+++ b/net/ipv6/netfilter/ip6_tables.c
@@ -451,8 +451,6 @@ ip6t_do_table(struct sk_buff *skb,
 			continue;
 		}
 
-		/* Targets which reenter must return
-		   abs. verdicts */
 		tgpar.target   = t->u.kernel.target;
 		tgpar.targinfo = t->data;
 
diff --git a/net/ipv6/netfilter/ip6t_REJECT.c b/net/ipv6/netfilter/ip6t_REJECT.c
index 55b9b2d..dad9762 100644
--- a/net/ipv6/netfilter/ip6t_REJECT.c
+++ b/net/ipv6/netfilter/ip6t_REJECT.c
@@ -179,9 +179,6 @@ reject_tg6(struct sk_buff *skb, const struct xt_target_param *par)
 	struct net *net = dev_net((par->in != NULL) ? par->in : par->out);
 
 	pr_debug("%s: medium point\n", __func__);
-	/* WARNING: This code causes reentry within ip6tables.
-	   This means that the ip6tables jump stack is now crap.  We
-	   must return an absolute verdict. --RR */
 	switch (reject->with) {
 	case IP6T_ICMP6_NO_ROUTE:
 		send_unreach(net, skb, ICMPV6_NOROUTE, par->hooknum);
-- 
1.7.0.4


^ permalink raw reply related	[flat|nested] 22+ messages in thread

* [PATCH 7/8] netfilter: xtables: inclusion of xt_SYSRQ
  2010-04-13 12:37 nf-next: reentrancy, new modules Jan Engelhardt
                   ` (5 preceding siblings ...)
  2010-04-13 12:37 ` [PATCH 6/8] netfilter: xtables: remove old comments about reentrancy Jan Engelhardt
@ 2010-04-13 12:37 ` Jan Engelhardt
  2010-04-13 12:37 ` [PATCH 8/8] netfilter: xtables: inclusion of xt_condition Jan Engelhardt
  2010-04-13 13:26 ` nf-next: reentrancy, new modules Patrick McHardy
  8 siblings, 0 replies; 22+ messages in thread
From: Jan Engelhardt @ 2010-04-13 12:37 UTC (permalink / raw)
  To: kaber; +Cc: netfilter-devel

The SYSRQ target will allow to remotely invoke sysrq on the local
machine. Authentication is by means of a pre-shared key that can
either be transmitted plaintext or digest-secured.

Signed-off-by: Jan Engelhardt <jengelh@medozas.de>
---
 net/netfilter/Kconfig    |   12 ++
 net/netfilter/Makefile   |    1 +
 net/netfilter/xt_SYSRQ.c |  354 ++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 367 insertions(+), 0 deletions(-)
 create mode 100644 net/netfilter/xt_SYSRQ.c

diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
index 673a6c8..bfd9b6f 100644
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -502,6 +502,18 @@ config NETFILTER_XT_TARGET_RATEEST
 
 	  To compile it as a module, choose M here.  If unsure, say N.
 
+config NETFILTER_XT_TARGET_SYSRQ
+	tristate '"SYSRQ" - remote sysrq invocation'
+	depends on NETFILTER_ADVANCED
+	---help---
+	This option enables the "SYSRQ" target which can be used to trigger
+	sysrq from a remote machine using a magic UDP packet with a pre-shared
+	password. This is useful when the receiving host has locked up in an
+	Oops yet still can process incoming packets.
+
+	Besides plaintext packets, digest-secured SYSRQ requests will be
+	supported when CONFIG_CRYPTO is enabled.
+
 config NETFILTER_XT_TARGET_TEE
 	tristate '"TEE" - packet cloning to alternate destiantion'
 	depends on NETFILTER_ADVANCED
diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile
index 14e3a8f..f032195 100644
--- a/net/netfilter/Makefile
+++ b/net/netfilter/Makefile
@@ -56,6 +56,7 @@ obj-$(CONFIG_NETFILTER_XT_TARGET_NFQUEUE) += xt_NFQUEUE.o
 obj-$(CONFIG_NETFILTER_XT_TARGET_NOTRACK) += xt_NOTRACK.o
 obj-$(CONFIG_NETFILTER_XT_TARGET_RATEEST) += xt_RATEEST.o
 obj-$(CONFIG_NETFILTER_XT_TARGET_SECMARK) += xt_SECMARK.o
+obj-$(CONFIG_NETFILTER_XT_TARGET_SYSRQ) += xt_SYSRQ.o
 obj-$(CONFIG_NETFILTER_XT_TARGET_TPROXY) += xt_TPROXY.o
 obj-$(CONFIG_NETFILTER_XT_TARGET_TCPMSS) += xt_TCPMSS.o
 obj-$(CONFIG_NETFILTER_XT_TARGET_TCPOPTSTRIP) += xt_TCPOPTSTRIP.o
diff --git a/net/netfilter/xt_SYSRQ.c b/net/netfilter/xt_SYSRQ.c
new file mode 100644
index 0000000..929b204
--- /dev/null
+++ b/net/netfilter/xt_SYSRQ.c
@@ -0,0 +1,354 @@
+/*
+ *	"SYSRQ" target extension for Netfilter
+ *	Copyright Â© Jan Engelhardt <jengelh [at] medozas de>, 2008 - 2010
+ *
+ *	Based upon the ipt_SYSRQ idea by Marek Zalem <marek [at] terminus sk>
+ *
+ *	This program is free software; you can redistribute it and/or
+ *	modify it under the terms of the GNU General Public License
+ *	version 2 or later as published by the Free Software Foundation.
+ */
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+#include <linux/in.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/sysrq.h>
+#include <linux/udp.h>
+#include <linux/netfilter_ipv4/ip_tables.h>
+#include <linux/netfilter_ipv6/ip6_tables.h>
+#include <linux/netfilter/x_tables.h>
+#include <linux/crypto.h>
+#include <linux/scatterlist.h>
+#include <net/ip.h>
+
+#if defined(CONFIG_CRYPTO) || defined(CRYPTO_CONFIG_MODULE)
+#	define WITH_CRYPTO 1
+#endif
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#	define WITH_IPV6 1
+#endif
+
+static bool sysrq_once;
+static char sysrq_password[64];
+static char sysrq_hash[16] = "sha1";
+static long sysrq_seqno;
+static int sysrq_debug;
+module_param_string(password, sysrq_password, sizeof(sysrq_password),
+	S_IRUSR | S_IWUSR);
+module_param_string(hash, sysrq_hash, sizeof(sysrq_hash), S_IRUSR);
+module_param_named(seqno, sysrq_seqno, long, S_IRUSR | S_IWUSR);
+module_param_named(debug, sysrq_debug, int, S_IRUSR | S_IWUSR);
+MODULE_PARM_DESC(password, "password for remote sysrq");
+MODULE_PARM_DESC(hash, "hash algorithm, default sha1");
+MODULE_PARM_DESC(seqno, "sequence number for remote sysrq");
+MODULE_PARM_DESC(debug, "debugging: 0=off, 1=on");
+
+#ifdef WITH_CRYPTO
+static struct crypto_hash *sysrq_tfm;
+static int sysrq_digest_size;
+static unsigned char *sysrq_digest_password;
+static unsigned char *sysrq_digest;
+static char *sysrq_hexdigest;
+
+/*
+ * The data is of the form "<requests>,<seqno>,<salt>,<hash>" where <requests>
+ * is a series of sysrq requests; <seqno> is a sequence number that must be
+ * greater than the last sequence number; <salt> is some random bytes; and
+ * <hash> is the hash of everything up to and including the preceding ","
+ * together with the password.
+ *
+ * For example
+ *
+ *   salt=$RANDOM
+ *   req="s,$(date +%s),$salt"
+ *   echo "$req,$(echo -n $req,secret | sha1sum | cut -c1-40)"
+ *
+ * You will want a better salt and password than that though :-)
+ */
+static unsigned int sysrq_tg(const void *pdata, uint16_t len)
+{
+	const char *data = pdata;
+	int i, n;
+	struct scatterlist sg[2];
+	struct hash_desc desc;
+	int ret;
+	long new_seqno = 0;
+
+	if (*sysrq_password == '\0') {
+		if (!sysrq_once)
+			pr_info("No password set\n");
+		sysrq_once = true;
+		return NF_DROP;
+	}
+	if (len == 0)
+		return NF_DROP;
+
+	for (i = 0; sysrq_password[i] != '\0' &&
+	     sysrq_password[i] != '\n'; ++i)
+		/* loop */;
+	sysrq_password[i] = '\0';
+
+	i = 0;
+	for (n = 0; n < len - 1; ++n) {
+		if (i == 1 && '0' <= data[n] && data[n] <= '9')
+			new_seqno = 10L * new_seqno + data[n] - '0';
+		if (data[n] == ',' && ++i == 3)
+			break;
+	}
+	++n;
+	if (i != 3) {
+		if (sysrq_debug)
+			pr_info("badly formatted request\n");
+		return NF_DROP;
+	}
+	if (sysrq_seqno >= new_seqno) {
+		if (sysrq_debug)
+			pr_info("old sequence number ignored\n");
+		return NF_DROP;
+	}
+
+	desc.tfm   = sysrq_tfm;
+	desc.flags = 0;
+	ret = crypto_hash_init(&desc);
+	if (ret != 0)
+		goto hash_fail;
+	sg_init_table(sg, 2);
+	sg_set_buf(&sg[0], data, n);
+	strcpy(sysrq_digest_password, sysrq_password);
+	i = strlen(sysrq_digest_password);
+	sg_set_buf(&sg[1], sysrq_digest_password, i);
+	ret = crypto_hash_digest(&desc, sg, n + i, sysrq_digest);
+	if (ret != 0)
+		goto hash_fail;
+
+	for (i = 0; i < sysrq_digest_size; ++i) {
+		sysrq_hexdigest[2*i] =
+			"0123456789abcdef"[(sysrq_digest[i] >> 4) & 0xf];
+		sysrq_hexdigest[2*i+1] =
+			"0123456789abcdef"[sysrq_digest[i] & 0xf];
+	}
+	sysrq_hexdigest[2*sysrq_digest_size] = '\0';
+	if (len - n < sysrq_digest_size) {
+		if (sysrq_debug)
+			pr_info("Short digest, expected %s\n",
+				sysrq_hexdigest);
+		return NF_DROP;
+	}
+	if (strncmp(data + n, sysrq_hexdigest, sysrq_digest_size) != 0) {
+		if (sysrq_debug)
+			pr_info("Bad digest, expected %s\n", sysrq_hexdigest);
+		return NF_DROP;
+	}
+
+	/* Now we trust the requester */
+	sysrq_seqno = new_seqno;
+	for (i = 0; i < len && data[i] != ','; ++i) {
+		pr_info("SysRq %c\n", data[i]);
+		handle_sysrq(data[i], NULL);
+	}
+	return NF_ACCEPT;
+
+ hash_fail:
+	pr_warning("digest failure\n");
+	return NF_DROP;
+}
+#else
+static unsigned int sysrq_tg(const void *pdata, uint16_t len)
+{
+	const char *data = pdata;
+	char c;
+
+	if (*sysrq_password == '\0') {
+		if (!sysrq_once)
+			pr_info("No password set\n");
+		sysrq_once = true;
+		return NF_DROP;
+	}
+
+	if (len == 0)
+		return NF_DROP;
+
+	c = *data;
+	if (strncmp(&data[1], sysrq_password, len - 1) != 0) {
+		pr_warning("Failed attempt - password mismatch\n");
+		return NF_DROP;
+	}
+
+	handle_sysrq(c, NULL);
+	return NF_ACCEPT;
+}
+#endif
+
+static unsigned int
+sysrq_tg4(struct sk_buff *skb, const struct xt_target_param *par)
+{
+	const struct iphdr *iph;
+	const struct udphdr *udph;
+	uint16_t len;
+
+	if (skb_linearize(skb) < 0)
+		return NF_DROP;
+
+	iph = ip_hdr(skb);
+	if (iph->protocol != IPPROTO_UDP && iph->protocol != IPPROTO_UDPLITE)
+		return NF_DROP;
+
+	udph = (const void *)iph + ip_hdrlen(skb);
+	len  = ntohs(udph->len) - sizeof(struct udphdr);
+
+	if (sysrq_debug)
+		pr_info(": %pI4:%u -> :%u len=%u\n", &iph->saddr,
+			htons(udph->source), htons(udph->dest), len);
+	return sysrq_tg((void *)udph + sizeof(struct udphdr), len);
+}
+
+#ifdef WITH_IPV6
+static unsigned int
+sysrq_tg6(struct sk_buff *skb, const struct xt_target_param *par)
+{
+	const struct ipv6hdr *iph;
+	const struct udphdr *udph;
+	unsigned short frag_off;
+	unsigned int th_off;
+	uint16_t len;
+
+	if (skb_linearize(skb) < 0)
+		return NF_DROP;
+
+	iph = ipv6_hdr(skb);
+	if (ipv6_find_hdr(skb, &th_off, IPPROTO_UDP, &frag_off) < 0 ||
+	    frag_off > 0)
+		return NF_ACCEPT; /* sink it */
+
+	udph = (const void *)iph + th_off;
+	len  = ntohs(udph->len) - sizeof(struct udphdr);
+
+	if (sysrq_debug)
+		pr_info("%pI6:%hu -> :%hu len=%u\n", &iph->saddr,
+			ntohs(udph->source), ntohs(udph->dest), len);
+	return sysrq_tg(udph + sizeof(struct udphdr), len);
+}
+#endif
+
+static int sysrq_tg_check(const struct xt_tgchk_param *par)
+{
+	if (par->target->family == NFPROTO_IPV4) {
+		const struct ipt_entry *entry = par->entryinfo;
+
+		if ((entry->ip.proto != IPPROTO_UDP &&
+		    entry->ip.proto != IPPROTO_UDPLITE) ||
+		    entry->ip.invflags & XT_INV_PROTO)
+			goto out;
+	} else if (par->target->family == NFPROTO_IPV6) {
+		const struct ip6t_entry *entry = par->entryinfo;
+
+		if ((entry->ipv6.proto != IPPROTO_UDP &&
+		    entry->ipv6.proto != IPPROTO_UDPLITE) ||
+		    entry->ipv6.invflags & XT_INV_PROTO)
+			goto out;
+	}
+
+	return true;
+
+ out:
+	pr_info("only available for UDP and UDP-Lite");
+	return false;
+}
+
+static struct xt_target sysrq_tg_reg[] __read_mostly = {
+	{
+		.name       = "SYSRQ",
+		.revision   = 1,
+		.family     = NFPROTO_IPV4,
+		.target     = sysrq_tg4,
+		.checkentry = sysrq_tg_check,
+		.me         = THIS_MODULE,
+	},
+#ifdef WITH_IPV6
+	{
+		.name       = "SYSRQ",
+		.revision   = 1,
+		.family     = NFPROTO_IPV6,
+		.target     = sysrq_tg6,
+		.checkentry = sysrq_tg_check,
+		.me         = THIS_MODULE,
+	},
+#endif
+};
+
+static void sysrq_crypto_exit(void)
+{
+#ifdef WITH_CRYPTO
+	if (sysrq_tfm)
+		crypto_free_hash(sysrq_tfm);
+	if (sysrq_digest)
+		kfree(sysrq_digest);
+	if (sysrq_hexdigest)
+		kfree(sysrq_hexdigest);
+	if (sysrq_digest_password)
+		kfree(sysrq_digest_password);
+#endif
+}
+
+static int __init sysrq_crypto_init(void)
+{
+#if defined(WITH_CRYPTO)
+	struct timeval now;
+	int ret;
+
+	sysrq_tfm = crypto_alloc_hash(sysrq_hash, 0, CRYPTO_ALG_ASYNC);
+	if (IS_ERR(sysrq_tfm)) {
+		pr_err("Could not find or load %s hash\n", sysrq_hash);
+		sysrq_tfm = NULL;
+		ret = PTR_ERR(sysrq_tfm);
+		goto fail;
+	}
+	sysrq_digest_size = crypto_hash_digestsize(sysrq_tfm);
+	sysrq_digest = kmalloc(sysrq_digest_size, GFP_KERNEL);
+	ret = -ENOMEM;
+	if (sysrq_digest == NULL)
+		goto fail;
+	sysrq_hexdigest = kmalloc(2 * sysrq_digest_size + 1, GFP_KERNEL);
+	if (sysrq_hexdigest == NULL)
+		goto fail;
+	sysrq_digest_password = kmalloc(sizeof(sysrq_password), GFP_KERNEL);
+	if (sysrq_digest_password == NULL)
+		goto fail;
+	do_gettimeofday(&now);
+	sysrq_seqno = now.tv_sec;
+	ret = xt_register_targets(sysrq_tg_reg, ARRAY_SIZE(sysrq_tg_reg));
+	if (ret < 0)
+		goto fail;
+	return ret;
+
+ fail:
+	sysrq_crypto_exit();
+	return ret;
+#else
+	pr_info("compiled without crypto\n");
+#endif
+	return -EINVAL;
+}
+
+static int __init sysrq_tg_init(void)
+{
+	if (sysrq_crypto_init() < 0)
+		pr_info("starting without crypto\n");
+	return xt_register_targets(sysrq_tg_reg, ARRAY_SIZE(sysrq_tg_reg));
+}
+
+static void __exit sysrq_tg_exit(void)
+{
+	sysrq_crypto_exit();
+	xt_unregister_targets(sysrq_tg_reg, ARRAY_SIZE(sysrq_tg_reg));
+}
+
+module_init(sysrq_tg_init);
+module_exit(sysrq_tg_exit);
+MODULE_DESCRIPTION("Xtables: triggering SYSRQ remotely");
+MODULE_AUTHOR("Jan Engelhardt <jengelh@medozas.de>");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS("ipt_SYSRQ");
+MODULE_ALIAS("ip6t_SYSRQ");
-- 
1.7.0.4

--
To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply related	[flat|nested] 22+ messages in thread

* [PATCH 8/8] netfilter: xtables: inclusion of xt_condition
  2010-04-13 12:37 nf-next: reentrancy, new modules Jan Engelhardt
                   ` (6 preceding siblings ...)
  2010-04-13 12:37 ` [PATCH 7/8] netfilter: xtables: inclusion of xt_SYSRQ Jan Engelhardt
@ 2010-04-13 12:37 ` Jan Engelhardt
  2010-04-13 13:26 ` nf-next: reentrancy, new modules Patrick McHardy
  8 siblings, 0 replies; 22+ messages in thread
From: Jan Engelhardt @ 2010-04-13 12:37 UTC (permalink / raw)
  To: kaber; +Cc: netfilter-devel

xt_condition can be used by userspace to influence decisions in rules
by means of togglable variables without having to reload the entire
ruleset.

Signed-off-by: Jan Engelhardt <jengelh@medozas.de>
---
 include/linux/netfilter/Kbuild         |    1 +
 include/linux/netfilter/xt_condition.h |   14 ++
 net/netfilter/Kconfig                  |    8 +
 net/netfilter/Makefile                 |    1 +
 net/netfilter/xt_condition.c           |  243 ++++++++++++++++++++++++++++++++
 5 files changed, 267 insertions(+), 0 deletions(-)
 create mode 100644 include/linux/netfilter/xt_condition.h
 create mode 100644 net/netfilter/xt_condition.c

diff --git a/include/linux/netfilter/Kbuild b/include/linux/netfilter/Kbuild
index 48767cd..6b67603 100644
--- a/include/linux/netfilter/Kbuild
+++ b/include/linux/netfilter/Kbuild
@@ -19,6 +19,7 @@ header-y += xt_TCPOPTSTRIP.h
 header-y += xt_TEE.h
 header-y += xt_TPROXY.h
 header-y += xt_comment.h
+header-y += xt_condition.h
 header-y += xt_connbytes.h
 header-y += xt_connlimit.h
 header-y += xt_connmark.h
diff --git a/include/linux/netfilter/xt_condition.h b/include/linux/netfilter/xt_condition.h
new file mode 100644
index 0000000..4faf3ca
--- /dev/null
+++ b/include/linux/netfilter/xt_condition.h
@@ -0,0 +1,14 @@
+#ifndef _XT_CONDITION_H
+#define _XT_CONDITION_H
+
+#include <linux/types.h>
+
+struct xt_condition_mtinfo {
+	char name[31];
+	__u8 invert;
+
+	/* Used internally by the kernel */
+	void *condvar __attribute__((aligned(8)));
+};
+
+#endif /* _XT_CONDITION_H */
diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
index bfd9b6f..dd74e7d 100644
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -624,6 +624,14 @@ config NETFILTER_XT_MATCH_COMMENT
 	  If you want to compile it as a module, say M here and read
 	  <file:Documentation/kbuild/modules.txt>.  If unsure, say `N'.
 
+config NETFILTER_XT_MATCH_CONDITION
+	tristate '"condition" match support'
+	depends on NETFILTER_ADVANCED
+	depends on PROC_FS
+	---help---
+	This option allows you to match firewall rules against condition
+	variables stored in the /proc/net/nf_condition directory.
+
 config NETFILTER_XT_MATCH_CONNBYTES
 	tristate  '"connbytes" per-connection counter match support'
 	depends on NF_CONNTRACK
diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile
index f032195..e75d5fa 100644
--- a/net/netfilter/Makefile
+++ b/net/netfilter/Makefile
@@ -66,6 +66,7 @@ obj-$(CONFIG_NETFILTER_XT_TARGET_TRACE) += xt_TRACE.o
 # matches
 obj-$(CONFIG_NETFILTER_XT_MATCH_CLUSTER) += xt_cluster.o
 obj-$(CONFIG_NETFILTER_XT_MATCH_COMMENT) += xt_comment.o
+obj-$(CONFIG_NETFILTER_XT_MATCH_CONDITION) += xt_condition.o
 obj-$(CONFIG_NETFILTER_XT_MATCH_CONNBYTES) += xt_connbytes.o
 obj-$(CONFIG_NETFILTER_XT_MATCH_CONNLIMIT) += xt_connlimit.o
 obj-$(CONFIG_NETFILTER_XT_MATCH_CONNTRACK) += xt_conntrack.o
diff --git a/net/netfilter/xt_condition.c b/net/netfilter/xt_condition.c
new file mode 100644
index 0000000..d3dcaa4
--- /dev/null
+++ b/net/netfilter/xt_condition.c
@@ -0,0 +1,243 @@
+/*
+ *	"condition" match extension for Xtables
+ *
+ *	Description: This module allows firewall rules to match using
+ *	condition variables available through procfs.
+ *
+ *	Authors:
+ *	Stephane Ouellette <ouellettes [at] videotron ca>, 2002-10-22
+ *	Massimiliano Hofer <max [at] nucleus it>, 2006-05-15
+ *
+ *	This program is free software; you can redistribute it and/or modify it
+ *	under the terms of the GNU General Public License; either version 2
+ *	or 3 of the License, as published by the Free Software Foundation.
+ */
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+#include <linux/kernel.h>
+#include <linux/list.h>
+#include <linux/module.h>
+#include <linux/proc_fs.h>
+#include <linux/spinlock.h>
+#include <linux/string.h>
+#include <linux/version.h>
+#include <linux/netfilter/x_tables.h>
+#include <linux/netfilter/xt_condition.h>
+#include <asm/uaccess.h>
+
+/* Defaults, these can be overridden on the module command-line. */
+static unsigned int condition_list_perms = S_IRUSR | S_IWUSR;
+static unsigned int condition_uid_perms;
+static unsigned int condition_gid_perms;
+
+MODULE_AUTHOR("Stephane Ouellette <ouellettes@videotron.ca>");
+MODULE_AUTHOR("Massimiliano Hofer <max@nucleus.it>");
+MODULE_AUTHOR("Jan Engelhardt <jengelh@medozas.de>");
+MODULE_DESCRIPTION("Allows rules to match against condition variables");
+MODULE_LICENSE("GPL");
+module_param(condition_list_perms, uint, S_IRUSR | S_IWUSR);
+MODULE_PARM_DESC(condition_list_perms, "default permissions on /proc/net/nf_condition/* files");
+module_param(condition_uid_perms, uint, S_IRUSR | S_IWUSR);
+MODULE_PARM_DESC(condition_uid_perms, "default user owner of /proc/net/nf_condition/* files");
+module_param(condition_gid_perms, uint, S_IRUSR | S_IWUSR);
+MODULE_PARM_DESC(condition_gid_perms, "default group owner of /proc/net/nf_condition/* files");
+MODULE_ALIAS("ipt_condition");
+MODULE_ALIAS("ip6t_condition");
+
+struct condition_variable {
+	struct list_head list;
+	struct proc_dir_entry *status_proc;
+	unsigned int refcount;
+	bool enabled;
+};
+
+/* proc_lock is a user context only semaphore used for write access */
+/*           to the conditions' list.                               */
+static struct mutex proc_lock;
+
+static LIST_HEAD(conditions_list);
+static struct proc_dir_entry *proc_net_condition;
+
+static int condition_proc_read(char __user *buffer, char **start, off_t offset,
+			       int length, int *eof, void *data)
+{
+	const struct condition_variable *var = data;
+
+	buffer[0] = var->enabled ? '1' : '0';
+	buffer[1] = '\n';
+	if (length >= 2)
+		*eof = true;
+	return 2;
+}
+
+static int condition_proc_write(struct file *file, const char __user *buffer,
+				unsigned long length, void *data)
+{
+	struct condition_variable *var = data;
+	char newval;
+
+	if (length > 0) {
+		if (get_user(newval, buffer) != 0)
+			return -EFAULT;
+		/* Match only on the first character */
+		switch (newval) {
+		case '0':
+			var->enabled = false;
+			break;
+		case '1':
+			var->enabled = true;
+			break;
+		}
+	}
+	return length;
+}
+
+static bool
+condition_mt(const struct sk_buff *skb, const struct xt_match_param *par)
+{
+	const struct xt_condition_mtinfo *info = par->matchinfo;
+	const struct condition_variable *var   = info->condvar;
+	bool x;
+
+	rcu_read_lock();
+	x = rcu_dereference(var->enabled);
+	rcu_read_unlock();
+
+	return x ^ info->invert;
+}
+
+static int condition_mt_check(const struct xt_mtchk_param *par)
+{
+	struct xt_condition_mtinfo *info = par->matchinfo;
+	struct condition_variable *var;
+
+	/* Forbid certain names */
+	if (*info->name == '\0' || *info->name == '.' ||
+	    info->name[sizeof(info->name)-1] != '\0' ||
+	    memchr(info->name, '/', sizeof(info->name)) != NULL) {
+		pr_info("name not allowed or too long: \"%.*s\"\n",
+			(unsigned int)sizeof(info->name), info->name);
+		return -EINVAL;
+	}
+	/*
+	 * Let's acquire the lock, check for the condition and add it
+	 * or increase the reference counter.
+	 */
+	if (mutex_lock_interruptible(&proc_lock) != 0)
+		return -EINTR;
+
+	list_for_each_entry(var, &conditions_list, list) {
+		if (strcmp(info->name, var->status_proc->name) == 0) {
+			++var->refcount;
+			mutex_unlock(&proc_lock);
+			info->condvar = var;
+			return 0;
+		}
+	}
+
+	/* At this point, we need to allocate a new condition variable. */
+	var = kmalloc(sizeof(struct condition_variable), GFP_KERNEL);
+	if (var == NULL) {
+		mutex_unlock(&proc_lock);
+		return -ENOMEM;
+	}
+
+	/* Create the condition variable's proc file entry. */
+	var->status_proc = create_proc_entry(info->name, condition_list_perms,
+			   proc_net_condition);
+	if (var->status_proc == NULL) {
+		kfree(var);
+		mutex_unlock(&proc_lock);
+		return -ENOMEM;
+	}
+
+	var->refcount = 1;
+	var->enabled  = false;
+	var->status_proc->data = var;
+	wmb();
+	var->status_proc->read_proc  = condition_proc_read;
+	var->status_proc->write_proc = condition_proc_write;
+	list_add_rcu(&var->list, &conditions_list);
+	var->status_proc->uid = condition_uid_perms;
+	var->status_proc->gid = condition_gid_perms;
+	mutex_unlock(&proc_lock);
+	info->condvar = var;
+	return 0;
+}
+
+static void condition_mt_destroy(const struct xt_mtdtor_param *par)
+{
+	const struct xt_condition_mtinfo *info = par->matchinfo;
+	struct condition_variable *var = info->condvar;
+
+	mutex_lock(&proc_lock);
+	if (--var->refcount == 0) {
+		list_del_rcu(&var->list);
+		remove_proc_entry(var->status_proc->name, proc_net_condition);
+		mutex_unlock(&proc_lock);
+		/*
+		 * synchronize_rcu() would be good enough, but
+		 * synchronize_net() guarantees that no packet
+		 * will go out with the old rule after
+		 * succesful removal.
+		 */
+		synchronize_net();
+		kfree(var);
+		return;
+	}
+	mutex_unlock(&proc_lock);
+}
+
+static struct xt_match condition_mt_reg __read_mostly = {
+	.name       = "condition",
+	.revision   = 1,
+	.family     = NFPROTO_UNSPEC,
+	.matchsize  = sizeof(struct xt_condition_mtinfo),
+	.match      = condition_mt,
+	.checkentry = condition_mt_check,
+	.destroy    = condition_mt_destroy,
+	.me         = THIS_MODULE,
+};
+
+static const char *const dir_name = "nf_condition";
+
+static int __net_init condnet_mt_init(struct net *net)
+{
+	int ret;
+
+	proc_net_condition = proc_mkdir(dir_name, net->proc_net);
+	if (proc_net_condition == NULL)
+		return -EACCES;
+
+	ret = xt_register_match(&condition_mt_reg);
+	if (ret < 0) {
+		remove_proc_entry(dir_name, net->proc_net);
+		return ret;
+	}
+
+	return 0;
+}
+
+static void __net_exit condnet_mt_exit(struct net *net)
+{
+	xt_unregister_match(&condition_mt_reg);
+	remove_proc_entry(dir_name, net->proc_net);
+}
+
+static struct pernet_operations condition_mt_netops = {
+	.init = condnet_mt_init,
+	.exit = condnet_mt_exit,
+};
+
+static int __init condition_mt_init(void)
+{
+	mutex_init(&proc_lock);
+	return register_pernet_subsys(&condition_mt_netops);
+}
+
+static void __exit condition_mt_exit(void)
+{
+	unregister_pernet_subsys(&condition_mt_netops);
+}
+
+module_init(condition_mt_init);
+module_exit(condition_mt_exit);
-- 
1.7.0.4


^ permalink raw reply related	[flat|nested] 22+ messages in thread

* Re: nf-next: reentrancy, new modules
  2010-04-13 12:37 nf-next: reentrancy, new modules Jan Engelhardt
                   ` (7 preceding siblings ...)
  2010-04-13 12:37 ` [PATCH 8/8] netfilter: xtables: inclusion of xt_condition Jan Engelhardt
@ 2010-04-13 13:26 ` Patrick McHardy
  8 siblings, 0 replies; 22+ messages in thread
From: Patrick McHardy @ 2010-04-13 13:26 UTC (permalink / raw)
  To: Jan Engelhardt; +Cc: netfilter-devel

Jan Engelhardt wrote:
> per request, the series reposted with fixes applied. I also tacked
> sysrq and condition on.

One thing at a time. Generally, I don't want series that we were just
finished discussing extended but seperate submissions for new stuff.

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH 1/8] netfilter: ipv6: move POSTROUTING invocation before fragmentation
  2010-04-13 12:37 ` [PATCH 1/8] netfilter: ipv6: move POSTROUTING invocation before fragmentation Jan Engelhardt
@ 2010-04-13 13:30   ` Patrick McHardy
  0 siblings, 0 replies; 22+ messages in thread
From: Patrick McHardy @ 2010-04-13 13:30 UTC (permalink / raw)
  To: Jan Engelhardt; +Cc: netfilter-devel

Jan Engelhardt wrote:
> Patrick McHardy notes: "We used to invoke IPv4 POST_ROUTING after
> fragmentation as well just to defragment the packets in conntrack
> immediately afterwards, but that got changed during the
> netfilter-ipsec integration. Ideally IPv6 would behave like IPv4."
> 
> This patch makes it so. Sending an oversized frame (e.g. `ping6
> -s64000 -c1 ::1`) will now show up in POSTROUTING as a single skb
> rather than multiple ones.

Applied, thanks. It seems you could now change the IPv6 conntrack
LOCAL_OUT hook to skip defragmentation.


^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH 2/8] netfilter: ipv6: add IPSKB_REROUTED exclusion to NF_HOOK/POSTROUTING invocation
  2010-04-13 12:37 ` [PATCH 2/8] netfilter: ipv6: add IPSKB_REROUTED exclusion to NF_HOOK/POSTROUTING invocation Jan Engelhardt
@ 2010-04-13 13:33   ` Patrick McHardy
  2010-04-13 13:34     ` Jan Engelhardt
  0 siblings, 1 reply; 22+ messages in thread
From: Patrick McHardy @ 2010-04-13 13:33 UTC (permalink / raw)
  To: Jan Engelhardt; +Cc: netfilter-devel

[-- Attachment #1: Type: text/plain, Size: 1096 bytes --]

Jan Engelhardt wrote:
> Similar to how IPv4's ip_output.c works, have ip6_output also check
> the IPSKB_REROUTED flag. It will be set from xt_TEE for cloned packets
> since Xtables can currently only deal with a single packet in flight
> at a time.
> 
> Signed-off-by: Jan Engelhardt <jengelh@medozas.de>
> Acked-by: David S. Miller <davem@davemloft.net>
> ---
>  net/ipv6/ip6_output.c |    5 +++--
>  1 files changed, 3 insertions(+), 2 deletions(-)
> 
> diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
> index f314ba4..7e10f62 100644
> --- a/net/ipv6/ip6_output.c
> +++ b/net/ipv6/ip6_output.c
> @@ -172,8 +172,9 @@ int ip6_output(struct sk_buff *skb)
>  		return 0;
>  	}
>  
> -	return NF_HOOK(NFPROTO_IPV6, NF_INET_POST_ROUTING, skb, NULL, dev,
> -		       ip6_finish_output);
> +	return NF_HOOK_COND(NFPROTO_IPV6, NF_INET_POST_ROUTING, skb, NULL, dev,
> +			    ip6_finish_output,
> +			    !(IP6CB(skb)->flags & IPSKB_REROUTED));

This needs to use an IP6SKB value to avoid clashes. I've fixed it up
as follows:

I'll also fix up the TEE patch to use the proper value for IPv6.

[-- Attachment #2: x --]
[-- Type: text/plain, Size: 1505 bytes --]

commit 9c6eb28aca52d562f3ffbaebaa56385df9972a43
Author: Jan Engelhardt <jengelh@medozas.de>
Date:   Tue Apr 13 15:32:16 2010 +0200

    netfilter: ipv6: add IPSKB_REROUTED exclusion to NF_HOOK/POSTROUTING invocation
    
    Similar to how IPv4's ip_output.c works, have ip6_output also check
    the IPSKB_REROUTED flag. It will be set from xt_TEE for cloned packets
    since Xtables can currently only deal with a single packet in flight
    at a time.
    
    Signed-off-by: Jan Engelhardt <jengelh@medozas.de>
    Acked-by: David S. Miller <davem@davemloft.net>
    [Patrick: changed to use an IP6SKB value instead of IPSKB]
    Signed-off-by: Patrick McHardy <kaber@trash.net>

diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h
index e0cc9a7..7bdf6ff 100644
--- a/include/linux/ipv6.h
+++ b/include/linux/ipv6.h
@@ -250,6 +250,7 @@ struct inet6_skb_parm {
 
 #define IP6SKB_XFRM_TRANSFORMED	1
 #define IP6SKB_FORWARDED	2
+#define IP6SKB_REROUTED		4
 };
 
 #define IP6CB(skb)	((struct inet6_skb_parm*)((skb)->cb))
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 236ac78..c10a38a 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -172,8 +172,9 @@ int ip6_output(struct sk_buff *skb)
 		return 0;
 	}
 
-	return NF_HOOK(NFPROTO_IPV6, NF_INET_POST_ROUTING, skb, NULL, dev,
-		       ip6_finish_output);
+	return NF_HOOK_COND(NFPROTO_IPV6, NF_INET_POST_ROUTING, skb, NULL, dev,
+			    ip6_finish_output,
+			    !(IP6CB(skb)->flags & IP6SKB_REROUTED));
 }
 
 /*

^ permalink raw reply related	[flat|nested] 22+ messages in thread

* Re: [PATCH 2/8] netfilter: ipv6: add IPSKB_REROUTED exclusion to NF_HOOK/POSTROUTING invocation
  2010-04-13 13:33   ` Patrick McHardy
@ 2010-04-13 13:34     ` Jan Engelhardt
  2010-04-13 13:36       ` Patrick McHardy
  0 siblings, 1 reply; 22+ messages in thread
From: Jan Engelhardt @ 2010-04-13 13:34 UTC (permalink / raw)
  To: Patrick McHardy; +Cc: netfilter-devel


On Tuesday 2010-04-13 15:33, Patrick McHardy wrote:
>> diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
>> index f314ba4..7e10f62 100644
>> --- a/net/ipv6/ip6_output.c
>> +++ b/net/ipv6/ip6_output.c
>> @@ -172,8 +172,9 @@ int ip6_output(struct sk_buff *skb)
>>  		return 0;
>>  	}
>>  
>> -	return NF_HOOK(NFPROTO_IPV6, NF_INET_POST_ROUTING, skb, NULL, dev,
>> -		       ip6_finish_output);
>> +	return NF_HOOK_COND(NFPROTO_IPV6, NF_INET_POST_ROUTING, skb, NULL, dev,
>> +			    ip6_finish_output,
>> +			    !(IP6CB(skb)->flags & IPSKB_REROUTED));
>
>This needs to use an IP6SKB value to avoid clashes. I've fixed it up
>as follows:

Would merging the IPSKB and IP6SKB flags be worth it?

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH 2/8] netfilter: ipv6: add IPSKB_REROUTED exclusion to NF_HOOK/POSTROUTING invocation
  2010-04-13 13:34     ` Jan Engelhardt
@ 2010-04-13 13:36       ` Patrick McHardy
  0 siblings, 0 replies; 22+ messages in thread
From: Patrick McHardy @ 2010-04-13 13:36 UTC (permalink / raw)
  To: Jan Engelhardt; +Cc: netfilter-devel

Jan Engelhardt wrote:
> On Tuesday 2010-04-13 15:33, Patrick McHardy wrote:
>>> diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
>>> index f314ba4..7e10f62 100644
>>> --- a/net/ipv6/ip6_output.c
>>> +++ b/net/ipv6/ip6_output.c
>>> @@ -172,8 +172,9 @@ int ip6_output(struct sk_buff *skb)
>>>  		return 0;
>>>  	}
>>>  
>>> -	return NF_HOOK(NFPROTO_IPV6, NF_INET_POST_ROUTING, skb, NULL, dev,
>>> -		       ip6_finish_output);
>>> +	return NF_HOOK_COND(NFPROTO_IPV6, NF_INET_POST_ROUTING, skb, NULL, dev,
>>> +			    ip6_finish_output,
>>> +			    !(IP6CB(skb)->flags & IPSKB_REROUTED));
>> This needs to use an IP6SKB value to avoid clashes. I've fixed it up
>> as follows:
> 
> Would merging the IPSKB and IP6SKB flags be worth it?
> 

I don't think so, the CB is by definition private to each layer.

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH 3/8] netfilter: xtables: inclusion of xt_TEE
  2010-04-13 12:37 ` [PATCH 3/8] netfilter: xtables: inclusion of xt_TEE Jan Engelhardt
@ 2010-04-13 13:37   ` Patrick McHardy
  2010-04-13 13:45   ` Patrick McHardy
  1 sibling, 0 replies; 22+ messages in thread
From: Patrick McHardy @ 2010-04-13 13:37 UTC (permalink / raw)
  To: Jan Engelhardt; +Cc: netfilter-devel

Jan Engelhardt wrote:
> +++ b/include/linux/netfilter/xt_TEE.h
> @@ -0,0 +1,8 @@
> +#ifndef _XT_TEE_TARGET_H
> +#define _XT_TEE_TARGET_H
> +
> +struct xt_tee_tginfo {
> +	union nf_inet_addr gw;
> +};

Mhh, quoting what I wrote earlier:

> That might make it unnessarily complicated to use src-based routing
> when using TEE. I guess you'd usually have a host for logging or IDS
> somewhere on a private network and TEE packets there. So specifying
> oif and gateway seems most useful to me.

This really should support oif in my opinion. Please add this
(and update to use IP6SKB while at it). I've pushed out the
patches I've applied so far.

BTW, no need to resend the other patches unless there are clashes.

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH 3/8] netfilter: xtables: inclusion of xt_TEE
  2010-04-13 12:37 ` [PATCH 3/8] netfilter: xtables: inclusion of xt_TEE Jan Engelhardt
  2010-04-13 13:37   ` Patrick McHardy
@ 2010-04-13 13:45   ` Patrick McHardy
  2010-04-13 16:09     ` Jan Engelhardt
  1 sibling, 1 reply; 22+ messages in thread
From: Patrick McHardy @ 2010-04-13 13:45 UTC (permalink / raw)
  To: Jan Engelhardt; +Cc: netfilter-devel

Jan Engelhardt wrote:
> +static bool
> +tee_tg_route4(struct sk_buff *skb, const struct xt_tee_tginfo *info)
> +{
> +	const struct iphdr *iph = ip_hdr(skb);
> +	struct rtable *rt;
> +	struct flowi fl;
> +
> +	memset(&fl, 0, sizeof(fl));
> +	fl.nl_u.ip4_u.daddr = info->gw.ip;
> +	fl.nl_u.ip4_u.tos   = RT_TOS(iph->tos);
> +	fl.nl_u.ip4_u.scope = RT_SCOPE_UNIVERSE;
> +
> +	if (ip_route_output_key(dev_net(skb->dev), &rt, &fl) != 0) {

You can't use skb->dev in modules that are valid to use in LOCAL_OUT.

> +#ifdef WITH_CONNTRACK
> +	nf_conntrack_put(skb->nfct);
> +	skb->nfct     = &tee_track.ct_general;
> +	skb->nfctinfo = IP_CT_NEW;
> +	nf_conntrack_get(skb->nfct);
> +#endif

Why do we still need this? I thought the reentrancy-counter should take
care of this?

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH 3/8] netfilter: xtables: inclusion of xt_TEE
  2010-04-13 13:45   ` Patrick McHardy
@ 2010-04-13 16:09     ` Jan Engelhardt
  2010-04-13 16:17       ` Jan Engelhardt
  2010-04-13 16:32       ` Patrick McHardy
  0 siblings, 2 replies; 22+ messages in thread
From: Jan Engelhardt @ 2010-04-13 16:09 UTC (permalink / raw)
  To: Patrick McHardy; +Cc: netfilter-devel

On Tuesday 2010-04-13 15:45, Patrick McHardy wrote:

>Jan Engelhardt wrote:
>> +static bool
>> +tee_tg_route4(struct sk_buff *skb, const struct xt_tee_tginfo *info)
>> +{
>> +	const struct iphdr *iph = ip_hdr(skb);
>> +	struct rtable *rt;
>> +	struct flowi fl;
>> +
>> +	memset(&fl, 0, sizeof(fl));
>> +	fl.nl_u.ip4_u.daddr = info->gw.ip;
>> +	fl.nl_u.ip4_u.tos   = RT_TOS(iph->tos);
>> +	fl.nl_u.ip4_u.scope = RT_SCOPE_UNIVERSE;
>> +
>> +	if (ip_route_output_key(dev_net(skb->dev), &rt, &fl) != 0) {
>
>You can't use skb->dev in modules that are valid to use in LOCAL_OUT.
>
>> +#ifdef WITH_CONNTRACK
>> +	nf_conntrack_put(skb->nfct);
>> +	skb->nfct     = &tee_track.ct_general;
>> +	skb->nfctinfo = IP_CT_NEW;
>> +	nf_conntrack_get(skb->nfct);
>> +#endif
>
>Why do we still need this? I thought the reentrancy-counter should take
>care of this?

Did I really delete that commit... it's done so that conntrack
does not count the duplicated packets towards the original
connection.

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH 3/8] netfilter: xtables: inclusion of xt_TEE
  2010-04-13 16:09     ` Jan Engelhardt
@ 2010-04-13 16:17       ` Jan Engelhardt
  2010-04-13 16:33         ` Patrick McHardy
  2010-04-13 16:32       ` Patrick McHardy
  1 sibling, 1 reply; 22+ messages in thread
From: Jan Engelhardt @ 2010-04-13 16:17 UTC (permalink / raw)
  To: Patrick McHardy; +Cc: netfilter-devel


On Tuesday 2010-04-13 18:09, Jan Engelhardt wrote:
>>
>>> +#ifdef WITH_CONNTRACK
>>> +	nf_conntrack_put(skb->nfct);
>>> +	skb->nfct     = &tee_track.ct_general;
>>> +	skb->nfctinfo = IP_CT_NEW;
>>> +	nf_conntrack_get(skb->nfct);
>>> +#endif
>>
>>Why do we still need this? I thought the reentrancy-counter should take
>>care of this?
>
>Did I really delete that commit... it's done so that conntrack
>does not count the duplicated packets towards the original
>connection.

While at that, would retaining the old skb's nfctinfo make any sense?

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH 3/8] netfilter: xtables: inclusion of xt_TEE
  2010-04-13 16:09     ` Jan Engelhardt
  2010-04-13 16:17       ` Jan Engelhardt
@ 2010-04-13 16:32       ` Patrick McHardy
  2010-04-13 17:28         ` Jan Engelhardt
  1 sibling, 1 reply; 22+ messages in thread
From: Patrick McHardy @ 2010-04-13 16:32 UTC (permalink / raw)
  To: Jan Engelhardt; +Cc: netfilter-devel

Jan Engelhardt wrote:
> On Tuesday 2010-04-13 15:45, Patrick McHardy wrote:
> 
>> Jan Engelhardt wrote:
>>> +static bool
>>> +tee_tg_route4(struct sk_buff *skb, const struct xt_tee_tginfo *info)
>>> +{
>>> +	const struct iphdr *iph = ip_hdr(skb);
>>> +	struct rtable *rt;
>>> +	struct flowi fl;
>>> +
>>> +	memset(&fl, 0, sizeof(fl));
>>> +	fl.nl_u.ip4_u.daddr = info->gw.ip;
>>> +	fl.nl_u.ip4_u.tos   = RT_TOS(iph->tos);
>>> +	fl.nl_u.ip4_u.scope = RT_SCOPE_UNIVERSE;
>>> +
>>> +	if (ip_route_output_key(dev_net(skb->dev), &rt, &fl) != 0) {
>> You can't use skb->dev in modules that are valid to use in LOCAL_OUT.
>>
>>> +#ifdef WITH_CONNTRACK
>>> +	nf_conntrack_put(skb->nfct);
>>> +	skb->nfct     = &tee_track.ct_general;
>>> +	skb->nfctinfo = IP_CT_NEW;
>>> +	nf_conntrack_get(skb->nfct);
>>> +#endif
>> Why do we still need this? I thought the reentrancy-counter should take
>> care of this?
> 
> Did I really delete that commit... it's done so that conntrack
> does not count the duplicated packets towards the original
> connection.

Simply untrack it perhaps?

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH 3/8] netfilter: xtables: inclusion of xt_TEE
  2010-04-13 16:17       ` Jan Engelhardt
@ 2010-04-13 16:33         ` Patrick McHardy
  0 siblings, 0 replies; 22+ messages in thread
From: Patrick McHardy @ 2010-04-13 16:33 UTC (permalink / raw)
  To: Jan Engelhardt; +Cc: netfilter-devel

Jan Engelhardt wrote:
> On Tuesday 2010-04-13 18:09, Jan Engelhardt wrote:
>>>> +#ifdef WITH_CONNTRACK
>>>> +	nf_conntrack_put(skb->nfct);
>>>> +	skb->nfct     = &tee_track.ct_general;
>>>> +	skb->nfctinfo = IP_CT_NEW;
>>>> +	nf_conntrack_get(skb->nfct);
>>>> +#endif
>>> Why do we still need this? I thought the reentrancy-counter should take
>>> care of this?
>> Did I really delete that commit... it's done so that conntrack
>> does not count the duplicated packets towards the original
>> connection.
> 
> While at that, would retaining the old skb's nfctinfo make any sense?

I don't think so.

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH 3/8] netfilter: xtables: inclusion of xt_TEE
  2010-04-13 16:32       ` Patrick McHardy
@ 2010-04-13 17:28         ` Jan Engelhardt
  2010-04-13 17:31           ` Patrick McHardy
  0 siblings, 1 reply; 22+ messages in thread
From: Jan Engelhardt @ 2010-04-13 17:28 UTC (permalink / raw)
  To: Patrick McHardy; +Cc: netfilter-devel


On Tuesday 2010-04-13 18:32, Patrick McHardy wrote:

>>>> +tee_tg_route4(struct sk_buff *skb, const struct xt_tee_tginfo *info)
>>>> +{
>>>> +	const struct iphdr *iph = ip_hdr(skb);
>>>> +	struct rtable *rt;
>>>> +	struct flowi fl;
>>>> +
>>>> +	memset(&fl, 0, sizeof(fl));
>>>> +	fl.nl_u.ip4_u.daddr = info->gw.ip;
>>>> +	fl.nl_u.ip4_u.tos   = RT_TOS(iph->tos);
>>>> +	fl.nl_u.ip4_u.scope = RT_SCOPE_UNIVERSE;
>>>> +
>>>> +	if (ip_route_output_key(dev_net(skb->dev), &rt, &fl) != 0) {
>>> You can't use skb->dev in modules that are valid to use in LOCAL_OUT.

Let's use dev_net(skb_dst(skb)->dev). skb_dst seems fine,
as __ip_local_out already makes use of it.

>>>> +#ifdef WITH_CONNTRACK
>>>> +	nf_conntrack_put(skb->nfct);
>>>> +	skb->nfct     = &tee_track.ct_general;
>>>> +	skb->nfctinfo = IP_CT_NEW;
>>>> +	nf_conntrack_get(skb->nfct);
>>>> +#endif
>>> Why do we still need this? I thought the reentrancy-counter should take
>>> care of this?
>> 
>> Did I really delete that commit... it's done so that conntrack
>> does not count the duplicated packets towards the original
>> connection.
>
>Simply untrack it perhaps?

Well, that's what the four lines do, that's what NOTRACK does -
assigning to a fake nfct, isn't it?


thanks,
Jan

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH 3/8] netfilter: xtables: inclusion of xt_TEE
  2010-04-13 17:28         ` Jan Engelhardt
@ 2010-04-13 17:31           ` Patrick McHardy
  0 siblings, 0 replies; 22+ messages in thread
From: Patrick McHardy @ 2010-04-13 17:31 UTC (permalink / raw)
  To: Jan Engelhardt; +Cc: netfilter-devel

Jan Engelhardt wrote:
> On Tuesday 2010-04-13 18:32, Patrick McHardy wrote:
> 
>>>>> +#ifdef WITH_CONNTRACK
>>>>> +	nf_conntrack_put(skb->nfct);
>>>>> +	skb->nfct     = &tee_track.ct_general;
>>>>> +	skb->nfctinfo = IP_CT_NEW;
>>>>> +	nf_conntrack_get(skb->nfct);
>>>>> +#endif
>>>> Why do we still need this? I thought the reentrancy-counter should take
>>>> care of this?
>>> Did I really delete that commit... it's done so that conntrack
>>> does not count the duplicated packets towards the original
>>> connection.
>> Simply untrack it perhaps?
> 
> Well, that's what the four lines do, that's what NOTRACK does -
> assigning to a fake nfct, isn't it?

Yeah, but a different one from the untracked conntrack, which is
already explicitly checked for where necessary.

^ permalink raw reply	[flat|nested] 22+ messages in thread

end of thread, other threads:[~2010-04-13 17:31 UTC | newest]

Thread overview: 22+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2010-04-13 12:37 nf-next: reentrancy, new modules Jan Engelhardt
2010-04-13 12:37 ` [PATCH 1/8] netfilter: ipv6: move POSTROUTING invocation before fragmentation Jan Engelhardt
2010-04-13 13:30   ` Patrick McHardy
2010-04-13 12:37 ` [PATCH 2/8] netfilter: ipv6: add IPSKB_REROUTED exclusion to NF_HOOK/POSTROUTING invocation Jan Engelhardt
2010-04-13 13:33   ` Patrick McHardy
2010-04-13 13:34     ` Jan Engelhardt
2010-04-13 13:36       ` Patrick McHardy
2010-04-13 12:37 ` [PATCH 3/8] netfilter: xtables: inclusion of xt_TEE Jan Engelhardt
2010-04-13 13:37   ` Patrick McHardy
2010-04-13 13:45   ` Patrick McHardy
2010-04-13 16:09     ` Jan Engelhardt
2010-04-13 16:17       ` Jan Engelhardt
2010-04-13 16:33         ` Patrick McHardy
2010-04-13 16:32       ` Patrick McHardy
2010-04-13 17:28         ` Jan Engelhardt
2010-04-13 17:31           ` Patrick McHardy
2010-04-13 12:37 ` [PATCH 4/8] netfilter: xtables2: make ip_tables reentrant Jan Engelhardt
2010-04-13 12:37 ` [PATCH 5/8] netfilter: xt_TEE: have cloned packet travel through Xtables too Jan Engelhardt
2010-04-13 12:37 ` [PATCH 6/8] netfilter: xtables: remove old comments about reentrancy Jan Engelhardt
2010-04-13 12:37 ` [PATCH 7/8] netfilter: xtables: inclusion of xt_SYSRQ Jan Engelhardt
2010-04-13 12:37 ` [PATCH 8/8] netfilter: xtables: inclusion of xt_condition Jan Engelhardt
2010-04-13 13:26 ` nf-next: reentrancy, new modules Patrick McHardy

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).