From: kaber@trash.net
To: davem@davemloft.net
Cc: netfilter-devel@vger.kernel.org, netdev@vger.kernel.org
Subject: [PATCH 69/84] netfilter: bridge-netfilter: Fix MAC header handling with IP DNAT
Date: Mon, 10 May 2010 22:18:40 +0200 [thread overview]
Message-ID: <1273522735-24672-70-git-send-email-kaber@trash.net> (raw)
In-Reply-To: <1273522735-24672-1-git-send-email-kaber@trash.net>
From: Bart De Schuymer <bdschuym@pandora.be>
- Fix IP DNAT on VLAN- or PPPoE-encapsulated traffic: The functions
neigh_hh_output() and dst->neighbour->output() overwrite the complete
Ethernet header, although we only need the destination MAC address.
For encapsulated packets, they ended up overwriting the encapsulating
header. The new code copies the Ethernet source MAC address and
protocol number before calling dst->neighbour->output(). The Ethernet
source MAC and protocol number are copied back in place in
br_nf_pre_routing_finish_bridge_slow(). This also makes the IP DNAT
more transparent because in the old scheme the source MAC of the
bridge was copied into the source address in the Ethernet header. We
also set skb->protocol to ETH_P_IP or ETH_P_IPV6 during the execution
of the PF_INET or PF_INET6 hooks, respectively.
- Speed up IP DNAT by calling neigh_hh_bridge() instead of
neigh_hh_output(): if dst->hh is available, we already know the MAC
address so we can just copy it.
Signed-off-by: Bart De Schuymer <bdschuym@pandora.be>
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
include/linux/netfilter_bridge.h | 5 ++-
include/net/neighbour.h | 14 ++++++
net/bridge/br_netfilter.c | 90 +++++++++++++++++++++++++++----------
3 files changed, 83 insertions(+), 26 deletions(-)
diff --git a/include/linux/netfilter_bridge.h b/include/linux/netfilter_bridge.h
index ffab6c4..ea0e44b 100644
--- a/include/linux/netfilter_bridge.h
+++ b/include/linux/netfilter_bridge.h
@@ -43,7 +43,8 @@ enum nf_br_hook_priorities {
#define BRNF_BRIDGED_DNAT 0x02
#define BRNF_BRIDGED 0x04
#define BRNF_NF_BRIDGE_PREROUTING 0x08
-
+#define BRNF_8021Q 0x10
+#define BRNF_PPPoE 0x20
/* Only used in br_forward.c */
extern int nf_bridge_copy_header(struct sk_buff *skb);
@@ -75,6 +76,8 @@ static inline int br_nf_pre_routing_finish_bridge_slow(struct sk_buff *skb)
skb_pull(skb, ETH_HLEN);
nf_bridge->mask ^= BRNF_BRIDGED_DNAT;
+ skb_copy_to_linear_data_offset(skb, -(ETH_HLEN-ETH_ALEN),
+ skb->nf_bridge->data, ETH_HLEN-ETH_ALEN);
skb->dev = nf_bridge->physindev;
return br_handle_frame_finish(skb);
}
diff --git a/include/net/neighbour.h b/include/net/neighbour.h
index da1d58b..eb21340 100644
--- a/include/net/neighbour.h
+++ b/include/net/neighbour.h
@@ -299,6 +299,20 @@ static inline int neigh_event_send(struct neighbour *neigh, struct sk_buff *skb)
return 0;
}
+#ifdef CONFIG_BRIDGE_NETFILTER
+static inline int neigh_hh_bridge(struct hh_cache *hh, struct sk_buff *skb)
+{
+ unsigned seq, hh_alen;
+
+ do {
+ seq = read_seqbegin(&hh->hh_lock);
+ hh_alen = HH_DATA_ALIGN(ETH_HLEN);
+ memcpy(skb->data - hh_alen, hh->hh_data, ETH_ALEN + hh_alen - ETH_HLEN);
+ } while (read_seqretry(&hh->hh_lock, seq));
+ return 0;
+}
+#endif
+
static inline int neigh_hh_output(struct hh_cache *hh, struct sk_buff *skb)
{
unsigned seq;
diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c
index 05dc630..b7e405d 100644
--- a/net/bridge/br_netfilter.c
+++ b/net/bridge/br_netfilter.c
@@ -196,15 +196,24 @@ static inline void nf_bridge_save_header(struct sk_buff *skb)
skb->nf_bridge->data, header_size);
}
-/*
- * When forwarding bridge frames, we save a copy of the original
- * header before processing.
+static inline void nf_bridge_update_protocol(struct sk_buff *skb)
+{
+ if (skb->nf_bridge->mask & BRNF_8021Q)
+ skb->protocol = htons(ETH_P_8021Q);
+ else if (skb->nf_bridge->mask & BRNF_PPPoE)
+ skb->protocol = htons(ETH_P_PPP_SES);
+}
+
+/* Fill in the header for fragmented IP packets handled by
+ * the IPv4 connection tracking code.
*/
int nf_bridge_copy_header(struct sk_buff *skb)
{
int err;
- int header_size = ETH_HLEN + nf_bridge_encap_header_len(skb);
+ unsigned int header_size;
+ nf_bridge_update_protocol(skb);
+ header_size = ETH_HLEN + nf_bridge_encap_header_len(skb);
err = skb_cow_head(skb, header_size);
if (err)
return err;
@@ -238,6 +247,7 @@ static int br_nf_pre_routing_finish_ipv6(struct sk_buff *skb)
skb_dst_set(skb, &rt->u.dst);
skb->dev = nf_bridge->physindev;
+ nf_bridge_update_protocol(skb);
nf_bridge_push_encap_header(skb);
NF_HOOK_THRESH(NFPROTO_BRIDGE, NF_BR_PRE_ROUTING, skb, skb->dev, NULL,
br_handle_frame_finish, 1);
@@ -245,6 +255,38 @@ static int br_nf_pre_routing_finish_ipv6(struct sk_buff *skb)
return 0;
}
+/* Obtain the correct destination MAC address, while preserving the original
+ * source MAC address. If we already know this address, we just copy it. If we
+ * don't, we use the neighbour framework to find out. In both cases, we make
+ * sure that br_handle_frame_finish() is called afterwards.
+ */
+static int br_nf_pre_routing_finish_bridge(struct sk_buff *skb)
+{
+ struct nf_bridge_info *nf_bridge = skb->nf_bridge;
+ struct dst_entry *dst;
+
+ skb->dev = bridge_parent(skb->dev);
+ if (!skb->dev)
+ goto free_skb;
+ dst = skb_dst(skb);
+ if (dst->hh) {
+ neigh_hh_bridge(dst->hh, skb);
+ skb->dev = nf_bridge->physindev;
+ return br_handle_frame_finish(skb);
+ } else if (dst->neighbour) {
+ /* the neighbour function below overwrites the complete
+ * MAC header, so we save the Ethernet source address and
+ * protocol number. */
+ skb_copy_from_linear_data_offset(skb, -(ETH_HLEN-ETH_ALEN), skb->nf_bridge->data, ETH_HLEN-ETH_ALEN);
+ /* tell br_dev_xmit to continue with forwarding */
+ nf_bridge->mask |= BRNF_BRIDGED_DNAT;
+ return dst->neighbour->output(skb);
+ }
+free_skb:
+ kfree_skb(skb);
+ return 0;
+}
+
/* This requires some explaining. If DNAT has taken place,
* we will need to fix up the destination Ethernet address.
*
@@ -283,25 +325,6 @@ static int br_nf_pre_routing_finish_ipv6(struct sk_buff *skb)
* device, we proceed as if ip_route_input() succeeded. If it differs from the
* logical bridge port or if ip_route_output_key() fails we drop the packet.
*/
-
-static int br_nf_pre_routing_finish_bridge(struct sk_buff *skb)
-{
- skb->dev = bridge_parent(skb->dev);
- if (skb->dev) {
- struct dst_entry *dst = skb_dst(skb);
-
- nf_bridge_pull_encap_header(skb);
- skb->nf_bridge->mask |= BRNF_BRIDGED_DNAT;
-
- if (dst->hh)
- return neigh_hh_output(dst->hh, skb);
- else if (dst->neighbour)
- return dst->neighbour->output(skb);
- }
- kfree_skb(skb);
- return 0;
-}
-
static int br_nf_pre_routing_finish(struct sk_buff *skb)
{
struct net_device *dev = skb->dev;
@@ -354,6 +377,7 @@ free_skb:
if (skb_dst(skb)->dev == dev) {
bridged_dnat:
skb->dev = nf_bridge->physindev;
+ nf_bridge_update_protocol(skb);
nf_bridge_push_encap_header(skb);
NF_HOOK_THRESH(NFPROTO_BRIDGE,
NF_BR_PRE_ROUTING,
@@ -376,6 +400,7 @@ bridged_dnat:
}
skb->dev = nf_bridge->physindev;
+ nf_bridge_update_protocol(skb);
nf_bridge_push_encap_header(skb);
NF_HOOK_THRESH(NFPROTO_BRIDGE, NF_BR_PRE_ROUTING, skb, skb->dev, NULL,
br_handle_frame_finish, 1);
@@ -396,6 +421,10 @@ static struct net_device *setup_pre_routing(struct sk_buff *skb)
nf_bridge->mask |= BRNF_NF_BRIDGE_PREROUTING;
nf_bridge->physindev = skb->dev;
skb->dev = bridge_parent(skb->dev);
+ if (skb->protocol == htons(ETH_P_8021Q))
+ nf_bridge->mask |= BRNF_8021Q;
+ else if (skb->protocol == htons(ETH_P_PPP_SES))
+ nf_bridge->mask |= BRNF_PPPoE;
return skb->dev;
}
@@ -494,6 +523,7 @@ static unsigned int br_nf_pre_routing_ipv6(unsigned int hook,
if (!setup_pre_routing(skb))
return NF_DROP;
+ skb->protocol = htons(ETH_P_IPV6);
NF_HOOK(NFPROTO_IPV6, NF_INET_PRE_ROUTING, skb, skb->dev, NULL,
br_nf_pre_routing_finish_ipv6);
@@ -566,6 +596,7 @@ static unsigned int br_nf_pre_routing(unsigned int hook, struct sk_buff *skb,
if (!setup_pre_routing(skb))
return NF_DROP;
store_orig_dstaddr(skb);
+ skb->protocol = htons(ETH_P_IP);
NF_HOOK(NFPROTO_IPV4, NF_INET_PRE_ROUTING, skb, skb->dev, NULL,
br_nf_pre_routing_finish);
@@ -614,7 +645,9 @@ static int br_nf_forward_finish(struct sk_buff *skb)
} else {
in = *((struct net_device **)(skb->cb));
}
+ nf_bridge_update_protocol(skb);
nf_bridge_push_encap_header(skb);
+
NF_HOOK_THRESH(NFPROTO_BRIDGE, NF_BR_FORWARD, skb, in,
skb->dev, br_forward_finish, 1);
return 0;
@@ -666,6 +699,10 @@ static unsigned int br_nf_forward_ip(unsigned int hook, struct sk_buff *skb,
/* The physdev module checks on this */
nf_bridge->mask |= BRNF_BRIDGED;
nf_bridge->physoutdev = skb->dev;
+ if (pf == PF_INET)
+ skb->protocol = htons(ETH_P_IP);
+ else
+ skb->protocol = htons(ETH_P_IPV6);
NF_HOOK(pf, NF_INET_FORWARD, skb, bridge_parent(in), parent,
br_nf_forward_finish);
@@ -706,8 +743,7 @@ static unsigned int br_nf_forward_arp(unsigned int hook, struct sk_buff *skb,
#if defined(CONFIG_NF_CONNTRACK_IPV4) || defined(CONFIG_NF_CONNTRACK_IPV4_MODULE)
static int br_nf_dev_queue_xmit(struct sk_buff *skb)
{
- if (skb->nfct != NULL &&
- (skb->protocol == htons(ETH_P_IP) || IS_VLAN_IP(skb)) &&
+ if (skb->nfct != NULL && skb->protocol == htons(ETH_P_IP) &&
skb->len > skb->dev->mtu &&
!skb_is_gso(skb))
return ip_fragment(skb, br_dev_queue_push_xmit);
@@ -755,6 +791,10 @@ static unsigned int br_nf_post_routing(unsigned int hook, struct sk_buff *skb,
nf_bridge_pull_encap_header(skb);
nf_bridge_save_header(skb);
+ if (pf == PF_INET)
+ skb->protocol = htons(ETH_P_IP);
+ else
+ skb->protocol = htons(ETH_P_IPV6);
NF_HOOK(pf, NF_INET_POST_ROUTING, skb, NULL, realoutdev,
br_nf_dev_queue_xmit);
--
1.7.0.4
next prev parent reply other threads:[~2010-05-10 20:18 UTC|newest]
Thread overview: 89+ messages / expand[flat|nested] mbox.gz Atom feed top
2010-05-10 20:17 [PATCH 00/84] netfilter: netfilter update for 2.6.35 kaber
2010-05-10 20:17 ` [PATCH 01/84] netfilter: include/linux/netfilter/nf_conntrack_tuple_common.h: Checkpatch cleanup kaber
2010-05-10 20:17 ` [PATCH 02/84] netfilter: ebt_ip6: Use ipv6_masked_addr_cmp() kaber
2010-05-10 20:17 ` [PATCH 03/84] netfilter: remove stale declaration for ip6_masked_addrcmp() kaber
2010-05-10 20:17 ` [PATCH 04/84] netfilter: net/netfilter/ipvs/ip_vs_ftp.c: Remove use of NIPQUAD kaber
2010-05-10 20:17 ` [PATCH 05/84] netfilter: xt_CT: par->family is an nfproto kaber
2010-05-10 20:17 ` [PATCH 06/84] netfilter: xt_NFQUEUE: consolidate v4/v6 targets into one kaber
2010-05-10 20:17 ` [PATCH 07/84] netfilter: xtables: add comment markers to Xtables Kconfig kaber
2010-05-10 20:17 ` [PATCH 08/84] netfilter: xtables: merge xt_MARK into xt_mark kaber
2010-05-10 20:17 ` [PATCH 09/84] netfilter: xtables: merge xt_CONNMARK into xt_connmark kaber
2010-05-10 20:17 ` [PATCH 10/84] netfilter: xtables: schedule xt_NOTRACK for removal kaber
2010-05-10 20:17 ` [PATCH 11/84] netfilter: update my email address kaber
2010-05-10 20:17 ` [PATCH 12/84] netfilter: ebt_ip6: add principal maintainer in a MODULE_AUTHOR tag kaber
2010-05-10 20:17 ` [PATCH 13/84] netfilter: xt_recent: update description kaber
2010-05-10 20:17 ` [PATCH 14/84] netfilter: xt_recent: remove old proc directory kaber
2010-05-10 20:17 ` [PATCH 15/84] netfilter: xt_recent: add an entry reaper kaber
2010-05-10 20:17 ` [PATCH 16/84] netfilter: xt_recent: check for unsupported user space flags kaber
2010-05-10 20:17 ` [PATCH 17/84] netfilter: remove unused headers in net/netfilter/nfnetlink.c kaber
2010-05-10 20:17 ` [PATCH 18/84] netfilter: xtables: do without explicit XT_ALIGN kaber
2010-05-10 20:17 ` [PATCH 19/84] netfilter: xtables: clean up xt_mac match routine kaber
2010-05-10 20:17 ` [PATCH 20/84] netfilter: xtables: limit xt_mac to ethernet devices kaber
2010-05-10 20:17 ` [PATCH 21/84] netfilter: xtables: resort osf kconfig text kaber
2010-05-10 20:17 ` [PATCH 22/84] netfilter: xtables: make use of caller family rather than match family kaber
2010-05-10 20:17 ` [PATCH 23/84] netfilter: update documentation fields of x_tables.h kaber
2010-05-10 20:17 ` [PATCH 24/84] netfilter: xtables: remove almost-unused xt_match_param.data member kaber
2010-05-10 20:17 ` [PATCH 25/84] netfilter: xtables: reduce holes in struct xt_target kaber
2010-05-10 20:17 ` [PATCH 26/84] netfilter: xtables: do not print any messages on ENOMEM kaber
2010-05-10 20:17 ` [PATCH 27/84] netfilter: xtables: replace custom duprintf with pr_debug kaber
2010-05-10 20:17 ` [PATCH 28/84] netfilter: xt extensions: use pr_<level> kaber
2010-05-10 20:18 ` [PATCH 29/84] netfilter: remove unused headers in net/ipv6/netfilter/ip6t_LOG.c kaber
2010-05-10 20:18 ` [PATCH 30/84] netfilter: remove unused headers in net/ipv4/netfilter/nf_nat_h323.c kaber
2010-05-10 20:18 ` [PATCH 31/84] netfilter: xtables: make use of caller family rather than target family kaber
2010-05-10 20:18 ` [PATCH 32/84] netfilter: xt extensions: use pr_<level> (2) kaber
2010-05-10 20:18 ` [PATCH 33/84] netfilter: xtables: make use of xt_request_find_target kaber
2010-05-10 20:18 ` [PATCH 34/84] netfilter: xtables: consolidate code into xt_request_find_match kaber
2010-05-10 20:18 ` [PATCH 35/84] netfilter: xt_recent: allow changing ip_list_[ug]id at runtime kaber
2010-05-10 20:18 ` [PATCH 36/84] netfilter: bridge: use NFPROTO values for NF_HOOK invocation kaber
2010-05-10 20:18 ` [PATCH 37/84] netfilter: ipv4: " kaber
2010-05-10 20:18 ` [PATCH 38/84] netfilter: ipv6: " kaber
2010-05-10 20:18 ` [PATCH 39/84] netfilter: decnet: " kaber
2010-05-10 20:18 ` [PATCH 40/84] netfilter: ipvs: " kaber
2010-05-11 3:07 ` Simon Horman
2010-05-10 20:18 ` [PATCH 41/84] netfilter: xtables: untangle spaghetti if clauses in checkentry kaber
2010-05-10 20:18 ` [PATCH 42/84] netfilter: xtables: change xt_match.checkentry return type kaber
2010-05-10 20:18 ` [PATCH 43/84] netfilter: xtables: change xt_target.checkentry " kaber
2010-05-10 20:18 ` [PATCH 44/84] netfilter: xtables: change matches to return error code kaber
2010-05-10 20:18 ` [PATCH 45/84] netfilter: xtables: change targets " kaber
2010-05-10 20:18 ` [PATCH 46/84] netfilter: xtables: slightly better error reporting kaber
2010-05-10 20:18 ` [PATCH 47/84] netfilter: xtables: shorten up return clause kaber
2010-05-10 20:18 ` [PATCH 48/84] netfilter: xtables: remove xt_hashlimit revision 0 kaber
2010-05-10 20:18 ` [PATCH 49/84] netfilter: xtables: remove xt_multiport " kaber
2010-05-10 20:18 ` [PATCH 50/84] netfilter: xtables: remove xt_string " kaber
2010-05-10 20:18 ` [PATCH 51/84] netfilter: xtables: merge registration structure to NFPROTO_UNSPEC kaber
2010-05-10 20:18 ` [PATCH 52/84] netfilter: ctnetlink: compute message size properly kaber
2010-05-10 20:18 ` [PATCH 53/84] netfilter: CLUSTERIP: clusterip_seq_stop() fix kaber
2010-05-10 20:18 ` [PATCH 54/84] netfilter: xt_hashlimit: RCU conversion kaber
2010-05-10 20:18 ` [PATCH 55/84] IPVS: fix potential stack overflow with overly long protocol names kaber
2010-05-10 20:18 ` [PATCH 56/84] netfilter: only do skb_checksum_help on CHECKSUM_PARTIAL in ip_queue kaber
2010-05-10 20:18 ` [PATCH 57/84] netfilter: only do skb_checksum_help on CHECKSUM_PARTIAL in ip6_queue kaber
2010-05-10 20:18 ` [PATCH 58/84] netfilter: only do skb_checksum_help on CHECKSUM_PARTIAL in nfnetlink_queue kaber
2010-05-10 20:18 ` [PATCH 59/84] netfilter: remove invalid rcu_dereference() calls kaber
2010-05-10 20:18 ` [PATCH 60/84] netfilter: xt_LED: add refcounts to LED target kaber
2010-05-10 20:18 ` [PATCH 61/84] netfilter: xtables: make XT_ALIGN() usable in exported headers by exporting __ALIGN_KERNEL() kaber
2010-05-10 20:18 ` [PATCH 62/84] netfilter: fix some coding styles and remove moduleparam.h kaber
2010-05-10 20:18 ` [PATCH 63/84] netfilter: bridge-netfilter: cleanup br_netfilter.c kaber
2010-05-10 20:18 ` [PATCH 64/84] netfilter: bridge-netfilter: update a comment in br_forward.c about ip_fragment() kaber
2010-05-10 20:18 ` [PATCH 65/84] Restore __ALIGN_MASK() kaber
2010-05-10 20:18 ` [PATCH 66/84] netfilter: ipv6: move POSTROUTING invocation before fragmentation kaber
2010-05-10 20:18 ` [PATCH 67/84] netfilter: ipv6: add IPSKB_REROUTED exclusion to NF_HOOK/POSTROUTING invocation kaber
2010-05-10 20:18 ` [PATCH 68/84] netfilter: bridge-netfilter: simplify IP DNAT kaber
2010-05-10 20:18 ` kaber [this message]
2010-05-10 20:18 ` [PATCH 70/84] netfilter: ipv6: move xfrm_lookup at end of ip6_route_me_harder kaber
2010-05-10 20:18 ` [PATCH 71/84] netfilter: ipt_LOG/ip6t_LOG: use more appropriate log level as default kaber
2010-05-10 20:18 ` [PATCH 72/84] netfilter: xtables: inclusion of xt_TEE kaber
2010-05-10 20:52 ` Eric Dumazet
2010-05-11 11:42 ` Patrick McHardy
2010-05-10 20:18 ` [PATCH 73/84] netfilter: xtables: make ip_tables reentrant kaber
2010-05-10 20:18 ` [PATCH 74/84] netfilter: xt_TEE: have cloned packet travel through Xtables too kaber
2010-05-10 20:18 ` [PATCH 75/84] netfilter: xtables: remove old comments about reentrancy kaber
2010-05-10 20:18 ` [PATCH 76/84] netfilter: xt_TEE: resolve oif using netdevice notifiers kaber
2010-05-10 20:18 ` [PATCH 77/84] netfilter: bridge-netfilter: fix refragmenting IP traffic encapsulated in PPPoE traffic kaber
2010-05-10 20:18 ` [PATCH 78/84] netfilter: x_tables: move sleeping allocation outside BH-disabled region kaber
2010-05-10 20:18 ` [PATCH 79/84] netfilter: ip_tables: convert pr_devel() to pr_debug() kaber
2010-05-10 20:18 ` [PATCH 80/84] netfilter: nf_conntrack: extend with extra stat counter kaber
2010-05-10 20:18 ` [PATCH 81/84] netfilter: x_tables: rectify XT_FUNCTION_MAXNAMELEN usage kaber
2010-05-10 20:18 ` [PATCH 82/84] netfilter: nf_ct_h323: switch "incomplete TPKT" message to pr_debug() kaber
2010-05-10 20:18 ` [PATCH 83/84] netfilter: nf_conntrack_proto: fix warning with CONFIG_PROVE_RCU kaber
2010-05-10 20:18 ` [PATCH 84/84] netfilter: use rcu_dereference_protected() kaber
2010-05-11 6:14 ` [PATCH 00/84] netfilter: netfilter update for 2.6.35 David Miller
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1273522735-24672-70-git-send-email-kaber@trash.net \
--to=kaber@trash.net \
--cc=davem@davemloft.net \
--cc=netdev@vger.kernel.org \
--cc=netfilter-devel@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).