[PATCH v2 nf-next 1/6] net: untangle ip_fragment and bridge netfilter

netfilter-devel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed

From: Florian Westphal <fw@strlen.de>
To: netfilter-devel@vger.kernel.org
Cc: netdev@vger.kernel.org, Florian Westphal <fw@strlen.de>,
	Andy Zhou <azhou@nicira.com>
Subject: [PATCH v2 nf-next 1/6] net: untangle ip_fragment and bridge netfilter
Date: Thu, 12 Mar 2015 18:05:20 +0100	[thread overview]
Message-ID: <1426179925-18220-2-git-send-email-fw@strlen.de> (raw)
In-Reply-To: <1426179925-18220-1-git-send-email-fw@strlen.de>

Long time ago it was possible for the netfilter ip_conntrack
core to call ip_fragment in POST_ROUTING hook.

This is no longer the case, so the only case where bridge netfilter
ends up calling ip_fragment is the direct call site in br_netfilter.c.

Add ll and mtu arguments for ip_fragment and then get rid of the bridge
netfilter specific helpers from ip_fragment.

Cc: Andy Zhou <azhou@nicira.com>
Signed-off-by: Florian Westphal <fw@strlen.de>
---
 include/linux/netfilter_bridge.h | 17 -----------------
 include/net/ip.h                 |  4 ++--
 net/bridge/br_netfilter.c        | 23 ++++++++++++++++++++---
 net/ipv4/ip_output.c             | 37 +++++++++++++++++++++----------------
 4 files changed, 43 insertions(+), 38 deletions(-)

diff --git a/include/linux/netfilter_bridge.h b/include/linux/netfilter_bridge.h
index ed0d3bf..fbbd5de 100644
--- a/include/linux/netfilter_bridge.h
+++ b/include/linux/netfilter_bridge.h
@@ -35,24 +35,8 @@ static inline unsigned int nf_bridge_encap_header_len(const struct sk_buff *skb)
 	}
 }
 
-static inline unsigned int nf_bridge_mtu_reduction(const struct sk_buff *skb)
-{
-	if (unlikely(skb->nf_bridge->mask & BRNF_PPPoE))
-		return PPPOE_SES_HLEN;
-	return 0;
-}
-
 int br_handle_frame_finish(struct sk_buff *skb);
 
-/* This is called by the IP fragmenting code and it ensures there is
- * enough room for the encapsulating header (if there is one). */
-static inline unsigned int nf_bridge_pad(const struct sk_buff *skb)
-{
-	if (skb->nf_bridge)
-		return nf_bridge_encap_header_len(skb);
-	return 0;
-}
-
 static inline void br_drop_fake_rtable(struct sk_buff *skb)
 {
 	struct dst_entry *dst = skb_dst(skb);
@@ -62,7 +46,6 @@ static inline void br_drop_fake_rtable(struct sk_buff *skb)
 }
 
 #else
-#define nf_bridge_pad(skb)			(0)
 #define br_drop_fake_rtable(skb)	        do { } while (0)
 #endif /* CONFIG_BRIDGE_NETFILTER */
 
diff --git a/include/net/ip.h b/include/net/ip.h
index 025c61c..2905a4b 100644
--- a/include/net/ip.h
+++ b/include/net/ip.h
@@ -108,8 +108,8 @@ int ip_local_deliver(struct sk_buff *skb);
 int ip_mr_input(struct sk_buff *skb);
 int ip_output(struct sock *sk, struct sk_buff *skb);
 int ip_mc_output(struct sock *sk, struct sk_buff *skb);
-int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *));
-int ip_do_nat(struct sk_buff *skb);
+int ip_fragment(struct sk_buff *skb, unsigned int mtu,
+		unsigned int ll_rs, int (*output)(struct sk_buff *));
 void ip_send_check(struct iphdr *ip);
 int __ip_local_out(struct sk_buff *skb);
 int ip_local_out_sk(struct sock *sk, struct sk_buff *skb);
diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c
index bd2d24d..550ee19 100644
--- a/net/bridge/br_netfilter.c
+++ b/net/bridge/br_netfilter.c
@@ -812,26 +812,43 @@ static int br_nf_push_frag_xmit(struct sk_buff *skb)
 	return br_dev_queue_push_xmit(skb);
 }
 
+static unsigned int nf_bridge_mtu_reduction(const struct sk_buff *skb)
+{
+	if (skb->nf_bridge->mask & BRNF_PPPoE)
+		return PPPOE_SES_HLEN;
+	return 0;
+}
+
 static int br_nf_dev_queue_xmit(struct sk_buff *skb)
 {
 	int ret;
 	int frag_max_size;
-	unsigned int mtu_reserved;
+	unsigned int mtu_reserved, mtu;
 
 	if (skb_is_gso(skb) || skb->protocol != htons(ETH_P_IP))
 		return br_dev_queue_push_xmit(skb);
 
 	mtu_reserved = nf_bridge_mtu_reduction(skb);
+	mtu = min(skb->dev->mtu, IP_MAX_MTU);
 	/* This is wrong! We should preserve the original fragment
 	 * boundaries by preserving frag_list rather than refragmenting.
 	 */
-	if (skb->len + mtu_reserved > skb->dev->mtu) {
+	if (skb->len + mtu_reserved > mtu) {
+		unsigned int llrs;
+
 		frag_max_size = BR_INPUT_SKB_CB(skb)->frag_max_size;
 		if (br_parse_ip_options(skb))
 			/* Drop invalid packet */
 			return NF_DROP;
 		IPCB(skb)->frag_max_size = frag_max_size;
-		ret = ip_fragment(skb, br_nf_push_frag_xmit);
+
+		/* for bridged IP traffic encapsulated inside f.e. a vlan header,
+		 * we need to make room for the encapsulating header
+		 */
+		llrs = nf_bridge_encap_header_len(skb);
+
+		mtu -= mtu_reserved;
+		ret = ip_fragment(skb, mtu, llrs, br_nf_push_frag_xmit);
 	} else
 		ret = br_dev_queue_push_xmit(skb);
 
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index a7aea20..fe5ec3f 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -216,6 +216,7 @@ static int ip_finish_output_gso(struct sk_buff *skb)
 	netdev_features_t features;
 	struct sk_buff *segs;
 	int ret = 0;
+	unsigned int mtu;
 
 	/* common case: locally created skb or seglen is <= mtu */
 	if (((IPCB(skb)->flags & IPSKB_FORWARDED) == 0) ||
@@ -236,6 +237,7 @@ static int ip_finish_output_gso(struct sk_buff *skb)
 		return -ENOMEM;
 	}
 
+	mtu = ip_skb_dst_mtu(skb);
 	consume_skb(skb);
 
 	do {
@@ -243,7 +245,7 @@ static int ip_finish_output_gso(struct sk_buff *skb)
 		int err;
 
 		segs->next = NULL;
-		err = ip_fragment(segs, ip_finish_output2);
+		err = ip_fragment(segs, mtu, 0, ip_finish_output2);
 
 		if (err && ret == 0)
 			ret = err;
@@ -255,6 +257,8 @@ static int ip_finish_output_gso(struct sk_buff *skb)
 
 static int ip_finish_output(struct sk_buff *skb)
 {
+	unsigned int mtu;
+
 #if defined(CONFIG_NETFILTER) && defined(CONFIG_XFRM)
 	/* Policy lookup after SNAT yielded a new policy */
 	if (skb_dst(skb)->xfrm != NULL) {
@@ -265,8 +269,9 @@ static int ip_finish_output(struct sk_buff *skb)
 	if (skb_is_gso(skb))
 		return ip_finish_output_gso(skb);
 
-	if (skb->len > ip_skb_dst_mtu(skb))
-		return ip_fragment(skb, ip_finish_output2);
+	mtu = ip_skb_dst_mtu(skb);
+	if (skb->len > mtu)
+		return ip_fragment(skb, mtu, 0, ip_finish_output2);
 
 	return ip_finish_output2(skb);
 }
@@ -472,20 +477,28 @@ static void ip_copy_metadata(struct sk_buff *to, struct sk_buff *from)
 	skb_copy_secmark(to, from);
 }
 
-/*
+/**
+ *	ip_fragment - fragment IP datagram or send ICMP error
+ *
+ *	@skb: the skb to fragment
+ *	@mtu: mtu to use for fragmentation
+ *	@ll_rs: extra linklayer space required
+ *	@output: transmit function used to send fragments
+ *
  *	This IP datagram is too large to be sent in one piece.  Break it up into
  *	smaller pieces (each of size equal to IP header plus
  *	a block of the data of the original IP data part) that will yet fit in a
  *	single device frame, and queue such a frame for sending.
  */
-
-int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
+int ip_fragment(struct sk_buff *skb,
+		unsigned int mtu, unsigned int ll_rs,
+		int (*output)(struct sk_buff *))
 {
 	struct iphdr *iph;
 	int ptr;
 	struct net_device *dev;
 	struct sk_buff *skb2;
-	unsigned int mtu, hlen, left, len, ll_rs;
+	unsigned int hlen, left, len;
 	int offset;
 	__be16 not_last_frag;
 	struct rtable *rt = skb_rtable(skb);
@@ -499,7 +512,6 @@ int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
 
 	iph = ip_hdr(skb);
 
-	mtu = ip_skb_dst_mtu(skb);
 	if (unlikely(((iph->frag_off & htons(IP_DF)) && !skb->ignore_df) ||
 		     (IPCB(skb)->frag_max_size &&
 		      IPCB(skb)->frag_max_size > mtu))) {
@@ -516,10 +528,6 @@ int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
 
 	hlen = iph->ihl * 4;
 	mtu = mtu - hlen;	/* Size of data space */
-#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
-	if (skb->nf_bridge)
-		mtu -= nf_bridge_mtu_reduction(skb);
-#endif
 	IPCB(skb)->flags |= IPSKB_FRAG_COMPLETE;
 
 	/* When frag_list is given, use it. First, check its validity:
@@ -636,10 +644,7 @@ slow_path:
 	left = skb->len - hlen;		/* Space per frame */
 	ptr = hlen;		/* Where to start from */
 
-	/* for bridged IP traffic encapsulated inside f.e. a vlan header,
-	 * we need to make room for the encapsulating header
-	 */
-	ll_rs = LL_RESERVED_SPACE_EXTRA(rt->dst.dev, nf_bridge_pad(skb));
+	ll_rs = LL_RESERVED_SPACE_EXTRA(rt->dst.dev, ll_rs);
 
 	/*
 	 *	Fragment the datagram.
-- 
2.0.5

next prev parent reply	other threads:[~2015-03-12 17:05 UTC|newest]

Thread overview: 18+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2015-03-12 17:05 [PATCH v2 nf-next 0/6] more bridge netfilter refactoring Florian Westphal
2015-03-12 17:05 ` Florian Westphal [this message]
2015-03-13  0:38   ` [PATCH v2 nf-next 1/6] net: untangle ip_fragment and bridge netfilter Andy Zhou
2015-03-16 22:55   ` Pablo Neira Ayuso
2015-03-17  4:42     ` David Miller
2015-03-17 10:11       ` Florian Westphal
2015-03-17 17:12         ` David Miller
2015-03-17 20:40           ` Florian Westphal
2015-03-17 21:38             ` David Miller
2015-03-12 17:05 ` [PATCH v2 nf-next 2/6] netfilter: bridge: don't use nf_bridge_info to store mac header Florian Westphal
2015-03-12 17:05 ` [PATCH v2 nf-next 3/6] netfilter: bridge: use skb->cb to track otherhost mangling Florian Westphal
2015-03-12 18:02   ` Oliver Hartkopp
2015-03-12 18:31     ` Florian Westphal
2015-03-12 18:35       ` Florian Westphal
2015-03-12 18:40         ` Oliver Hartkopp
2015-03-12 17:05 ` [PATCH v2 nf-next 4/6] netfilter: bridge: don't use nf_bridge_info to store proto value Florian Westphal
2015-03-12 17:05 ` [PATCH v2 nf-next 5/6] netfilter: bridge: replace remaining flags with state enum Florian Westphal
2015-03-12 17:05 ` [PATCH nf-next 6/6] netfilter: bridge: don't use nf_bridge storage during neigh resolution Florian Westphal

find likely ancestor, descendant, or conflicting patches for this message:
( dfblob:ed0d3bf dfblob:fbbd5de dfblob:025c61c dfblob:2905a4b
dfblob:bd2d24d dfblob:550ee19 dfblob:a7aea20 dfblob:fe5ec3f )
 OR (
bs:"[PATCH v2 nf-next 1/6] net: untangle ip_fragment and bridge netfilter" )
	(help)

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1426179925-18220-2-git-send-email-fw@strlen.de \
    --to=fw@strlen.de \
    --cc=azhou@nicira.com \
    --cc=netdev@vger.kernel.org \
    --cc=netfilter-devel@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).