From: Florian Westphal <fw@strlen.de>
To: netfilter-devel@vger.kernel.org
Cc: netdev@vger.kernel.org, Florian Westphal <fw@strlen.de>,
Andy Zhou <azhou@nicira.com>
Subject: [PATCH v2 nf-next 1/6] net: untangle ip_fragment and bridge netfilter
Date: Thu, 12 Mar 2015 18:05:20 +0100 [thread overview]
Message-ID: <1426179925-18220-2-git-send-email-fw@strlen.de> (raw)
In-Reply-To: <1426179925-18220-1-git-send-email-fw@strlen.de>
A long time ago it was possible for the netfilter ip_conntrack
core to call ip_fragment in the POST_ROUTING hook.
This is no longer the case, so the only case where bridge netfilter
ends up calling ip_fragment is the direct call site in br_netfilter.c.
Add mtu and ll_rs (extra link-layer space to reserve) arguments to
ip_fragment and then get rid of the bridge netfilter specific helpers
from ip_fragment.
Cc: Andy Zhou <azhou@nicira.com>
Signed-off-by: Florian Westphal <fw@strlen.de>
---
include/linux/netfilter_bridge.h | 17 -----------------
include/net/ip.h | 4 ++--
net/bridge/br_netfilter.c | 23 ++++++++++++++++++++---
net/ipv4/ip_output.c | 37 +++++++++++++++++++++----------------
4 files changed, 43 insertions(+), 38 deletions(-)
diff --git a/include/linux/netfilter_bridge.h b/include/linux/netfilter_bridge.h
index ed0d3bf..fbbd5de 100644
--- a/include/linux/netfilter_bridge.h
+++ b/include/linux/netfilter_bridge.h
@@ -35,24 +35,8 @@ static inline unsigned int nf_bridge_encap_header_len(const struct sk_buff *skb)
}
}
-static inline unsigned int nf_bridge_mtu_reduction(const struct sk_buff *skb)
-{
- if (unlikely(skb->nf_bridge->mask & BRNF_PPPoE))
- return PPPOE_SES_HLEN;
- return 0;
-}
-
int br_handle_frame_finish(struct sk_buff *skb);
-/* This is called by the IP fragmenting code and it ensures there is
- * enough room for the encapsulating header (if there is one). */
-static inline unsigned int nf_bridge_pad(const struct sk_buff *skb)
-{
- if (skb->nf_bridge)
- return nf_bridge_encap_header_len(skb);
- return 0;
-}
-
static inline void br_drop_fake_rtable(struct sk_buff *skb)
{
struct dst_entry *dst = skb_dst(skb);
@@ -62,7 +46,6 @@ static inline void br_drop_fake_rtable(struct sk_buff *skb)
}
#else
-#define nf_bridge_pad(skb) (0)
#define br_drop_fake_rtable(skb) do { } while (0)
#endif /* CONFIG_BRIDGE_NETFILTER */
diff --git a/include/net/ip.h b/include/net/ip.h
index 025c61c..2905a4b 100644
--- a/include/net/ip.h
+++ b/include/net/ip.h
@@ -108,8 +108,8 @@ int ip_local_deliver(struct sk_buff *skb);
int ip_mr_input(struct sk_buff *skb);
int ip_output(struct sock *sk, struct sk_buff *skb);
int ip_mc_output(struct sock *sk, struct sk_buff *skb);
-int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *));
-int ip_do_nat(struct sk_buff *skb);
+int ip_fragment(struct sk_buff *skb, unsigned int mtu,
+ unsigned int ll_rs, int (*output)(struct sk_buff *));
void ip_send_check(struct iphdr *ip);
int __ip_local_out(struct sk_buff *skb);
int ip_local_out_sk(struct sock *sk, struct sk_buff *skb);
diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c
index bd2d24d..550ee19 100644
--- a/net/bridge/br_netfilter.c
+++ b/net/bridge/br_netfilter.c
@@ -812,26 +812,43 @@ static int br_nf_push_frag_xmit(struct sk_buff *skb)
return br_dev_queue_push_xmit(skb);
}
+static unsigned int nf_bridge_mtu_reduction(const struct sk_buff *skb)
+{
+ if (skb->nf_bridge->mask & BRNF_PPPoE)
+ return PPPOE_SES_HLEN;
+ return 0;
+}
+
static int br_nf_dev_queue_xmit(struct sk_buff *skb)
{
int ret;
int frag_max_size;
- unsigned int mtu_reserved;
+ unsigned int mtu_reserved, mtu;
if (skb_is_gso(skb) || skb->protocol != htons(ETH_P_IP))
return br_dev_queue_push_xmit(skb);
mtu_reserved = nf_bridge_mtu_reduction(skb);
+ mtu = min(skb->dev->mtu, IP_MAX_MTU);
/* This is wrong! We should preserve the original fragment
* boundaries by preserving frag_list rather than refragmenting.
*/
- if (skb->len + mtu_reserved > skb->dev->mtu) {
+ if (skb->len + mtu_reserved > mtu) {
+ unsigned int llrs;
+
frag_max_size = BR_INPUT_SKB_CB(skb)->frag_max_size;
if (br_parse_ip_options(skb))
/* Drop invalid packet */
return NF_DROP;
IPCB(skb)->frag_max_size = frag_max_size;
- ret = ip_fragment(skb, br_nf_push_frag_xmit);
+
+ /* for bridged IP traffic encapsulated inside f.e. a vlan header,
+ * we need to make room for the encapsulating header
+ */
+ llrs = nf_bridge_encap_header_len(skb);
+
+ mtu -= mtu_reserved;
+ ret = ip_fragment(skb, mtu, llrs, br_nf_push_frag_xmit);
} else
ret = br_dev_queue_push_xmit(skb);
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index a7aea20..fe5ec3f 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -216,6 +216,7 @@ static int ip_finish_output_gso(struct sk_buff *skb)
netdev_features_t features;
struct sk_buff *segs;
int ret = 0;
+ unsigned int mtu;
/* common case: locally created skb or seglen is <= mtu */
if (((IPCB(skb)->flags & IPSKB_FORWARDED) == 0) ||
@@ -236,6 +237,7 @@ static int ip_finish_output_gso(struct sk_buff *skb)
return -ENOMEM;
}
+ mtu = ip_skb_dst_mtu(skb);
consume_skb(skb);
do {
@@ -243,7 +245,7 @@ static int ip_finish_output_gso(struct sk_buff *skb)
int err;
segs->next = NULL;
- err = ip_fragment(segs, ip_finish_output2);
+ err = ip_fragment(segs, mtu, 0, ip_finish_output2);
if (err && ret == 0)
ret = err;
@@ -255,6 +257,8 @@ static int ip_finish_output_gso(struct sk_buff *skb)
static int ip_finish_output(struct sk_buff *skb)
{
+ unsigned int mtu;
+
#if defined(CONFIG_NETFILTER) && defined(CONFIG_XFRM)
/* Policy lookup after SNAT yielded a new policy */
if (skb_dst(skb)->xfrm != NULL) {
@@ -265,8 +269,9 @@ static int ip_finish_output(struct sk_buff *skb)
if (skb_is_gso(skb))
return ip_finish_output_gso(skb);
- if (skb->len > ip_skb_dst_mtu(skb))
- return ip_fragment(skb, ip_finish_output2);
+ mtu = ip_skb_dst_mtu(skb);
+ if (skb->len > mtu)
+ return ip_fragment(skb, mtu, 0, ip_finish_output2);
return ip_finish_output2(skb);
}
@@ -472,20 +477,28 @@ static void ip_copy_metadata(struct sk_buff *to, struct sk_buff *from)
skb_copy_secmark(to, from);
}
-/*
+/**
+ * ip_fragment - fragment IP datagram or send ICMP error
+ *
+ * @skb: the skb to fragment
+ * @mtu: mtu to use for fragmentation
+ * @ll_rs: extra linklayer space required
+ * @output: transmit function used to send fragments
+ *
* This IP datagram is too large to be sent in one piece. Break it up into
* smaller pieces (each of size equal to IP header plus
* a block of the data of the original IP data part) that will yet fit in a
* single device frame, and queue such a frame for sending.
*/
-
-int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
+int ip_fragment(struct sk_buff *skb,
+ unsigned int mtu, unsigned int ll_rs,
+ int (*output)(struct sk_buff *))
{
struct iphdr *iph;
int ptr;
struct net_device *dev;
struct sk_buff *skb2;
- unsigned int mtu, hlen, left, len, ll_rs;
+ unsigned int hlen, left, len;
int offset;
__be16 not_last_frag;
struct rtable *rt = skb_rtable(skb);
@@ -499,7 +512,6 @@ int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
iph = ip_hdr(skb);
- mtu = ip_skb_dst_mtu(skb);
if (unlikely(((iph->frag_off & htons(IP_DF)) && !skb->ignore_df) ||
(IPCB(skb)->frag_max_size &&
IPCB(skb)->frag_max_size > mtu))) {
@@ -516,10 +528,6 @@ int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
hlen = iph->ihl * 4;
mtu = mtu - hlen; /* Size of data space */
-#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
- if (skb->nf_bridge)
- mtu -= nf_bridge_mtu_reduction(skb);
-#endif
IPCB(skb)->flags |= IPSKB_FRAG_COMPLETE;
/* When frag_list is given, use it. First, check its validity:
@@ -636,10 +644,7 @@ slow_path:
left = skb->len - hlen; /* Space per frame */
ptr = hlen; /* Where to start from */
- /* for bridged IP traffic encapsulated inside f.e. a vlan header,
- * we need to make room for the encapsulating header
- */
- ll_rs = LL_RESERVED_SPACE_EXTRA(rt->dst.dev, nf_bridge_pad(skb));
+ ll_rs = LL_RESERVED_SPACE_EXTRA(rt->dst.dev, ll_rs);
/*
* Fragment the datagram.
--
2.0.5
next prev parent reply other threads:[~2015-03-12 17:05 UTC|newest]
Thread overview: 18+ messages / expand[flat|nested] mbox.gz Atom feed top
2015-03-12 17:05 [PATCH v2 nf-next 0/6] more bridge netfilter refactoring Florian Westphal
2015-03-12 17:05 ` Florian Westphal [this message]
2015-03-13 0:38 ` [PATCH v2 nf-next 1/6] net: untangle ip_fragment and bridge netfilter Andy Zhou
2015-03-16 22:55 ` Pablo Neira Ayuso
2015-03-17 4:42 ` David Miller
2015-03-17 10:11 ` Florian Westphal
2015-03-17 17:12 ` David Miller
2015-03-17 20:40 ` Florian Westphal
2015-03-17 21:38 ` David Miller
2015-03-12 17:05 ` [PATCH v2 nf-next 2/6] netfilter: bridge: don't use nf_bridge_info to store mac header Florian Westphal
2015-03-12 17:05 ` [PATCH v2 nf-next 3/6] netfilter: bridge: use skb->cb to track otherhost mangling Florian Westphal
2015-03-12 18:02 ` Oliver Hartkopp
2015-03-12 18:31 ` Florian Westphal
2015-03-12 18:35 ` Florian Westphal
2015-03-12 18:40 ` Oliver Hartkopp
2015-03-12 17:05 ` [PATCH v2 nf-next 4/6] netfilter: bridge: don't use nf_bridge_info to store proto value Florian Westphal
2015-03-12 17:05 ` [PATCH v2 nf-next 5/6] netfilter: bridge: replace remaining flags with state enum Florian Westphal
2015-03-12 17:05 ` [PATCH nf-next 6/6] netfilter: bridge: don't use nf_bridge storage during neigh resolution Florian Westphal
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1426179925-18220-2-git-send-email-fw@strlen.de \
--to=fw@strlen.de \
--cc=azhou@nicira.com \
--cc=netdev@vger.kernel.org \
--cc=netfilter-devel@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).