From: Pablo Neira Ayuso <pablo@netfilter.org>
To: David Miller <davem@davemloft.net>
Cc: Florian Westphal <fw@strlen.de>,
netfilter-devel@vger.kernel.org, netdev@vger.kernel.org,
Andy Zhou <azhou@nicira.com>
Subject: Re: [PATCH v2 nf-next 1/6] net: untangle ip_fragment and bridge netfilter
Date: Mon, 16 Mar 2015 23:55:45 +0100 [thread overview]
Message-ID: <20150316225545.GA4454@salvia> (raw)
In-Reply-To: <1426179925-18220-2-git-send-email-fw@strlen.de>
Hi David,
This patch changes the interface ip_fragment(). If this sounds
reasonable to you, please ack it and I'll route it to you in the next
nf-next batch.
Thanks.
On Thu, Mar 12, 2015 at 06:05:20PM +0100, Florian Westphal wrote:
> Long time ago it was possible for the netfilter ip_conntrack
> core to call ip_fragment in POST_ROUTING hook.
>
> This is no longer the case, so the only case where bridge netfilter
> ends up calling ip_fragment is the direct call site in br_netfilter.c.
>
> Add ll and mtu arguments for ip_fragment and then get rid of the bridge
> netfilter specific helpers from ip_fragment.
>
> Cc: Andy Zhou <azhou@nicira.com>
> Signed-off-by: Florian Westphal <fw@strlen.de>
> ---
> include/linux/netfilter_bridge.h | 17 -----------------
> include/net/ip.h | 4 ++--
> net/bridge/br_netfilter.c | 23 ++++++++++++++++++++---
> net/ipv4/ip_output.c | 37 +++++++++++++++++++++----------------
> 4 files changed, 43 insertions(+), 38 deletions(-)
>
> diff --git a/include/linux/netfilter_bridge.h b/include/linux/netfilter_bridge.h
> index ed0d3bf..fbbd5de 100644
> --- a/include/linux/netfilter_bridge.h
> +++ b/include/linux/netfilter_bridge.h
> @@ -35,24 +35,8 @@ static inline unsigned int nf_bridge_encap_header_len(const struct sk_buff *skb)
> }
> }
>
> -static inline unsigned int nf_bridge_mtu_reduction(const struct sk_buff *skb)
> -{
> - if (unlikely(skb->nf_bridge->mask & BRNF_PPPoE))
> - return PPPOE_SES_HLEN;
> - return 0;
> -}
> -
> int br_handle_frame_finish(struct sk_buff *skb);
>
> -/* This is called by the IP fragmenting code and it ensures there is
> - * enough room for the encapsulating header (if there is one). */
> -static inline unsigned int nf_bridge_pad(const struct sk_buff *skb)
> -{
> - if (skb->nf_bridge)
> - return nf_bridge_encap_header_len(skb);
> - return 0;
> -}
> -
> static inline void br_drop_fake_rtable(struct sk_buff *skb)
> {
> struct dst_entry *dst = skb_dst(skb);
> @@ -62,7 +46,6 @@ static inline void br_drop_fake_rtable(struct sk_buff *skb)
> }
>
> #else
> -#define nf_bridge_pad(skb) (0)
> #define br_drop_fake_rtable(skb) do { } while (0)
> #endif /* CONFIG_BRIDGE_NETFILTER */
>
> diff --git a/include/net/ip.h b/include/net/ip.h
> index 025c61c..2905a4b 100644
> --- a/include/net/ip.h
> +++ b/include/net/ip.h
> @@ -108,8 +108,8 @@ int ip_local_deliver(struct sk_buff *skb);
> int ip_mr_input(struct sk_buff *skb);
> int ip_output(struct sock *sk, struct sk_buff *skb);
> int ip_mc_output(struct sock *sk, struct sk_buff *skb);
> -int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *));
> -int ip_do_nat(struct sk_buff *skb);
> +int ip_fragment(struct sk_buff *skb, unsigned int mtu,
> + unsigned int ll_rs, int (*output)(struct sk_buff *));
> void ip_send_check(struct iphdr *ip);
> int __ip_local_out(struct sk_buff *skb);
> int ip_local_out_sk(struct sock *sk, struct sk_buff *skb);
> diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c
> index bd2d24d..550ee19 100644
> --- a/net/bridge/br_netfilter.c
> +++ b/net/bridge/br_netfilter.c
> @@ -812,26 +812,43 @@ static int br_nf_push_frag_xmit(struct sk_buff *skb)
> return br_dev_queue_push_xmit(skb);
> }
>
> +static unsigned int nf_bridge_mtu_reduction(const struct sk_buff *skb)
> +{
> + if (skb->nf_bridge->mask & BRNF_PPPoE)
> + return PPPOE_SES_HLEN;
> + return 0;
> +}
> +
> static int br_nf_dev_queue_xmit(struct sk_buff *skb)
> {
> int ret;
> int frag_max_size;
> - unsigned int mtu_reserved;
> + unsigned int mtu_reserved, mtu;
>
> if (skb_is_gso(skb) || skb->protocol != htons(ETH_P_IP))
> return br_dev_queue_push_xmit(skb);
>
> mtu_reserved = nf_bridge_mtu_reduction(skb);
> + mtu = min(skb->dev->mtu, IP_MAX_MTU);
> /* This is wrong! We should preserve the original fragment
> * boundaries by preserving frag_list rather than refragmenting.
> */
> - if (skb->len + mtu_reserved > skb->dev->mtu) {
> + if (skb->len + mtu_reserved > mtu) {
> + unsigned int llrs;
> +
> frag_max_size = BR_INPUT_SKB_CB(skb)->frag_max_size;
> if (br_parse_ip_options(skb))
> /* Drop invalid packet */
> return NF_DROP;
> IPCB(skb)->frag_max_size = frag_max_size;
> - ret = ip_fragment(skb, br_nf_push_frag_xmit);
> +
> + /* for bridged IP traffic encapsulated inside f.e. a vlan header,
> + * we need to make room for the encapsulating header
> + */
> + llrs = nf_bridge_encap_header_len(skb);
> +
> + mtu -= mtu_reserved;
> + ret = ip_fragment(skb, mtu, llrs, br_nf_push_frag_xmit);
> } else
> ret = br_dev_queue_push_xmit(skb);
>
> diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
> index a7aea20..fe5ec3f 100644
> --- a/net/ipv4/ip_output.c
> +++ b/net/ipv4/ip_output.c
> @@ -216,6 +216,7 @@ static int ip_finish_output_gso(struct sk_buff *skb)
> netdev_features_t features;
> struct sk_buff *segs;
> int ret = 0;
> + unsigned int mtu;
>
> /* common case: locally created skb or seglen is <= mtu */
> if (((IPCB(skb)->flags & IPSKB_FORWARDED) == 0) ||
> @@ -236,6 +237,7 @@ static int ip_finish_output_gso(struct sk_buff *skb)
> return -ENOMEM;
> }
>
> + mtu = ip_skb_dst_mtu(skb);
> consume_skb(skb);
>
> do {
> @@ -243,7 +245,7 @@ static int ip_finish_output_gso(struct sk_buff *skb)
> int err;
>
> segs->next = NULL;
> - err = ip_fragment(segs, ip_finish_output2);
> + err = ip_fragment(segs, mtu, 0, ip_finish_output2);
>
> if (err && ret == 0)
> ret = err;
> @@ -255,6 +257,8 @@ static int ip_finish_output_gso(struct sk_buff *skb)
>
> static int ip_finish_output(struct sk_buff *skb)
> {
> + unsigned int mtu;
> +
> #if defined(CONFIG_NETFILTER) && defined(CONFIG_XFRM)
> /* Policy lookup after SNAT yielded a new policy */
> if (skb_dst(skb)->xfrm != NULL) {
> @@ -265,8 +269,9 @@ static int ip_finish_output(struct sk_buff *skb)
> if (skb_is_gso(skb))
> return ip_finish_output_gso(skb);
>
> - if (skb->len > ip_skb_dst_mtu(skb))
> - return ip_fragment(skb, ip_finish_output2);
> + mtu = ip_skb_dst_mtu(skb);
> + if (skb->len > mtu)
> + return ip_fragment(skb, mtu, 0, ip_finish_output2);
>
> return ip_finish_output2(skb);
> }
> @@ -472,20 +477,28 @@ static void ip_copy_metadata(struct sk_buff *to, struct sk_buff *from)
> skb_copy_secmark(to, from);
> }
>
> -/*
> +/**
> + * ip_fragment - fragment IP datagram or send ICMP error
> + *
> + * @skb: the skb to fragment
> + * @mtu: mtu to use for fragmentation
> + * @ll_rs: extra linklayer space required
> + * @output: transmit function used to send fragments
> + *
> * This IP datagram is too large to be sent in one piece. Break it up into
> * smaller pieces (each of size equal to IP header plus
> * a block of the data of the original IP data part) that will yet fit in a
> * single device frame, and queue such a frame for sending.
> */
> -
> -int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
> +int ip_fragment(struct sk_buff *skb,
> + unsigned int mtu, unsigned int ll_rs,
> + int (*output)(struct sk_buff *))
> {
> struct iphdr *iph;
> int ptr;
> struct net_device *dev;
> struct sk_buff *skb2;
> - unsigned int mtu, hlen, left, len, ll_rs;
> + unsigned int hlen, left, len;
> int offset;
> __be16 not_last_frag;
> struct rtable *rt = skb_rtable(skb);
> @@ -499,7 +512,6 @@ int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
>
> iph = ip_hdr(skb);
>
> - mtu = ip_skb_dst_mtu(skb);
> if (unlikely(((iph->frag_off & htons(IP_DF)) && !skb->ignore_df) ||
> (IPCB(skb)->frag_max_size &&
> IPCB(skb)->frag_max_size > mtu))) {
> @@ -516,10 +528,6 @@ int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
>
> hlen = iph->ihl * 4;
> mtu = mtu - hlen; /* Size of data space */
> -#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
> - if (skb->nf_bridge)
> - mtu -= nf_bridge_mtu_reduction(skb);
> -#endif
> IPCB(skb)->flags |= IPSKB_FRAG_COMPLETE;
>
> /* When frag_list is given, use it. First, check its validity:
> @@ -636,10 +644,7 @@ slow_path:
> left = skb->len - hlen; /* Space per frame */
> ptr = hlen; /* Where to start from */
>
> - /* for bridged IP traffic encapsulated inside f.e. a vlan header,
> - * we need to make room for the encapsulating header
> - */
> - ll_rs = LL_RESERVED_SPACE_EXTRA(rt->dst.dev, nf_bridge_pad(skb));
> + ll_rs = LL_RESERVED_SPACE_EXTRA(rt->dst.dev, ll_rs);
>
> /*
> * Fragment the datagram.
> --
> 2.0.5
>
> --
> To unsubscribe from this list: send the line "unsubscribe netdev" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at http://vger.kernel.org/majordomo-info.html
next prev parent reply other threads:[~2015-03-16 22:55 UTC|newest]
Thread overview: 18+ messages / expand[flat|nested] mbox.gz Atom feed top
2015-03-12 17:05 [PATCH v2 nf-next 0/6] more bridge netfilter refactoring Florian Westphal
2015-03-12 17:05 ` [PATCH v2 nf-next 1/6] net: untangle ip_fragment and bridge netfilter Florian Westphal
2015-03-13 0:38 ` Andy Zhou
2015-03-16 22:55 ` Pablo Neira Ayuso [this message]
2015-03-17 4:42 ` David Miller
2015-03-17 10:11 ` Florian Westphal
2015-03-17 17:12 ` David Miller
2015-03-17 20:40 ` Florian Westphal
2015-03-17 21:38 ` David Miller
2015-03-12 17:05 ` [PATCH v2 nf-next 2/6] netfilter: bridge: don't use nf_bridge_info to store mac header Florian Westphal
2015-03-12 17:05 ` [PATCH v2 nf-next 3/6] netfilter: bridge: use skb->cb to track otherhost mangling Florian Westphal
2015-03-12 18:02 ` Oliver Hartkopp
2015-03-12 18:31 ` Florian Westphal
2015-03-12 18:35 ` Florian Westphal
2015-03-12 18:40 ` Oliver Hartkopp
2015-03-12 17:05 ` [PATCH v2 nf-next 4/6] netfilter: bridge: don't use nf_bridge_info to store proto value Florian Westphal
2015-03-12 17:05 ` [PATCH v2 nf-next 5/6] netfilter: bridge: replace remaining flags with state enum Florian Westphal
2015-03-12 17:05 ` [PATCH nf-next 6/6] netfilter: bridge: don't use nf_bridge storage during neigh resolution Florian Westphal
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20150316225545.GA4454@salvia \
--to=pablo@netfilter.org \
--cc=azhou@nicira.com \
--cc=davem@davemloft.net \
--cc=fw@strlen.de \
--cc=netdev@vger.kernel.org \
--cc=netfilter-devel@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).