From: Pablo Neira Ayuso <pablo@netfilter.org>
To: Florian Westphal <fw@strlen.de>
Cc: netfilter-devel@vger.kernel.org
Subject: Re: [PATCH nf-next v2 4/4] netfilter: merge ctinfo into nfct pointer storage area
Date: Wed, 18 Jan 2017 20:13:03 +0100 [thread overview]
Message-ID: <20170118191303.GA13621@salvia> (raw)
In-Reply-To: <1483615609-29155-5-git-send-email-fw@strlen.de>
Hi Florian,
Sorry for taking a while to look into this.
On Thu, Jan 05, 2017 at 12:26:49PM +0100, Florian Westphal wrote:
> Merge conntrack related status bits into skb->_nfct.
> After this change conntrack operations (lookup, creation, matching from
> ruleset) only accesses one instead of two sk_buff cache lines.
>
> This works for normal conntracks because we use a slab cache that
> guarantees hw cacheline or 8byte alignment (whatever is larger) so the
> 3 bits needed for ctinfo won't overlap with nf_conn addresses.
>
> For the conntrack templates we need an 8 byte kmalloc minalign, in case
> we have arches where this isn't true a build-bug on test will catch
> this.
I think it would be good if we can split this patch in two by
introducing several helper functions in first place. This change would
be smaller.
> Signed-off-by: Florian Westphal <fw@strlen.de>
> ---
> Changes since v2:
> - kbuild robot fixes:
> 1. Avoid 'unused variable' warning if NF_CONNTRACK=n
> 2. remove erroneous ';' in a 'if (foo);' construct in nf_defrag_ipv4.c
>
> include/linux/skbuff.h | 30 +++++++++++++++-----------
> include/net/ip_vs.h | 4 ++--
> include/net/netfilter/nf_conntrack.h | 10 ++++++---
> include/net/netfilter/nf_conntrack_core.h | 2 +-
> net/core/skbuff.c | 2 +-
> net/ipv4/netfilter/ipt_SYNPROXY.c | 11 +++++-----
> net/ipv4/netfilter/nf_conntrack_proto_icmp.c | 6 +++---
> net/ipv4/netfilter/nf_defrag_ipv4.c | 4 ++--
> net/ipv4/netfilter/nf_dup_ipv4.c | 9 +++++---
> net/ipv6/netfilter/ip6t_SYNPROXY.c | 11 +++++-----
> net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c | 12 +++++------
> net/ipv6/netfilter/nf_defrag_ipv6_hooks.c | 4 ++--
> net/ipv6/netfilter/nf_dup_ipv6.c | 10 ++++++---
> net/netfilter/core.c | 2 +-
> net/netfilter/nf_conntrack_core.c | 23 ++++++++++----------
> net/netfilter/nf_conntrack_standalone.c | 4 ++++
> net/netfilter/nf_nat_helper.c | 2 +-
> net/netfilter/nft_ct.c | 3 +--
> net/netfilter/xt_CT.c | 13 ++++++-----
> net/openvswitch/conntrack.c | 22 +++++++++----------
> net/sched/cls_flow.c | 2 +-
> 21 files changed, 100 insertions(+), 86 deletions(-)
>
> diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
> index b53c0cfd417e..fee935fea6f9 100644
> --- a/include/linux/skbuff.h
> +++ b/include/linux/skbuff.h
> @@ -585,7 +585,6 @@ static inline bool skb_mstamp_after(const struct skb_mstamp *t1,
> * @cloned: Head may be cloned (check refcnt to be sure)
> * @ip_summed: Driver fed us an IP checksum
> * @nohdr: Payload reference only, must not modify header
> - * @nfctinfo: Relationship of this skb to the connection
> * @pkt_type: Packet class
> * @fclone: skbuff clone status
> * @ipvs_property: skbuff is owned by ipvs
> @@ -594,7 +593,7 @@ static inline bool skb_mstamp_after(const struct skb_mstamp *t1,
> * @nf_trace: netfilter packet trace flag
> * @protocol: Packet protocol from driver
> * @destructor: Destruct function
> - * @nfct: Associated connection, if any
> + * @_nfct: Associated connection, if any (with nfctinfo bits)
> * @nf_bridge: Saved data about a bridged frame - see br_netfilter.c
> * @skb_iif: ifindex of device we arrived on
> * @tc_index: Traffic control index
> @@ -668,7 +667,7 @@ struct sk_buff {
> struct sec_path *sp;
> #endif
> #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
> - struct nf_conntrack *nfct;
> + unsigned long _nfct;
> #endif
> #if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
> struct nf_bridge_info *nf_bridge;
> @@ -721,7 +720,6 @@ struct sk_buff {
> __u8 pkt_type:3;
> __u8 pfmemalloc:1;
> __u8 ignore_df:1;
> - __u8 nfctinfo:3;
>
> __u8 nf_trace:1;
> __u8 ip_summed:2;
> @@ -836,6 +834,7 @@ static inline bool skb_pfmemalloc(const struct sk_buff *skb)
> #define SKB_DST_NOREF 1UL
> #define SKB_DST_PTRMASK ~(SKB_DST_NOREF)
>
> +#define SKB_NFCT_PTRMASK ~(7UL)
> /**
> * skb_dst - returns skb dst_entry
> * @skb: buffer
> @@ -3555,6 +3554,15 @@ static inline void skb_remcsum_process(struct sk_buff *skb, void *ptr,
>
> #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
> void nf_conntrack_destroy(struct nf_conntrack *nfct);
> +static inline struct nf_conntrack *skb_nfct(const struct sk_buff *skb)
> +{
> + struct nf_conntrack *nfct;
> +
> + nfct = (void *) (skb->_nfct & SKB_NFCT_PTRMASK);
> +
> + return nfct;
> +}
> +
> static inline void nf_conntrack_put(struct nf_conntrack *nfct)
> {
> if (nfct && atomic_dec_and_test(&nfct->use))
> @@ -3581,8 +3589,8 @@ static inline void nf_bridge_get(struct nf_bridge_info *nf_bridge)
> static inline void nf_reset(struct sk_buff *skb)
> {
> #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
> - nf_conntrack_put(skb->nfct);
> - skb->nfct = NULL;
> + nf_conntrack_put(skb_nfct(skb));
> + skb->_nfct = 0;
> #endif
> #if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
> nf_bridge_put(skb->nf_bridge);
> @@ -3602,10 +3610,8 @@ static inline void __nf_copy(struct sk_buff *dst, const struct sk_buff *src,
> bool copy)
> {
> #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
> - dst->nfct = src->nfct;
> - nf_conntrack_get(src->nfct);
> - if (copy)
> - dst->nfctinfo = src->nfctinfo;
> + dst->_nfct = src->_nfct;
> + nf_conntrack_get(skb_nfct(src));
> #endif
> #if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
> dst->nf_bridge = src->nf_bridge;
> @@ -3620,7 +3626,7 @@ static inline void __nf_copy(struct sk_buff *dst, const struct sk_buff *src,
> static inline void nf_copy(struct sk_buff *dst, const struct sk_buff *src)
> {
> #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
> - nf_conntrack_put(dst->nfct);
> + nf_conntrack_put(skb_nfct(dst));
> #endif
> #if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
> nf_bridge_put(dst->nf_bridge);
> @@ -3653,7 +3659,7 @@ static inline bool skb_irq_freeable(const struct sk_buff *skb)
> !skb->sp &&
> #endif
> #if IS_ENABLED(CONFIG_NF_CONNTRACK)
> - !skb->nfct &&
> + !skb->_nfct &&
static inline bool nf_ct_is_set(const struct sk_buff *skb)
{
return skb->_nfct;
}
> #endif
> !skb->_skb_refdst &&
> !skb_has_frag_list(skb);
> diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h
> index 2a344ebd7ebe..cef639dd2d6e 100644
> --- a/include/net/ip_vs.h
> +++ b/include/net/ip_vs.h
> @@ -1559,8 +1559,8 @@ static inline void ip_vs_notrack(struct sk_buff *skb)
> nf_conntrack_put(&ct->ct_general);
> untracked = nf_ct_untracked_get();
> nf_conntrack_get(&untracked->ct_general);
> - skb->nfct = &untracked->ct_general;
> - skb->nfctinfo = IP_CT_NEW;
> + skb->_nfct = (unsigned long)
> + &nf_ct_untracked_get()->ct_general | IP_CT_NEW;
nf_ct_set(skb, nf_ct_untracked_get()->ct_general, IP_CT_NEW);
> }
> #endif
> }
> diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h
> index 5916aa9ab3f0..ccc18981f46b 100644
> --- a/include/net/netfilter/nf_conntrack.h
> +++ b/include/net/netfilter/nf_conntrack.h
> @@ -75,7 +75,7 @@ struct nf_conn {
> /* Usage count in here is 1 for hash table, 1 per skb,
> * plus 1 for any connection(s) we are `master' for
> *
> - * Hint, SKB address this struct and refcnt via skb->nfct and
> + * Hint, SKB address this struct and refcnt via skb->_nfct and
> * helpers nf_conntrack_get() and nf_conntrack_put().
> * Helper nf_ct_put() equals nf_conntrack_put() by dec refcnt,
> * beware nf_ct_get() is different and don't inc refcnt.
> @@ -162,12 +162,16 @@ void nf_conntrack_alter_reply(struct nf_conn *ct,
> int nf_conntrack_tuple_taken(const struct nf_conntrack_tuple *tuple,
> const struct nf_conn *ignored_conntrack);
>
> +#define NFCT_INFOMASK 7UL
> +#define NFCT_PTRMASK ~(NFCT_INFOMASK)
> +
> /* Return conntrack_info and tuple hash for given skb. */
> static inline struct nf_conn *
> nf_ct_get(const struct sk_buff *skb, enum ip_conntrack_info *ctinfo)
> {
> - *ctinfo = skb->nfctinfo;
> - return (struct nf_conn *)skb->nfct;
> + *ctinfo = skb->_nfct & NFCT_INFOMASK;
> +
> + return (struct nf_conn *)(skb->_nfct & NFCT_PTRMASK);
> }
>
> /* decrement reference count on a conntrack */
> diff --git a/include/net/netfilter/nf_conntrack_core.h b/include/net/netfilter/nf_conntrack_core.h
> index 62e17d1319ff..10223ca4c96e 100644
> --- a/include/net/netfilter/nf_conntrack_core.h
> +++ b/include/net/netfilter/nf_conntrack_core.h
> @@ -62,7 +62,7 @@ int __nf_conntrack_confirm(struct sk_buff *skb);
> /* Confirm a connection: returns NF_DROP if packet must be dropped. */
> static inline int nf_conntrack_confirm(struct sk_buff *skb)
> {
> - struct nf_conn *ct = (struct nf_conn *)skb->nfct;
> + struct nf_conn *ct = (struct nf_conn *)(skb->_nfct & NFCT_PTRMASK);
struct nf_conn *ct = nf_conn_get(skb);
Would be like nf_ct_get() but returns no ctinfo.
> int ret = NF_ACCEPT;
>
> if (ct && !nf_ct_is_untracked(ct)) {
> diff --git a/net/core/skbuff.c b/net/core/skbuff.c
> index 5a03730fbc1a..cac3ebfb4b45 100644
> --- a/net/core/skbuff.c
> +++ b/net/core/skbuff.c
> @@ -655,7 +655,7 @@ static void skb_release_head_state(struct sk_buff *skb)
> skb->destructor(skb);
> }
> #if IS_ENABLED(CONFIG_NF_CONNTRACK)
> - nf_conntrack_put(skb->nfct);
> + nf_conntrack_put(skb_nfct(skb));
> #endif
> #if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
> nf_bridge_put(skb->nf_bridge);
> diff --git a/net/ipv4/netfilter/ipt_SYNPROXY.c b/net/ipv4/netfilter/ipt_SYNPROXY.c
> index 30c0de53e254..b9e3d34680c6 100644
> --- a/net/ipv4/netfilter/ipt_SYNPROXY.c
> +++ b/net/ipv4/netfilter/ipt_SYNPROXY.c
> @@ -57,8 +57,7 @@ synproxy_send_tcp(struct net *net,
> goto free_nskb;
>
> if (nfct) {
> - nskb->nfct = nfct;
> - nskb->nfctinfo = ctinfo;
> + nskb->_nfct = (unsigned long)nfct | ctinfo;
Again nf_ct_set().
> nf_conntrack_get(nfct);
> }
>
> @@ -107,8 +106,8 @@ synproxy_send_client_synack(struct net *net,
>
> synproxy_build_options(nth, opts);
>
> - synproxy_send_tcp(net, skb, nskb, skb->nfct, IP_CT_ESTABLISHED_REPLY,
> - niph, nth, tcp_hdr_size);
> + synproxy_send_tcp(net, skb, nskb, skb_nfct(skb),
> + IP_CT_ESTABLISHED_REPLY, niph, nth, tcp_hdr_size);
> }
>
> static void
> @@ -230,8 +229,8 @@ synproxy_send_client_ack(struct net *net,
>
> synproxy_build_options(nth, opts);
>
> - synproxy_send_tcp(net, skb, nskb, skb->nfct, IP_CT_ESTABLISHED_REPLY,
> - niph, nth, tcp_hdr_size);
> + synproxy_send_tcp(net, skb, nskb, skb_nfct(skb),
> + IP_CT_ESTABLISHED_REPLY, niph, nth, tcp_hdr_size);
> }
>
> static bool
> diff --git a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
> index 566afac98a88..613596cc65dc 100644
> --- a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
> +++ b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
> @@ -137,7 +137,7 @@ icmp_error_message(struct net *net, struct nf_conn *tmpl, struct sk_buff *skb,
> enum ip_conntrack_info ctinfo;
> struct nf_conntrack_zone tmp;
>
> - NF_CT_ASSERT(skb->nfct == NULL);
> + NF_CT_ASSERT(skb->_nfct == 0);
nf_ct_is_set()
> zone = nf_ct_zone_tmpl(tmpl, skb, &tmp);
>
> /* Are they talking about one of our connections? */
> @@ -172,8 +172,8 @@ icmp_error_message(struct net *net, struct nf_conn *tmpl, struct sk_buff *skb,
> ctinfo += IP_CT_IS_REPLY;
>
> /* Update skb to refer to this connection */
> - skb->nfct = &nf_ct_tuplehash_to_ctrack(h)->ct_general;
> - skb->nfctinfo = ctinfo;
> + skb->_nfct = (unsigned long)
> + &nf_ct_tuplehash_to_ctrack(h)->ct_general | ctinfo;
nf_ct_set()
> return NF_ACCEPT;
> }
>
> diff --git a/net/ipv4/netfilter/nf_defrag_ipv4.c b/net/ipv4/netfilter/nf_defrag_ipv4.c
> index 49bd6a54404f..8f3859b37ac7 100644
> --- a/net/ipv4/netfilter/nf_defrag_ipv4.c
> +++ b/net/ipv4/netfilter/nf_defrag_ipv4.c
> @@ -45,7 +45,7 @@ static enum ip_defrag_users nf_ct_defrag_user(unsigned int hooknum,
> {
> u16 zone_id = NF_CT_DEFAULT_ZONE_ID;
> #if IS_ENABLED(CONFIG_NF_CONNTRACK)
> - if (skb->nfct) {
> + if (skb->_nfct) {
nf_ct_is_set()
> enum ip_conntrack_info ctinfo;
> const struct nf_conn *ct = nf_ct_get(skb, &ctinfo);
>
> @@ -75,7 +75,7 @@ static unsigned int ipv4_conntrack_defrag(void *priv,
> #if !IS_ENABLED(CONFIG_NF_NAT)
> /* Previously seen (loopback)? Ignore. Do this before
> fragment check. */
> - if (skb->nfct && !nf_ct_is_template((struct nf_conn *)skb->nfct))
> + if (skb->_nfct && !nf_ct_is_template((struct nf_conn *)skb_nfct(skb)))
nf_ct_is_set()
> return NF_ACCEPT;
> #endif
> #endif
> diff --git a/net/ipv4/netfilter/nf_dup_ipv4.c b/net/ipv4/netfilter/nf_dup_ipv4.c
> index a981ef7151ca..f4e661a1b230 100644
> --- a/net/ipv4/netfilter/nf_dup_ipv4.c
> +++ b/net/ipv4/netfilter/nf_dup_ipv4.c
> @@ -53,6 +53,9 @@ static bool nf_dup_ipv4_route(struct net *net, struct sk_buff *skb,
> void nf_dup_ipv4(struct net *net, struct sk_buff *skb, unsigned int hooknum,
> const struct in_addr *gw, int oif)
> {
> +#if IS_ENABLED(CONFIG_NF_CONNTRACK)
> + struct nf_conn *untracked;
> +#endif
> struct iphdr *iph;
>
> if (this_cpu_read(nf_skb_duplicated))
> @@ -68,10 +71,10 @@ void nf_dup_ipv4(struct net *net, struct sk_buff *skb, unsigned int hooknum,
>
> #if IS_ENABLED(CONFIG_NF_CONNTRACK)
> /* Avoid counting cloned packets towards the original connection. */
> + untracked = nf_ct_untracked_get();
> nf_reset(skb);
> - skb->nfct = &nf_ct_untracked_get()->ct_general;
> - skb->nfctinfo = IP_CT_NEW;
> - nf_conntrack_get(skb->nfct);
> + nf_conntrack_get(&untracked->ct_general);
> + skb->_nfct = (unsigned long)untracked | IP_CT_NEW;
nf_ct_set()
> #endif
> /*
> * If we are in PREROUTING/INPUT, decrease the TTL to mitigate potential
> diff --git a/net/ipv6/netfilter/ip6t_SYNPROXY.c b/net/ipv6/netfilter/ip6t_SYNPROXY.c
> index 98c8dd38575a..2df5611c7b82 100644
> --- a/net/ipv6/netfilter/ip6t_SYNPROXY.c
> +++ b/net/ipv6/netfilter/ip6t_SYNPROXY.c
> @@ -71,8 +71,7 @@ synproxy_send_tcp(struct net *net,
> skb_dst_set(nskb, dst);
>
> if (nfct) {
> - nskb->nfct = nfct;
> - nskb->nfctinfo = ctinfo;
> + nskb->_nfct = (unsigned long)nfct | ctinfo;
nf_ct_set()
> nf_conntrack_get(nfct);
> }
>
> @@ -121,8 +120,8 @@ synproxy_send_client_synack(struct net *net,
>
> synproxy_build_options(nth, opts);
>
> - synproxy_send_tcp(net, skb, nskb, skb->nfct, IP_CT_ESTABLISHED_REPLY,
> - niph, nth, tcp_hdr_size);
> + synproxy_send_tcp(net, skb, nskb, skb_nfct(skb),
> + IP_CT_ESTABLISHED_REPLY, niph, nth, tcp_hdr_size);
> }
>
> static void
> @@ -244,8 +243,8 @@ synproxy_send_client_ack(struct net *net,
>
> synproxy_build_options(nth, opts);
>
> - synproxy_send_tcp(net, skb, nskb, skb->nfct, IP_CT_ESTABLISHED_REPLY,
> - niph, nth, tcp_hdr_size);
> + synproxy_send_tcp(net, skb, nskb, skb_nfct(skb),
> + IP_CT_ESTABLISHED_REPLY, niph, nth, tcp_hdr_size);
> }
>
> static bool
> diff --git a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
> index 44b9af3f813e..7c464a7de14e 100644
> --- a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
> +++ b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
> @@ -153,7 +153,7 @@ icmpv6_error_message(struct net *net, struct nf_conn *tmpl,
> enum ip_conntrack_info ctinfo;
> struct nf_conntrack_zone tmp;
>
> - NF_CT_ASSERT(skb->nfct == NULL);
> + NF_CT_ASSERT(skb->_nfct == 0);
nf_ct_is_set()
>
> /* Are they talking about one of our connections? */
> if (!nf_ct_get_tuplepr(skb,
> @@ -189,8 +189,8 @@ icmpv6_error_message(struct net *net, struct nf_conn *tmpl,
> }
>
> /* Update skb to refer to this connection */
> - skb->nfct = &nf_ct_tuplehash_to_ctrack(h)->ct_general;
> - skb->nfctinfo = ctinfo;
> + skb->_nfct = (unsigned long)
> + &nf_ct_tuplehash_to_ctrack(h)->ct_general | ctinfo;
nf_ct_set()
> return NF_ACCEPT;
> }
>
> @@ -222,9 +222,9 @@ icmpv6_error(struct net *net, struct nf_conn *tmpl,
> type = icmp6h->icmp6_type - 130;
> if (type >= 0 && type < sizeof(noct_valid_new) &&
> noct_valid_new[type]) {
> - skb->nfct = &nf_ct_untracked_get()->ct_general;
> - skb->nfctinfo = IP_CT_NEW;
> - nf_conntrack_get(skb->nfct);
> + skb->_nfct = (unsigned long)
> + &nf_ct_untracked_get()->ct_general | IP_CT_NEW;
nf_ct_set()
> + nf_conntrack_get(skb_nfct(skb));
> return NF_ACCEPT;
> }
>
> diff --git a/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c b/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c
> index 8e0bdd058787..e6c74e136f7a 100644
> --- a/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c
> +++ b/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c
> @@ -37,7 +37,7 @@ static enum ip6_defrag_users nf_ct6_defrag_user(unsigned int hooknum,
> {
> u16 zone_id = NF_CT_DEFAULT_ZONE_ID;
> #if IS_ENABLED(CONFIG_NF_CONNTRACK)
> - if (skb->nfct) {
> + if (skb->_nfct) {
> enum ip_conntrack_info ctinfo;
> const struct nf_conn *ct = nf_ct_get(skb, &ctinfo);
>
> @@ -61,7 +61,7 @@ static unsigned int ipv6_defrag(void *priv,
>
> #if IS_ENABLED(CONFIG_NF_CONNTRACK)
> /* Previously seen (loopback)? */
> - if (skb->nfct && !nf_ct_is_template((struct nf_conn *)skb->nfct))
> + if (skb->_nfct && !nf_ct_is_template((struct nf_conn *)(skb_nfct(skb))))
> return NF_ACCEPT;
> #endif
>
> diff --git a/net/ipv6/netfilter/nf_dup_ipv6.c b/net/ipv6/netfilter/nf_dup_ipv6.c
> index 5f52e5f90e7e..a53932b1dd15 100644
> --- a/net/ipv6/netfilter/nf_dup_ipv6.c
> +++ b/net/ipv6/netfilter/nf_dup_ipv6.c
> @@ -50,6 +50,10 @@ static bool nf_dup_ipv6_route(struct net *net, struct sk_buff *skb,
> void nf_dup_ipv6(struct net *net, struct sk_buff *skb, unsigned int hooknum,
> const struct in6_addr *gw, int oif)
> {
> +#if IS_ENABLED(CONFIG_NF_CONNTRACK)
> + struct nf_conn *untracked;
> +#endif
> +
> if (this_cpu_read(nf_skb_duplicated))
> return;
> skb = pskb_copy(skb, GFP_ATOMIC);
> @@ -57,10 +61,10 @@ void nf_dup_ipv6(struct net *net, struct sk_buff *skb, unsigned int hooknum,
> return;
>
> #if IS_ENABLED(CONFIG_NF_CONNTRACK)
> + untracked = nf_ct_untracked_get();
> nf_reset(skb);
> - skb->nfct = &nf_ct_untracked_get()->ct_general;
> - skb->nfctinfo = IP_CT_NEW;
> - nf_conntrack_get(skb->nfct);
> + nf_conntrack_get(&untracked->ct_general);
> + skb->_nfct = (unsigned long)untracked | IP_CT_NEW;
nf_ct_set()
> #endif
> if (hooknum == NF_INET_PRE_ROUTING ||
> hooknum == NF_INET_LOCAL_IN) {
> diff --git a/net/netfilter/core.c b/net/netfilter/core.c
> index ce6adfae521a..a87a6f8a74d8 100644
> --- a/net/netfilter/core.c
> +++ b/net/netfilter/core.c
> @@ -375,7 +375,7 @@ void nf_ct_attach(struct sk_buff *new, const struct sk_buff *skb)
> {
> void (*attach)(struct sk_buff *, const struct sk_buff *);
>
> - if (skb->nfct) {
> + if (skb->_nfct) {
nf_ct_is_set()
> rcu_read_lock();
> attach = rcu_dereference(ip_ct_attach);
> if (attach)
> diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
> index adb7af3a4c4c..cf5c1dac3057 100644
> --- a/net/netfilter/nf_conntrack_core.c
> +++ b/net/netfilter/nf_conntrack_core.c
> @@ -694,7 +694,7 @@ static int nf_ct_resolve_clash(struct net *net, struct sk_buff *skb,
> /* Assign conntrack already in hashes to this skbuff. Don't
> * modify skb->nfctinfo to ensure consistent stateful filtering.
> */
> - skb->nfct = &ct->ct_general;
> + skb->_nfct = (unsigned long)ct | oldinfo;
nf_ct_set()
> return NF_ACCEPT;
> }
> NF_CT_STAT_INC(net, drop);
> @@ -1223,7 +1223,7 @@ init_conntrack(struct net *net, struct nf_conn *tmpl,
> return &ct->tuplehash[IP_CT_DIR_ORIGINAL];
> }
>
> -/* On success, returns conntrack ptr, sets skb->nfct and ctinfo */
> +/* On success, returns conntrack ptr, sets skb->_nfct | ctinfo */
> static inline struct nf_conn *
> resolve_normal_ct(struct net *net, struct nf_conn *tmpl,
> struct sk_buff *skb,
> @@ -1282,8 +1282,7 @@ resolve_normal_ct(struct net *net, struct nf_conn *tmpl,
> }
> *set_reply = 0;
> }
> - skb->nfct = &ct->ct_general;
> - skb->nfctinfo = *ctinfo;
> + skb->_nfct = (unsigned long)&ct->ct_general | *ctinfo;
nf_ct_set()
> return ct;
> }
>
> @@ -1308,7 +1307,7 @@ nf_conntrack_in(struct net *net, u_int8_t pf, unsigned int hooknum,
> NF_CT_STAT_INC_ATOMIC(net, ignore);
> return NF_ACCEPT;
> }
> - skb->nfct = NULL;
> + skb->_nfct = 0;
> }
>
> /* rcu_read_lock()ed by nf_hook_thresh */
> @@ -1337,7 +1336,7 @@ nf_conntrack_in(struct net *net, u_int8_t pf, unsigned int hooknum,
> goto out;
> }
> /* ICMP[v6] protocol trackers may assign one conntrack. */
> - if (skb->nfct)
> + if (skb->_nfct)
nf_ct_is_set()
> goto out;
> }
> repeat:
> @@ -1357,7 +1356,7 @@ nf_conntrack_in(struct net *net, u_int8_t pf, unsigned int hooknum,
> goto out;
> }
>
> - NF_CT_ASSERT(skb->nfct);
> + NF_CT_ASSERT(skb->_nfct);
nf_ct_is_set()
>
> /* Decide what timeout policy we want to apply to this flow. */
> timeouts = nf_ct_timeout_lookup(net, ct, l4proto);
> @@ -1368,7 +1367,7 @@ nf_conntrack_in(struct net *net, u_int8_t pf, unsigned int hooknum,
> * the netfilter core what to do */
> pr_debug("nf_conntrack_in: Can't track with proto module\n");
> nf_conntrack_put(&ct->ct_general);
> - skb->nfct = NULL;
> + skb->_nfct = 0;
nf_conn_reset(skb) ?
> NF_CT_STAT_INC_ATOMIC(net, invalid);
> if (ret == -NF_DROP)
> NF_CT_STAT_INC_ATOMIC(net, drop);
> @@ -1526,9 +1525,8 @@ static void nf_conntrack_attach(struct sk_buff *nskb, const struct sk_buff *skb)
> ctinfo = IP_CT_RELATED;
>
> /* Attach to new skbuff, and increment count */
> - nskb->nfct = &ct->ct_general;
> - nskb->nfctinfo = ctinfo;
> - nf_conntrack_get(nskb->nfct);
> + nskb->_nfct = (unsigned long)&ct->ct_general | ctinfo;
nf_ct_set()
> + nf_conntrack_get(skb_nfct(nskb));
> }
>
> /* Bring out ya dead! */
> @@ -1864,7 +1862,8 @@ int nf_conntrack_init_start(void)
> nf_conntrack_max = max_factor * nf_conntrack_htable_size;
>
> nf_conntrack_cachep = kmem_cache_create("nf_conntrack",
> - sizeof(struct nf_conn), 0,
> + sizeof(struct nf_conn),
> + NFCT_INFOMASK + 1,
> SLAB_DESTROY_BY_RCU | SLAB_HWCACHE_ALIGN, NULL);
> if (!nf_conntrack_cachep)
> goto err_cachep;
> diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c
> index d009ae663453..c1e5fea20fb2 100644
> --- a/net/netfilter/nf_conntrack_standalone.c
> +++ b/net/netfilter/nf_conntrack_standalone.c
> @@ -642,6 +642,10 @@ static int __init nf_conntrack_standalone_init(void)
> if (ret < 0)
> goto out_start;
>
> + BUILD_BUG_ON(SKB_NFCT_PTRMASK != NFCT_PTRMASK);
> + BUILD_BUG_ON(NFCT_INFOMASK <= IP_CT_NUMBER);
> + BUILD_BUG_ON(NFCT_INFOMASK >= ARCH_KMALLOC_MINALIGN);
> +
> #ifdef CONFIG_SYSCTL
> nf_ct_netfilter_header =
> register_net_sysctl(&init_net, "net", nf_ct_netfilter_table);
> diff --git a/net/netfilter/nf_nat_helper.c b/net/netfilter/nf_nat_helper.c
> index 2840abb5bb99..211661cb2c90 100644
> --- a/net/netfilter/nf_nat_helper.c
> +++ b/net/netfilter/nf_nat_helper.c
> @@ -60,7 +60,7 @@ static void mangle_contents(struct sk_buff *skb,
> __skb_trim(skb, skb->len + rep_len - match_len);
> }
>
> - if (nf_ct_l3num((struct nf_conn *)skb->nfct) == NFPROTO_IPV4) {
> + if (nf_ct_l3num((struct nf_conn *)skb_nfct(skb)) == NFPROTO_IPV4) {
> /* fix IP hdr checksum information */
> ip_hdr(skb)->tot_len = htons(skb->len);
> ip_send_check(ip_hdr(skb));
> diff --git a/net/netfilter/nft_ct.c b/net/netfilter/nft_ct.c
> index d774d7823688..e05865c98ae0 100644
> --- a/net/netfilter/nft_ct.c
> +++ b/net/netfilter/nft_ct.c
> @@ -554,8 +554,7 @@ static void nft_notrack_eval(const struct nft_expr *expr,
>
> ct = nf_ct_untracked_get();
> atomic_inc(&ct->ct_general.use);
> - skb->nfct = &ct->ct_general;
> - skb->nfctinfo = IP_CT_NEW;
> + skb->_nfct = (unsigned long)ct | IP_CT_NEW;
nf_ct_set()
> }
>
> static struct nft_expr_type nft_notrack_type;
> diff --git a/net/netfilter/xt_CT.c b/net/netfilter/xt_CT.c
> index 95c750358747..de4ef36e2a6e 100644
> --- a/net/netfilter/xt_CT.c
> +++ b/net/netfilter/xt_CT.c
> @@ -23,15 +23,14 @@
> static inline int xt_ct_target(struct sk_buff *skb, struct nf_conn *ct)
> {
> /* Previously seen (loopback)? Ignore. */
> - if (skb->nfct != NULL)
> + if (skb->_nfct != 0)
> return XT_CONTINUE;
>
> /* special case the untracked ct : we want the percpu object */
> if (!ct)
> ct = nf_ct_untracked_get();
> atomic_inc(&ct->ct_general.use);
> - skb->nfct = &ct->ct_general;
> - skb->nfctinfo = IP_CT_NEW;
> + skb->_nfct = (unsigned long)&ct->ct_general | IP_CT_NEW;
nf_ct_set()
>
> return XT_CONTINUE;
> }
> @@ -407,12 +406,12 @@ static unsigned int
> notrack_tg(struct sk_buff *skb, const struct xt_action_param *par)
> {
> /* Previously seen (loopback)? Ignore. */
> - if (skb->nfct != NULL)
> + if (skb->_nfct != 0)
nf_ct_is_set()
> return XT_CONTINUE;
>
> - skb->nfct = &nf_ct_untracked_get()->ct_general;
> - skb->nfctinfo = IP_CT_NEW;
> - nf_conntrack_get(skb->nfct);
> + skb->_nfct = (unsigned long)
> + &nf_ct_untracked_get()->ct_general | IP_CT_NEW;
nf_ct_set()
> + nf_conntrack_get(skb_nfct(skb));
>
> return XT_CONTINUE;
> }
> diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c
> index 6b78bab27755..63c86d130008 100644
> --- a/net/openvswitch/conntrack.c
> +++ b/net/openvswitch/conntrack.c
> @@ -157,7 +157,7 @@ static void __ovs_ct_update_key(struct sw_flow_key *key, u8 state,
> ovs_ct_get_labels(ct, &key->ct.labels);
> }
>
> -/* Update 'key' based on skb->nfct. If 'post_ct' is true, then OVS has
> +/* Update 'key' based on skb->_nfct. If 'post_ct' is true, then OVS has
> * previously sent the packet to conntrack via the ct action. If
> * 'keep_nat_flags' is true, the existing NAT flags retained, else they are
> * initialized from the connection status.
> @@ -421,11 +421,11 @@ ovs_ct_get_info(const struct nf_conntrack_tuple_hash *h)
>
> /* Find an existing connection which this packet belongs to without
> * re-attributing statistics or modifying the connection state. This allows an
> - * skb->nfct lost due to an upcall to be recovered during actions execution.
> + * skb->_nfct lost due to an upcall to be recovered during actions execution.
> *
> * Must be called with rcu_read_lock.
> *
> - * On success, populates skb->nfct and skb->nfctinfo, and returns the
> + * On success, populates skb->_nfct and returns the
> * connection. Returns NULL if there is no existing entry.
> */
> static struct nf_conn *
> @@ -460,12 +460,11 @@ ovs_ct_find_existing(struct net *net, const struct nf_conntrack_zone *zone,
>
> ct = nf_ct_tuplehash_to_ctrack(h);
>
> - skb->nfct = &ct->ct_general;
> - skb->nfctinfo = ovs_ct_get_info(h);
> + skb->_nfct = (unsigned long)ct | ovs_ct_get_info(h);
nf_ct_set()
> return ct;
> }
>
> -/* Determine whether skb->nfct is equal to the result of conntrack lookup. */
> +/* Determine whether skb->_nfct is equal to the result of conntrack lookup. */
> static bool skb_nfct_cached(struct net *net,
> const struct sw_flow_key *key,
> const struct ovs_conntrack_info *info,
> @@ -476,7 +475,7 @@ static bool skb_nfct_cached(struct net *net,
>
> ct = nf_ct_get(skb, &ctinfo);
> /* If no ct, check if we have evidence that an existing conntrack entry
> - * might be found for this skb. This happens when we lose a skb->nfct
> + * might be found for this skb. This happens when we lose a skb->_nfct
> * due to an upcall. If the connection was not confirmed, it is not
> * cached and needs to be run through conntrack again.
> */
> @@ -721,11 +720,10 @@ static int __ovs_ct_lookup(struct net *net, struct sw_flow_key *key,
>
> /* Associate skb with specified zone. */
> if (tmpl) {
> - if (skb->nfct)
> - nf_conntrack_put(skb->nfct);
> + if (skb->_nfct)
nf_ct_is_set()
> + nf_conntrack_put(skb_nfct(skb));
> nf_conntrack_get(&tmpl->ct_general);
> - skb->nfct = &tmpl->ct_general;
> - skb->nfctinfo = IP_CT_NEW;
> + skb->_nfct = (unsigned long)tmpl | IP_CT_NEW;
nf_ct_set()
> }
>
> err = nf_conntrack_in(net, info->family,
> @@ -819,7 +817,7 @@ static int ovs_ct_lookup(struct net *net, struct sw_flow_key *key,
> if (err)
> return err;
>
> - ct = (struct nf_conn *)skb->nfct;
> + ct = (struct nf_conn *)skb_nfct(skb);
nf_conn_get()
> if (ct)
> nf_ct_deliver_cached_events(ct);
> }
> diff --git a/net/sched/cls_flow.c b/net/sched/cls_flow.c
> index 6575aba87630..3d6b9286c203 100644
> --- a/net/sched/cls_flow.c
> +++ b/net/sched/cls_flow.c
> @@ -129,7 +129,7 @@ static u32 flow_get_mark(const struct sk_buff *skb)
> static u32 flow_get_nfct(const struct sk_buff *skb)
> {
> #if IS_ENABLED(CONFIG_NF_CONNTRACK)
> - return addr_fold(skb->nfct);
> + return addr_fold(skb_nfct(skb));
> #else
> return 0;
> #endif
> --
> 2.7.3
>
> --
> To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at http://vger.kernel.org/majordomo-info.html
prev parent reply other threads:[~2017-01-18 19:13 UTC|newest]
Thread overview: 6+ messages / expand[flat|nested] mbox.gz Atom feed top
2017-01-05 11:26 [PATCH nf-next v2 0/4] netfilter: skbuff: merge nfctinfo bits and nfct pointer Florian Westphal
2017-01-05 11:26 ` [PATCH nf-next v2 1/4] netfilter: conntrack: no need to pass ctinfo to error handler Florian Westphal
2017-01-05 11:26 ` [PATCH nf-next v2 2/4] netfilter: reset netfilter state when duplicating packet Florian Westphal
2017-01-05 11:26 ` [PATCH nf-next v2 3/4] netfilter: reduce direct skb->nfct usage Florian Westphal
2017-01-05 11:26 ` [PATCH nf-next v2 4/4] netfilter: merge ctinfo into nfct pointer storage area Florian Westphal
2017-01-18 19:13 ` Pablo Neira Ayuso [this message]
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20170118191303.GA13621@salvia \
--to=pablo@netfilter.org \
--cc=fw@strlen.de \
--cc=netfilter-devel@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).