* [PATCH net-next v18 4/8] netfilter: Add nf_ct_get_tuple_skb global lookup function
2018-05-31 9:56 [PATCH net-next v18 0/8] sched: Add Common Applications Kept Enhanced (cake) qdisc Toke Høiland-Jørgensen
@ 2018-05-31 9:56 ` Toke Høiland-Jørgensen
2018-05-31 9:56 ` [PATCH net-next v18 5/8] sch_cake: Add NAT awareness to packet classifier Toke Høiland-Jørgensen
2018-06-01 17:42 ` [PATCH net-next v18 0/8] sched: Add Common Applications Kept Enhanced (cake) qdisc Toke Høiland-Jørgensen
2 siblings, 0 replies; 5+ messages in thread
From: Toke Høiland-Jørgensen @ 2018-05-31 9:56 UTC (permalink / raw)
To: netdev, cake; +Cc: netfilter-devel
This adds a global netfilter function to extract a conntrack tuple from an
skb. The function calls a new callback added to nf_ct_hook, which first tries
to get the tuple from skb->_nfct and does a full lookup if that fails. This
makes it possible to use the lookup function before the skb has passed
through the conntrack init hooks (e.g., in an ingress qdisc). The tuple is
copied to the caller to avoid issues with reference counting.
The function returns false if conntrack is not loaded, allowing it to be
used without incurring a module dependency on conntrack. This is used by
the NAT mode in sch_cake.
Cc: netfilter-devel@vger.kernel.org
Signed-off-by: Toke Høiland-Jørgensen <toke@toke.dk>
---
include/linux/netfilter.h | 11 +++++++++++
net/netfilter/core.c | 15 +++++++++++++++
net/netfilter/nf_conntrack_core.c | 36 ++++++++++++++++++++++++++++++++++++
3 files changed, 62 insertions(+)
diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h
index 04551af2ff23..d7be35ab7967 100644
--- a/include/linux/netfilter.h
+++ b/include/linux/netfilter.h
@@ -388,8 +388,17 @@ nf_nat_decode_session(struct sk_buff *skb, struct flowi *fl, u_int8_t family)
extern void (*ip_ct_attach)(struct sk_buff *, const struct sk_buff *) __rcu;
void nf_ct_attach(struct sk_buff *, const struct sk_buff *);
+struct nf_conntrack_tuple;
+bool nf_ct_get_tuple_skb(struct nf_conntrack_tuple *dst_tuple,
+ const struct sk_buff *skb);
#else
static inline void nf_ct_attach(struct sk_buff *new, struct sk_buff *skb) {}
+struct nf_conntrack_tuple;
+static inline bool nf_ct_get_tuple_skb(struct nf_conntrack_tuple *dst_tuple,
+ const struct sk_buff *skb)
+{
+ return false;
+}
#endif
struct nf_conn;
@@ -398,6 +407,8 @@ enum ip_conntrack_info;
struct nf_ct_hook {
int (*update)(struct net *net, struct sk_buff *skb);
void (*destroy)(struct nf_conntrack *);
+ bool (*get_tuple_skb)(struct nf_conntrack_tuple *,
+ const struct sk_buff *);
};
extern struct nf_ct_hook __rcu *nf_ct_hook;
diff --git a/net/netfilter/core.c b/net/netfilter/core.c
index 168af54db975..dc240cb47ddf 100644
--- a/net/netfilter/core.c
+++ b/net/netfilter/core.c
@@ -603,6 +603,21 @@ void nf_conntrack_destroy(struct nf_conntrack *nfct)
}
EXPORT_SYMBOL(nf_conntrack_destroy);
+bool nf_ct_get_tuple_skb(struct nf_conntrack_tuple *dst_tuple,
+ const struct sk_buff *skb)
+{
+ struct nf_ct_hook *ct_hook;
+ bool ret = false;
+
+ rcu_read_lock();
+ ct_hook = rcu_dereference(nf_ct_hook);
+ if (ct_hook)
+ ret = ct_hook->get_tuple_skb(dst_tuple, skb);
+ rcu_read_unlock();
+ return ret;
+}
+EXPORT_SYMBOL(nf_ct_get_tuple_skb);
+
/* Built-in default zone used e.g. by modules. */
const struct nf_conntrack_zone nf_ct_zone_dflt = {
.id = NF_CT_DEFAULT_ZONE_ID,
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index 3465da2a98bd..85ab2fd6a665 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -1683,6 +1683,41 @@ static int nf_conntrack_update(struct net *net, struct sk_buff *skb)
return 0;
}
+static bool nf_conntrack_get_tuple_skb(struct nf_conntrack_tuple *dst_tuple,
+ const struct sk_buff *skb)
+{
+ const struct nf_conntrack_tuple *src_tuple;
+ const struct nf_conntrack_tuple_hash *hash;
+ struct nf_conntrack_tuple srctuple;
+ enum ip_conntrack_info ctinfo;
+ struct nf_conn *ct;
+
+ ct = nf_ct_get(skb, &ctinfo);
+ if (ct) {
+ src_tuple = nf_ct_tuple(ct, CTINFO2DIR(ctinfo));
+ memcpy(dst_tuple, src_tuple, sizeof(*dst_tuple));
+ return true;
+ }
+
+ if (!nf_ct_get_tuplepr(skb, skb_network_offset(skb),
+ NFPROTO_IPV4, dev_net(skb->dev),
+ &srctuple))
+ return false;
+
+ hash = nf_conntrack_find_get(dev_net(skb->dev),
+ &nf_ct_zone_dflt,
+ &srctuple);
+ if (!hash)
+ return false;
+
+ ct = nf_ct_tuplehash_to_ctrack(hash);
+ src_tuple = nf_ct_tuple(ct, !hash->tuple.dst.dir);
+ memcpy(dst_tuple, src_tuple, sizeof(*dst_tuple));
+ nf_ct_put(ct);
+
+ return true;
+}
+
/* Bring out ya dead! */
static struct nf_conn *
get_next_corpse(int (*iter)(struct nf_conn *i, void *data),
@@ -2204,6 +2239,7 @@ int nf_conntrack_init_start(void)
static struct nf_ct_hook nf_conntrack_hook = {
.update = nf_conntrack_update,
.destroy = destroy_conntrack,
+ .get_tuple_skb = nf_conntrack_get_tuple_skb,
};
void nf_conntrack_init_end(void)
^ permalink raw reply related [flat|nested] 5+ messages in thread
* [PATCH net-next v18 5/8] sch_cake: Add NAT awareness to packet classifier
2018-05-31 9:56 [PATCH net-next v18 0/8] sched: Add Common Applications Kept Enhanced (cake) qdisc Toke Høiland-Jørgensen
2018-05-31 9:56 ` [PATCH net-next v18 4/8] netfilter: Add nf_ct_get_tuple_skb global lookup function Toke Høiland-Jørgensen
@ 2018-05-31 9:56 ` Toke Høiland-Jørgensen
2018-06-01 17:42 ` [PATCH net-next v18 0/8] sched: Add Common Applications Kept Enhanced (cake) qdisc Toke Høiland-Jørgensen
2 siblings, 0 replies; 5+ messages in thread
From: Toke Høiland-Jørgensen @ 2018-05-31 9:56 UTC (permalink / raw)
To: netdev, cake; +Cc: netfilter-devel
When CAKE is deployed on a gateway that also performs NAT (which is a
common deployment mode), the host fairness mechanism cannot distinguish
internal hosts from each other, and so fails to work correctly.
To fix this, we add an optional NAT awareness mode, which will query the
kernel conntrack mechanism to obtain the pre-NAT addresses for each packet
and use them in the flow and host hashing.
When the shaper is enabled and the host is already performing NAT, the cost
of this lookup is negligible. However, in unlimited mode with no NAT being
performed, there is a significant CPU cost at higher bandwidths. For this
reason, the feature is turned off by default.
Cc: netfilter-devel@vger.kernel.org
Signed-off-by: Toke Høiland-Jørgensen <toke@toke.dk>
---
net/sched/sch_cake.c | 46 ++++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 46 insertions(+)
diff --git a/net/sched/sch_cake.c b/net/sched/sch_cake.c
index f6a26636e98b..51768fd454a8 100644
--- a/net/sched/sch_cake.c
+++ b/net/sched/sch_cake.c
@@ -71,6 +71,10 @@
#include <net/tcp.h>
#include <net/flow_dissector.h>
+#if IS_ENABLED(CONFIG_NF_CONNTRACK)
+#include <net/netfilter/nf_conntrack_core.h>
+#endif
+
#define CAKE_SET_WAYS (8)
#define CAKE_MAX_TINS (8)
#define CAKE_QUEUES (1024)
@@ -516,6 +520,29 @@ static bool cobalt_should_drop(struct cobalt_vars *vars,
return drop;
}
+static void cake_update_flowkeys(struct flow_keys *keys,
+ const struct sk_buff *skb)
+{
+#if IS_ENABLED(CONFIG_NF_CONNTRACK)
+ struct nf_conntrack_tuple tuple = {};
+ bool rev = !skb->_nfct;
+
+ if (tc_skb_protocol(skb) != htons(ETH_P_IP))
+ return;
+
+ if (!nf_ct_get_tuple_skb(&tuple, skb))
+ return;
+
+ keys->addrs.v4addrs.src = rev ? tuple.dst.u3.ip : tuple.src.u3.ip;
+ keys->addrs.v4addrs.dst = rev ? tuple.src.u3.ip : tuple.dst.u3.ip;
+
+ if (keys->ports.ports) {
+ keys->ports.src = rev ? tuple.dst.u.all : tuple.src.u.all;
+ keys->ports.dst = rev ? tuple.src.u.all : tuple.dst.u.all;
+ }
+#endif
+}
+
/* Cake has several subtle multiple bit settings. In these cases you
* would be matching triple isolate mode as well.
*/
@@ -543,6 +570,9 @@ static u32 cake_hash(struct cake_tin_data *q, const struct sk_buff *skb,
skb_flow_dissect_flow_keys(skb, &keys,
FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL);
+ if (flow_mode & CAKE_FLOW_NAT_FLAG)
+ cake_update_flowkeys(&keys, skb);
+
/* flow_hash_from_keys() sorts the addresses by value, so we have
* to preserve their order in a separate data structure to treat
* src and dst host addresses as independently selectable.
@@ -1919,6 +1949,18 @@ static int cake_change(struct Qdisc *sch, struct nlattr *opt,
if (err < 0)
return err;
+ if (tb[TCA_CAKE_NAT]) {
+#if IS_ENABLED(CONFIG_NF_CONNTRACK)
+ q->flow_mode &= ~CAKE_FLOW_NAT_FLAG;
+ q->flow_mode |= CAKE_FLOW_NAT_FLAG *
+ !!nla_get_u32(tb[TCA_CAKE_NAT]);
+#else
+ NL_SET_ERR_MSG_ATTR(extack, tb[TCA_CAKE_NAT],
+ "No conntrack support in kernel");
+ return -EOPNOTSUPP;
+#endif
+ }
+
if (tb[TCA_CAKE_BASE_RATE64])
q->rate_bps = nla_get_u64(tb[TCA_CAKE_BASE_RATE64]);
@@ -2091,6 +2133,10 @@ static int cake_dump(struct Qdisc *sch, struct sk_buff *skb)
if (nla_put_u32(skb, TCA_CAKE_ACK_FILTER, q->ack_filter))
goto nla_put_failure;
+ if (nla_put_u32(skb, TCA_CAKE_NAT,
+ !!(q->flow_mode & CAKE_FLOW_NAT_FLAG)))
+ goto nla_put_failure;
+
return nla_nest_end(skb, opts);
nla_put_failure:
^ permalink raw reply related [flat|nested] 5+ messages in thread
* Re: [PATCH net-next v18 0/8] sched: Add Common Applications Kept Enhanced (cake) qdisc
2018-05-31 9:56 [PATCH net-next v18 0/8] sched: Add Common Applications Kept Enhanced (cake) qdisc Toke Høiland-Jørgensen
2018-05-31 9:56 ` [PATCH net-next v18 4/8] netfilter: Add nf_ct_get_tuple_skb global lookup function Toke Høiland-Jørgensen
2018-05-31 9:56 ` [PATCH net-next v18 5/8] sch_cake: Add NAT awareness to packet classifier Toke Høiland-Jørgensen
@ 2018-06-01 17:42 ` Toke Høiland-Jørgensen
2018-06-01 18:17 ` David Miller
2 siblings, 1 reply; 5+ messages in thread
From: Toke Høiland-Jørgensen @ 2018-06-01 17:42 UTC (permalink / raw)
To: netdev, cake
Cc: Georgios Amanakis, Pete Heist, Yuchung Cheng, Neal Cardwell,
Dave Taht, netfilter-devel
Toke Høiland-Jørgensen <toke@toke.dk> writes:
> This patch series adds the CAKE qdisc, and has been split up to ease
> review.
>
> I have attempted to split out each configurable feature into its own patch.
> The first commit adds the base shaper and packet scheduler, while
> subsequent commits add the optional features. The full userspace API and
> most data structures are included in this commit, but options not
> understood in the base version will be ignored.
Hmm, there seems to be a lockup issue being triggered when running CAKE
at high speeds (>40 Gbps in my tests). Please drop this series for now,
we'll return for 4.19 with a revised version.
-Toke
^ permalink raw reply [flat|nested] 5+ messages in thread
* Re: [PATCH net-next v18 0/8] sched: Add Common Applications Kept Enhanced (cake) qdisc
2018-06-01 17:42 ` [PATCH net-next v18 0/8] sched: Add Common Applications Kept Enhanced (cake) qdisc Toke Høiland-Jørgensen
@ 2018-06-01 18:17 ` David Miller
0 siblings, 0 replies; 5+ messages in thread
From: David Miller @ 2018-06-01 18:17 UTC (permalink / raw)
To: toke
Cc: netdev, cake, gamanakis, peteheist, ycheng, ncardwell, dave.taht,
netfilter-devel
From: Toke Høiland-Jørgensen <toke@toke.dk>
Date: Fri, 01 Jun 2018 19:42:12 +0200
> Toke Høiland-Jørgensen <toke@toke.dk> writes:
>
>> This patch series adds the CAKE qdisc, and has been split up to ease
>> review.
>>
>> I have attempted to split out each configurable feature into its own patch.
>> The first commit adds the base shaper and packet scheduler, while
>> subsequent commits add the optional features. The full userspace API and
>> most data structures are included in this commit, but options not
>> understood in the base version will be ignored.
>
> Hmm, there seems to be a lockup issue being triggered when running CAKE
> at high speeds (>40 Gbps in my tests). Please drop this series for now,
> we'll return for 4.19 with a revised version.
Ok.
Thanks for continuing to push this work along.
^ permalink raw reply [flat|nested] 5+ messages in thread