* [PATCH net-next v2 1/3] net/sched: act_ct: Create nf flow table per zone
2020-03-01 12:58 [PATCH net-next v2 0/3] act_ct: Software offload of conntrack_in Paul Blakey
@ 2020-03-01 12:58 ` Paul Blakey
2020-03-01 15:47 ` Jiri Pirko
2020-03-01 12:58 ` [PATCH net-next v2 2/3] net/sched: act_ct: Offload established connections to flow table Paul Blakey
2020-03-01 12:58 ` [PATCH net-next v2 3/3] net/sched: act_ct: Software offload of established flows Paul Blakey
2 siblings, 1 reply; 7+ messages in thread
From: Paul Blakey @ 2020-03-01 12:58 UTC (permalink / raw)
To: Paul Blakey, Saeed Mahameed, Oz Shlomo, Jakub Kicinski,
Vlad Buslov, David Miller, netdev@vger.kernel.org, Jiri Pirko,
Roi Dayan
Use the NF flow tables infrastructure for CT offload.
Create a nf flow table per zone.
Next patches will add FT entries to this table, and do
the software offload.
Signed-off-by: Paul Blakey <paulb@mellanox.com>
---
Changelog:
v1->v2:
Use spin_lock_bh instead of spin_lock, and unlock for alloc (as it can sleep)
Free ft on last tc act instance instead of last instance + last offloaded tuple,
this removes cleanup cb and netfilter patches, and is simpler
Removed accidental mlx5/core/en_tc.c change
Removed reviewed by Jiri - patch changed
include/net/tc_act/tc_ct.h | 2 +
net/sched/Kconfig | 2 +-
net/sched/act_ct.c | 143 ++++++++++++++++++++++++++++++++++++++++++++-
3 files changed, 145 insertions(+), 2 deletions(-)
diff --git a/include/net/tc_act/tc_ct.h b/include/net/tc_act/tc_ct.h
index a8b1564..cf3492e 100644
--- a/include/net/tc_act/tc_ct.h
+++ b/include/net/tc_act/tc_ct.h
@@ -25,6 +25,8 @@ struct tcf_ct_params {
u16 ct_action;
struct rcu_head rcu;
+
+ struct tcf_ct_flow_table *ct_ft;
};
struct tcf_ct {
diff --git a/net/sched/Kconfig b/net/sched/Kconfig
index edde0e5..bfbefb7 100644
--- a/net/sched/Kconfig
+++ b/net/sched/Kconfig
@@ -972,7 +972,7 @@ config NET_ACT_TUNNEL_KEY
config NET_ACT_CT
tristate "connection tracking tc action"
- depends on NET_CLS_ACT && NF_CONNTRACK && NF_NAT
+ depends on NET_CLS_ACT && NF_CONNTRACK && NF_NAT && NF_FLOW_TABLE
help
Say Y here to allow sending the packets to conntrack module.
diff --git a/net/sched/act_ct.c b/net/sched/act_ct.c
index f685c0d..43dfdd1 100644
--- a/net/sched/act_ct.c
+++ b/net/sched/act_ct.c
@@ -15,6 +15,7 @@
#include <linux/pkt_cls.h>
#include <linux/ip.h>
#include <linux/ipv6.h>
+#include <linux/rhashtable.h>
#include <net/netlink.h>
#include <net/pkt_sched.h>
#include <net/pkt_cls.h>
@@ -24,6 +25,7 @@
#include <uapi/linux/tc_act/tc_ct.h>
#include <net/tc_act/tc_ct.h>
+#include <net/netfilter/nf_flow_table.h>
#include <net/netfilter/nf_conntrack.h>
#include <net/netfilter/nf_conntrack_core.h>
#include <net/netfilter/nf_conntrack_zones.h>
@@ -31,6 +33,117 @@
#include <net/netfilter/ipv6/nf_defrag_ipv6.h>
#include <uapi/linux/netfilter/nf_nat.h>
+static struct workqueue_struct *act_ct_wq;
+static struct rhashtable zones_ht;
+static DEFINE_SPINLOCK(zones_lock);
+
+struct tcf_ct_flow_table {
+ struct rhash_head node; /* In zones tables */
+
+ struct rcu_work rwork;
+ struct nf_flowtable nf_ft;
+ u16 zone;
+ u32 ref;
+
+ bool dying;
+};
+
+static const struct rhashtable_params zones_params = {
+ .head_offset = offsetof(struct tcf_ct_flow_table, node),
+ .key_offset = offsetof(struct tcf_ct_flow_table, zone),
+ .key_len = sizeof_field(struct tcf_ct_flow_table, zone),
+ .automatic_shrinking = true,
+};
+
+static struct nf_flowtable_type flowtable_ct = {
+ .owner = THIS_MODULE,
+};
+
+static int tcf_ct_flow_table_get(struct tcf_ct_params *params)
+{
+ struct tcf_ct_flow_table *ct_ft, *new_ct_ft;
+ int err;
+
+ spin_lock_bh(&zones_lock);
+ ct_ft = rhashtable_lookup_fast(&zones_ht, ¶ms->zone, zones_params);
+ if (ct_ft)
+ goto take_ref;
+
+ spin_unlock_bh(&zones_lock);
+ new_ct_ft = kzalloc(sizeof(*new_ct_ft), GFP_KERNEL);
+ if (!new_ct_ft)
+ return -ENOMEM;
+
+ new_ct_ft->zone = params->zone;
+ spin_lock_bh(&zones_lock);
+ ct_ft = rhashtable_lookup_get_insert_fast(&zones_ht, &new_ct_ft->node,
+ zones_params);
+ if (IS_ERR(ct_ft)) {
+ err = PTR_ERR(ct_ft);
+ goto err_insert;
+ } else if (ct_ft) {
+ /* Already exists */
+ kfree(new_ct_ft);
+ goto take_ref;
+ }
+
+ ct_ft = new_ct_ft;
+ ct_ft->nf_ft.type = &flowtable_ct;
+ err = nf_flow_table_init(&ct_ft->nf_ft);
+ if (err)
+ goto err_init;
+
+ __module_get(THIS_MODULE);
+take_ref:
+ params->ct_ft = ct_ft;
+ ct_ft->ref++;
+ spin_unlock_bh(&zones_lock);
+
+ return 0;
+
+err_init:
+ rhashtable_remove_fast(&zones_ht, &new_ct_ft->node, zones_params);
+err_insert:
+ spin_unlock_bh(&zones_lock);
+ kfree(new_ct_ft);
+ return err;
+}
+
+static void tcf_ct_flow_table_cleanup_work(struct work_struct *work)
+{
+ struct tcf_ct_flow_table *ct_ft;
+
+ ct_ft = container_of(to_rcu_work(work), struct tcf_ct_flow_table,
+ rwork);
+ nf_flow_table_free(&ct_ft->nf_ft);
+ kfree(ct_ft);
+
+ module_put(THIS_MODULE);
+}
+
+static void tcf_ct_flow_table_put(struct tcf_ct_params *params)
+{
+ struct tcf_ct_flow_table *ct_ft = params->ct_ft;
+
+ spin_lock_bh(&zones_lock);
+ if (--params->ct_ft->ref == 0) {
+ rhashtable_remove_fast(&zones_ht, &ct_ft->node, zones_params);
+ INIT_RCU_WORK(&ct_ft->rwork, tcf_ct_flow_table_cleanup_work);
+ queue_rcu_work(act_ct_wq, &ct_ft->rwork);
+ }
+ spin_unlock_bh(&zones_lock);
+}
+
+static int tcf_ct_flow_tables_init(void)
+{
+ return rhashtable_init(&zones_ht, &zones_params);
+}
+
+static void tcf_ct_flow_tables_uninit(void)
+{
+ rhashtable_destroy(&zones_ht);
+}
+
static struct tc_action_ops act_ct_ops;
static unsigned int ct_net_id;
@@ -207,6 +320,8 @@ static void tcf_ct_params_free(struct rcu_head *head)
struct tcf_ct_params *params = container_of(head,
struct tcf_ct_params, rcu);
+ tcf_ct_flow_table_put(params);
+
if (params->tmpl)
nf_conntrack_put(¶ms->tmpl->ct_general);
kfree(params);
@@ -730,6 +845,10 @@ static int tcf_ct_init(struct net *net, struct nlattr *nla,
if (err)
goto cleanup;
+ err = tcf_ct_flow_table_get(params);
+ if (err)
+ goto cleanup;
+
spin_lock_bh(&c->tcf_lock);
goto_ch = tcf_action_set_ctrlact(*a, parm->action, goto_ch);
params = rcu_replace_pointer(c->params, params,
@@ -974,12 +1093,34 @@ static void __net_exit ct_exit_net(struct list_head *net_list)
static int __init ct_init_module(void)
{
- return tcf_register_action(&act_ct_ops, &ct_net_ops);
+ int err;
+
+ act_ct_wq = alloc_ordered_workqueue("act_ct_workqueue", 0);
+ if (!act_ct_wq)
+ return -ENOMEM;
+
+ err = tcf_ct_flow_tables_init();
+ if (err)
+ goto err_tbl_init;
+
+ err = tcf_register_action(&act_ct_ops, &ct_net_ops);
+ if (err)
+ goto err_register;
+
+ return 0;
+
+err_tbl_init:
+ destroy_workqueue(act_ct_wq);
+err_register:
+ tcf_ct_flow_tables_uninit();
+ return err;
}
static void __exit ct_cleanup_module(void)
{
tcf_unregister_action(&act_ct_ops, &ct_net_ops);
+ tcf_ct_flow_tables_uninit();
+ destroy_workqueue(act_ct_wq);
}
module_init(ct_init_module);
--
1.8.3.1
^ permalink raw reply related [flat|nested] 7+ messages in thread* Re: [PATCH net-next v2 1/3] net/sched: act_ct: Create nf flow table per zone
2020-03-01 12:58 ` [PATCH net-next v2 1/3] net/sched: act_ct: Create nf flow table per zone Paul Blakey
@ 2020-03-01 15:47 ` Jiri Pirko
2020-03-01 16:11 ` Paul Blakey
0 siblings, 1 reply; 7+ messages in thread
From: Jiri Pirko @ 2020-03-01 15:47 UTC (permalink / raw)
To: Paul Blakey
Cc: Saeed Mahameed, Oz Shlomo, Jakub Kicinski, Vlad Buslov,
David Miller, netdev@vger.kernel.org, Jiri Pirko, Roi Dayan
Sun, Mar 01, 2020 at 01:58:41PM CET, paulb@mellanox.com wrote:
>Use the NF flow tables infrastructure for CT offload.
>
>Create a nf flow table per zone.
>
>Next patches will add FT entries to this table, and do
>the software offload.
>
>Signed-off-by: Paul Blakey <paulb@mellanox.com>
>---
>Changelog:
> v1->v2:
> Use spin_lock_bh instead of spin_lock, and unlock for alloc (as it can sleep)
> Free ft on last tc act instance instead of last instance + last offloaded tuple,
> this removes cleanup cb and netfilter patches, and is simpler
> Removed accidental mlx5/core/en_tc.c change
> Removed reviewed by Jiri - patch changed
>
> include/net/tc_act/tc_ct.h | 2 +
> net/sched/Kconfig | 2 +-
> net/sched/act_ct.c | 143 ++++++++++++++++++++++++++++++++++++++++++++-
> 3 files changed, 145 insertions(+), 2 deletions(-)
>
>diff --git a/include/net/tc_act/tc_ct.h b/include/net/tc_act/tc_ct.h
>index a8b1564..cf3492e 100644
>--- a/include/net/tc_act/tc_ct.h
>+++ b/include/net/tc_act/tc_ct.h
>@@ -25,6 +25,8 @@ struct tcf_ct_params {
> u16 ct_action;
>
> struct rcu_head rcu;
>+
>+ struct tcf_ct_flow_table *ct_ft;
> };
>
> struct tcf_ct {
>diff --git a/net/sched/Kconfig b/net/sched/Kconfig
>index edde0e5..bfbefb7 100644
>--- a/net/sched/Kconfig
>+++ b/net/sched/Kconfig
>@@ -972,7 +972,7 @@ config NET_ACT_TUNNEL_KEY
>
> config NET_ACT_CT
> tristate "connection tracking tc action"
>- depends on NET_CLS_ACT && NF_CONNTRACK && NF_NAT
>+ depends on NET_CLS_ACT && NF_CONNTRACK && NF_NAT && NF_FLOW_TABLE
> help
> Say Y here to allow sending the packets to conntrack module.
>
>diff --git a/net/sched/act_ct.c b/net/sched/act_ct.c
>index f685c0d..43dfdd1 100644
>--- a/net/sched/act_ct.c
>+++ b/net/sched/act_ct.c
>@@ -15,6 +15,7 @@
> #include <linux/pkt_cls.h>
> #include <linux/ip.h>
> #include <linux/ipv6.h>
>+#include <linux/rhashtable.h>
> #include <net/netlink.h>
> #include <net/pkt_sched.h>
> #include <net/pkt_cls.h>
>@@ -24,6 +25,7 @@
> #include <uapi/linux/tc_act/tc_ct.h>
> #include <net/tc_act/tc_ct.h>
>
>+#include <net/netfilter/nf_flow_table.h>
> #include <net/netfilter/nf_conntrack.h>
> #include <net/netfilter/nf_conntrack_core.h>
> #include <net/netfilter/nf_conntrack_zones.h>
>@@ -31,6 +33,117 @@
> #include <net/netfilter/ipv6/nf_defrag_ipv6.h>
> #include <uapi/linux/netfilter/nf_nat.h>
>
>+static struct workqueue_struct *act_ct_wq;
>+static struct rhashtable zones_ht;
>+static DEFINE_SPINLOCK(zones_lock);
>+
>+struct tcf_ct_flow_table {
>+ struct rhash_head node; /* In zones tables */
>+
>+ struct rcu_work rwork;
>+ struct nf_flowtable nf_ft;
>+ u16 zone;
>+ u32 ref;
>+
>+ bool dying;
>+};
>+
>+static const struct rhashtable_params zones_params = {
>+ .head_offset = offsetof(struct tcf_ct_flow_table, node),
>+ .key_offset = offsetof(struct tcf_ct_flow_table, zone),
>+ .key_len = sizeof_field(struct tcf_ct_flow_table, zone),
>+ .automatic_shrinking = true,
>+};
>+
>+static struct nf_flowtable_type flowtable_ct = {
>+ .owner = THIS_MODULE,
>+};
>+
>+static int tcf_ct_flow_table_get(struct tcf_ct_params *params)
>+{
>+ struct tcf_ct_flow_table *ct_ft, *new_ct_ft;
>+ int err;
>+
>+ spin_lock_bh(&zones_lock);
>+ ct_ft = rhashtable_lookup_fast(&zones_ht, ¶ms->zone, zones_params);
>+ if (ct_ft)
>+ goto take_ref;
>+
>+ spin_unlock_bh(&zones_lock);
>+ new_ct_ft = kzalloc(sizeof(*new_ct_ft), GFP_KERNEL);
Don't unlock-lock and just use GFP_ATOMIC.
>+ if (!new_ct_ft)
>+ return -ENOMEM;
>+
>+ new_ct_ft->zone = params->zone;
>+ spin_lock_bh(&zones_lock);
>+ ct_ft = rhashtable_lookup_get_insert_fast(&zones_ht, &new_ct_ft->node,
>+ zones_params);
>+ if (IS_ERR(ct_ft)) {
>+ err = PTR_ERR(ct_ft);
>+ goto err_insert;
>+ } else if (ct_ft) {
>+ /* Already exists */
>+ kfree(new_ct_ft);
>+ goto take_ref;
>+ }
>+
>+ ct_ft = new_ct_ft;
>+ ct_ft->nf_ft.type = &flowtable_ct;
>+ err = nf_flow_table_init(&ct_ft->nf_ft);
>+ if (err)
>+ goto err_init;
>+
>+ __module_get(THIS_MODULE);
>+take_ref:
>+ params->ct_ft = ct_ft;
>+ ct_ft->ref++;
>+ spin_unlock_bh(&zones_lock);
>+
>+ return 0;
>+
>+err_init:
>+ rhashtable_remove_fast(&zones_ht, &new_ct_ft->node, zones_params);
>+err_insert:
>+ spin_unlock_bh(&zones_lock);
>+ kfree(new_ct_ft);
>+ return err;
>+}
>+
>+static void tcf_ct_flow_table_cleanup_work(struct work_struct *work)
>+{
>+ struct tcf_ct_flow_table *ct_ft;
>+
>+ ct_ft = container_of(to_rcu_work(work), struct tcf_ct_flow_table,
>+ rwork);
>+ nf_flow_table_free(&ct_ft->nf_ft);
>+ kfree(ct_ft);
>+
>+ module_put(THIS_MODULE);
>+}
>+
>+static void tcf_ct_flow_table_put(struct tcf_ct_params *params)
>+{
>+ struct tcf_ct_flow_table *ct_ft = params->ct_ft;
>+
>+ spin_lock_bh(&zones_lock);
>+ if (--params->ct_ft->ref == 0) {
>+ rhashtable_remove_fast(&zones_ht, &ct_ft->node, zones_params);
>+ INIT_RCU_WORK(&ct_ft->rwork, tcf_ct_flow_table_cleanup_work);
>+ queue_rcu_work(act_ct_wq, &ct_ft->rwork);
>+ }
>+ spin_unlock_bh(&zones_lock);
>+}
>+
>+static int tcf_ct_flow_tables_init(void)
>+{
>+ return rhashtable_init(&zones_ht, &zones_params);
>+}
>+
>+static void tcf_ct_flow_tables_uninit(void)
>+{
>+ rhashtable_destroy(&zones_ht);
>+}
>+
> static struct tc_action_ops act_ct_ops;
> static unsigned int ct_net_id;
>
>@@ -207,6 +320,8 @@ static void tcf_ct_params_free(struct rcu_head *head)
> struct tcf_ct_params *params = container_of(head,
> struct tcf_ct_params, rcu);
>
>+ tcf_ct_flow_table_put(params);
>+
> if (params->tmpl)
> nf_conntrack_put(¶ms->tmpl->ct_general);
> kfree(params);
>@@ -730,6 +845,10 @@ static int tcf_ct_init(struct net *net, struct nlattr *nla,
> if (err)
> goto cleanup;
>
>+ err = tcf_ct_flow_table_get(params);
>+ if (err)
>+ goto cleanup;
>+
> spin_lock_bh(&c->tcf_lock);
> goto_ch = tcf_action_set_ctrlact(*a, parm->action, goto_ch);
> params = rcu_replace_pointer(c->params, params,
>@@ -974,12 +1093,34 @@ static void __net_exit ct_exit_net(struct list_head *net_list)
>
> static int __init ct_init_module(void)
> {
>- return tcf_register_action(&act_ct_ops, &ct_net_ops);
>+ int err;
>+
>+ act_ct_wq = alloc_ordered_workqueue("act_ct_workqueue", 0);
>+ if (!act_ct_wq)
>+ return -ENOMEM;
>+
>+ err = tcf_ct_flow_tables_init();
>+ if (err)
>+ goto err_tbl_init;
>+
>+ err = tcf_register_action(&act_ct_ops, &ct_net_ops);
>+ if (err)
>+ goto err_register;
>+
>+ return 0;
>+
>+err_tbl_init:
>+ destroy_workqueue(act_ct_wq);
>+err_register:
>+ tcf_ct_flow_tables_uninit();
>+ return err;
> }
>
> static void __exit ct_cleanup_module(void)
> {
> tcf_unregister_action(&act_ct_ops, &ct_net_ops);
>+ tcf_ct_flow_tables_uninit();
>+ destroy_workqueue(act_ct_wq);
> }
>
> module_init(ct_init_module);
>--
>1.8.3.1
>
^ permalink raw reply [flat|nested] 7+ messages in thread* Re: [PATCH net-next v2 1/3] net/sched: act_ct: Create nf flow table per zone
2020-03-01 15:47 ` Jiri Pirko
@ 2020-03-01 16:11 ` Paul Blakey
2020-03-03 12:53 ` Paul Blakey
0 siblings, 1 reply; 7+ messages in thread
From: Paul Blakey @ 2020-03-01 16:11 UTC (permalink / raw)
To: Jiri Pirko
Cc: Saeed Mahameed, Oz Shlomo, Jakub Kicinski, Vlad Buslov,
David Miller, netdev@vger.kernel.org, Jiri Pirko, Roi Dayan
On 3/1/2020 5:47 PM, Jiri Pirko wrote:
> Sun, Mar 01, 2020 at 01:58:41PM CET, paulb@mellanox.com wrote:
>> Use the NF flow tables infrastructure for CT offload.
>>
>> Create a nf flow table per zone.
>>
>> Next patches will add FT entries to this table, and do
>> the software offload.
>>
>> Signed-off-by: Paul Blakey <paulb@mellanox.com>
>> ---
>> Changelog:
>> v1->v2:
>> Use spin_lock_bh instead of spin_lock, and unlock for alloc (as it can sleep)
>> Free ft on last tc act instance instead of last instance + last offloaded tuple,
>> this removes cleanup cb and netfilter patches, and is simpler
>> Removed accidental mlx5/core/en_tc.c change
>> Removed reviewed by Jiri - patch changed
>>
>> include/net/tc_act/tc_ct.h | 2 +
>> net/sched/Kconfig | 2 +-
>> net/sched/act_ct.c | 143 ++++++++++++++++++++++++++++++++++++++++++++-
>> 3 files changed, 145 insertions(+), 2 deletions(-)
>>
>> diff --git a/include/net/tc_act/tc_ct.h b/include/net/tc_act/tc_ct.h
>> index a8b1564..cf3492e 100644
>> --- a/include/net/tc_act/tc_ct.h
>> +++ b/include/net/tc_act/tc_ct.h
>> @@ -25,6 +25,8 @@ struct tcf_ct_params {
>> u16 ct_action;
>>
>> struct rcu_head rcu;
>> +
>> + struct tcf_ct_flow_table *ct_ft;
>> };
>>
>> struct tcf_ct {
>> diff --git a/net/sched/Kconfig b/net/sched/Kconfig
>> index edde0e5..bfbefb7 100644
>> --- a/net/sched/Kconfig
>> +++ b/net/sched/Kconfig
>> @@ -972,7 +972,7 @@ config NET_ACT_TUNNEL_KEY
>>
>> config NET_ACT_CT
>> tristate "connection tracking tc action"
>> - depends on NET_CLS_ACT && NF_CONNTRACK && NF_NAT
>> + depends on NET_CLS_ACT && NF_CONNTRACK && NF_NAT && NF_FLOW_TABLE
>> help
>> Say Y here to allow sending the packets to conntrack module.
>>
>> diff --git a/net/sched/act_ct.c b/net/sched/act_ct.c
>> index f685c0d..43dfdd1 100644
>> --- a/net/sched/act_ct.c
>> +++ b/net/sched/act_ct.c
>> @@ -15,6 +15,7 @@
>> #include <linux/pkt_cls.h>
>> #include <linux/ip.h>
>> #include <linux/ipv6.h>
>> +#include <linux/rhashtable.h>
>> #include <net/netlink.h>
>> #include <net/pkt_sched.h>
>> #include <net/pkt_cls.h>
>> @@ -24,6 +25,7 @@
>> #include <uapi/linux/tc_act/tc_ct.h>
>> #include <net/tc_act/tc_ct.h>
>>
>> +#include <net/netfilter/nf_flow_table.h>
>> #include <net/netfilter/nf_conntrack.h>
>> #include <net/netfilter/nf_conntrack_core.h>
>> #include <net/netfilter/nf_conntrack_zones.h>
>> @@ -31,6 +33,117 @@
>> #include <net/netfilter/ipv6/nf_defrag_ipv6.h>
>> #include <uapi/linux/netfilter/nf_nat.h>
>>
>> +static struct workqueue_struct *act_ct_wq;
>> +static struct rhashtable zones_ht;
>> +static DEFINE_SPINLOCK(zones_lock);
>> +
>> +struct tcf_ct_flow_table {
>> + struct rhash_head node; /* In zones tables */
>> +
>> + struct rcu_work rwork;
>> + struct nf_flowtable nf_ft;
>> + u16 zone;
>> + u32 ref;
>> +
>> + bool dying;
>> +};
>> +
>> +static const struct rhashtable_params zones_params = {
>> + .head_offset = offsetof(struct tcf_ct_flow_table, node),
>> + .key_offset = offsetof(struct tcf_ct_flow_table, zone),
>> + .key_len = sizeof_field(struct tcf_ct_flow_table, zone),
>> + .automatic_shrinking = true,
>> +};
>> +
>> +static struct nf_flowtable_type flowtable_ct = {
>> + .owner = THIS_MODULE,
>> +};
>> +
>> +static int tcf_ct_flow_table_get(struct tcf_ct_params *params)
>> +{
>> + struct tcf_ct_flow_table *ct_ft, *new_ct_ft;
>> + int err;
>> +
>> + spin_lock_bh(&zones_lock);
>> + ct_ft = rhashtable_lookup_fast(&zones_ht, ¶ms->zone, zones_params);
>> + if (ct_ft)
>> + goto take_ref;
>> +
>> + spin_unlock_bh(&zones_lock);
>> + new_ct_ft = kzalloc(sizeof(*new_ct_ft), GFP_KERNEL);
> Don't unlock-lock and just use GFP_ATOMIC.
Sure will do.
>
>
>> + if (!new_ct_ft)
>> + return -ENOMEM;
>> +
>> + new_ct_ft->zone = params->zone;
>> + spin_lock_bh(&zones_lock);
>> + ct_ft = rhashtable_lookup_get_insert_fast(&zones_ht, &new_ct_ft->node,
>> + zones_params);
>> + if (IS_ERR(ct_ft)) {
>> + err = PTR_ERR(ct_ft);
>> + goto err_insert;
>> + } else if (ct_ft) {
>> + /* Already exists */
>> + kfree(new_ct_ft);
>> + goto take_ref;
>> + }
>> +
>> + ct_ft = new_ct_ft;
>> + ct_ft->nf_ft.type = &flowtable_ct;
>> + err = nf_flow_table_init(&ct_ft->nf_ft);
>> + if (err)
>> + goto err_init;
>> +
>> + __module_get(THIS_MODULE);
>> +take_ref:
>> + params->ct_ft = ct_ft;
>> + ct_ft->ref++;
>> + spin_unlock_bh(&zones_lock);
>> +
>> + return 0;
>> +
>> +err_init:
>> + rhashtable_remove_fast(&zones_ht, &new_ct_ft->node, zones_params);
>> +err_insert:
>> + spin_unlock_bh(&zones_lock);
>> + kfree(new_ct_ft);
>> + return err;
>> +}
>> +
>> +static void tcf_ct_flow_table_cleanup_work(struct work_struct *work)
>> +{
>> + struct tcf_ct_flow_table *ct_ft;
>> +
>> + ct_ft = container_of(to_rcu_work(work), struct tcf_ct_flow_table,
>> + rwork);
>> + nf_flow_table_free(&ct_ft->nf_ft);
>> + kfree(ct_ft);
>> +
>> + module_put(THIS_MODULE);
>> +}
>> +
>> +static void tcf_ct_flow_table_put(struct tcf_ct_params *params)
>> +{
>> + struct tcf_ct_flow_table *ct_ft = params->ct_ft;
>> +
>> + spin_lock_bh(&zones_lock);
>> + if (--params->ct_ft->ref == 0) {
>> + rhashtable_remove_fast(&zones_ht, &ct_ft->node, zones_params);
>> + INIT_RCU_WORK(&ct_ft->rwork, tcf_ct_flow_table_cleanup_work);
>> + queue_rcu_work(act_ct_wq, &ct_ft->rwork);
>> + }
>> + spin_unlock_bh(&zones_lock);
>> +}
>> +
>> +static int tcf_ct_flow_tables_init(void)
>> +{
>> + return rhashtable_init(&zones_ht, &zones_params);
>> +}
>> +
>> +static void tcf_ct_flow_tables_uninit(void)
>> +{
>> + rhashtable_destroy(&zones_ht);
>> +}
>> +
>> static struct tc_action_ops act_ct_ops;
>> static unsigned int ct_net_id;
>>
>> @@ -207,6 +320,8 @@ static void tcf_ct_params_free(struct rcu_head *head)
>> struct tcf_ct_params *params = container_of(head,
>> struct tcf_ct_params, rcu);
>>
>> + tcf_ct_flow_table_put(params);
>> +
>> if (params->tmpl)
>> nf_conntrack_put(¶ms->tmpl->ct_general);
>> kfree(params);
>> @@ -730,6 +845,10 @@ static int tcf_ct_init(struct net *net, struct nlattr *nla,
>> if (err)
>> goto cleanup;
>>
>> + err = tcf_ct_flow_table_get(params);
>> + if (err)
>> + goto cleanup;
>> +
>> spin_lock_bh(&c->tcf_lock);
>> goto_ch = tcf_action_set_ctrlact(*a, parm->action, goto_ch);
>> params = rcu_replace_pointer(c->params, params,
>> @@ -974,12 +1093,34 @@ static void __net_exit ct_exit_net(struct list_head *net_list)
>>
>> static int __init ct_init_module(void)
>> {
>> - return tcf_register_action(&act_ct_ops, &ct_net_ops);
>> + int err;
>> +
>> + act_ct_wq = alloc_ordered_workqueue("act_ct_workqueue", 0);
>> + if (!act_ct_wq)
>> + return -ENOMEM;
>> +
>> + err = tcf_ct_flow_tables_init();
>> + if (err)
>> + goto err_tbl_init;
>> +
>> + err = tcf_register_action(&act_ct_ops, &ct_net_ops);
>> + if (err)
>> + goto err_register;
>> +
>> + return 0;
>> +
>> +err_tbl_init:
>> + destroy_workqueue(act_ct_wq);
>> +err_register:
>> + tcf_ct_flow_tables_uninit();
>> + return err;
>> }
>>
>> static void __exit ct_cleanup_module(void)
>> {
>> tcf_unregister_action(&act_ct_ops, &ct_net_ops);
>> + tcf_ct_flow_tables_uninit();
>> + destroy_workqueue(act_ct_wq);
>> }
>>
>> module_init(ct_init_module);
>> --
>> 1.8.3.1
>>
^ permalink raw reply [flat|nested] 7+ messages in thread* Re: [PATCH net-next v2 1/3] net/sched: act_ct: Create nf flow table per zone
2020-03-01 16:11 ` Paul Blakey
@ 2020-03-03 12:53 ` Paul Blakey
0 siblings, 0 replies; 7+ messages in thread
From: Paul Blakey @ 2020-03-03 12:53 UTC (permalink / raw)
To: Jiri Pirko
Cc: Saeed Mahameed, Oz Shlomo, Jakub Kicinski, Vlad Buslov,
David Miller, netdev@vger.kernel.org, Jiri Pirko, Roi Dayan
On 3/1/2020 6:11 PM, Paul Blakey wrote:
> On 3/1/2020 5:47 PM, Jiri Pirko wrote:
>> Sun, Mar 01, 2020 at 01:58:41PM CET, paulb@mellanox.com wrote:
>>> Use the NF flow tables infrastructure for CT offload.
>>>
>>> Create a nf flow table per zone.
>>>
>>> Next patches will add FT entries to this table, and do
>>> the software offload.
>>>
>>> Signed-off-by: Paul Blakey <paulb@mellanox.com>
>>> ---
>>> Changelog:
>>> v1->v2:
>>> Use spin_lock_bh instead of spin_lock, and unlock for alloc (as it can sleep)
>>> Free ft on last tc act instance instead of last instance + last offloaded tuple,
>>> this removes cleanup cb and netfilter patches, and is simpler
>>> Removed accidental mlx5/core/en_tc.c change
>>> Removed reviewed by Jiri - patch changed
>>>
>>> include/net/tc_act/tc_ct.h | 2 +
>>> net/sched/Kconfig | 2 +-
>>> net/sched/act_ct.c | 143 ++++++++++++++++++++++++++++++++++++++++++++-
>>> 3 files changed, 145 insertions(+), 2 deletions(-)
>>>
>>> diff --git a/include/net/tc_act/tc_ct.h b/include/net/tc_act/tc_ct.h
>>> index a8b1564..cf3492e 100644
>>> --- a/include/net/tc_act/tc_ct.h
>>> +++ b/include/net/tc_act/tc_ct.h
>>> @@ -25,6 +25,8 @@ struct tcf_ct_params {
>>> u16 ct_action;
>>>
>>> struct rcu_head rcu;
>>> +
>>> + struct tcf_ct_flow_table *ct_ft;
>>> };
>>>
>>> struct tcf_ct {
>>> diff --git a/net/sched/Kconfig b/net/sched/Kconfig
>>> index edde0e5..bfbefb7 100644
>>> --- a/net/sched/Kconfig
>>> +++ b/net/sched/Kconfig
>>> @@ -972,7 +972,7 @@ config NET_ACT_TUNNEL_KEY
>>>
>>> config NET_ACT_CT
>>> tristate "connection tracking tc action"
>>> - depends on NET_CLS_ACT && NF_CONNTRACK && NF_NAT
>>> + depends on NET_CLS_ACT && NF_CONNTRACK && NF_NAT && NF_FLOW_TABLE
>>> help
>>> Say Y here to allow sending the packets to conntrack module.
>>>
>>> diff --git a/net/sched/act_ct.c b/net/sched/act_ct.c
>>> index f685c0d..43dfdd1 100644
>>> --- a/net/sched/act_ct.c
>>> +++ b/net/sched/act_ct.c
>>> @@ -15,6 +15,7 @@
>>> #include <linux/pkt_cls.h>
>>> #include <linux/ip.h>
>>> #include <linux/ipv6.h>
>>> +#include <linux/rhashtable.h>
>>> #include <net/netlink.h>
>>> #include <net/pkt_sched.h>
>>> #include <net/pkt_cls.h>
>>> @@ -24,6 +25,7 @@
>>> #include <uapi/linux/tc_act/tc_ct.h>
>>> #include <net/tc_act/tc_ct.h>
>>>
>>> +#include <net/netfilter/nf_flow_table.h>
>>> #include <net/netfilter/nf_conntrack.h>
>>> #include <net/netfilter/nf_conntrack_core.h>
>>> #include <net/netfilter/nf_conntrack_zones.h>
>>> @@ -31,6 +33,117 @@
>>> #include <net/netfilter/ipv6/nf_defrag_ipv6.h>
>>> #include <uapi/linux/netfilter/nf_nat.h>
>>>
>>> +static struct workqueue_struct *act_ct_wq;
>>> +static struct rhashtable zones_ht;
>>> +static DEFINE_SPINLOCK(zones_lock);
>>> +
>>> +struct tcf_ct_flow_table {
>>> + struct rhash_head node; /* In zones tables */
>>> +
>>> + struct rcu_work rwork;
>>> + struct nf_flowtable nf_ft;
>>> + u16 zone;
>>> + u32 ref;
>>> +
>>> + bool dying;
>>> +};
>>> +
>>> +static const struct rhashtable_params zones_params = {
>>> + .head_offset = offsetof(struct tcf_ct_flow_table, node),
>>> + .key_offset = offsetof(struct tcf_ct_flow_table, zone),
>>> + .key_len = sizeof_field(struct tcf_ct_flow_table, zone),
>>> + .automatic_shrinking = true,
>>> +};
>>> +
>>> +static struct nf_flowtable_type flowtable_ct = {
>>> + .owner = THIS_MODULE,
>>> +};
>>> +
>>> +static int tcf_ct_flow_table_get(struct tcf_ct_params *params)
>>> +{
>>> + struct tcf_ct_flow_table *ct_ft, *new_ct_ft;
>>> + int err;
>>> +
>>> + spin_lock_bh(&zones_lock);
>>> + ct_ft = rhashtable_lookup_fast(&zones_ht, ¶ms->zone, zones_params);
>>> + if (ct_ft)
>>> + goto take_ref;
>>> +
>>> + spin_unlock_bh(&zones_lock);
>>> + new_ct_ft = kzalloc(sizeof(*new_ct_ft), GFP_KERNEL);
>> Don't unlock-lock and just use GFP_ATOMIC.
> Sure will do.
Sent v3.
>>
>>> + if (!new_ct_ft)
>>> + return -ENOMEM;
>>> +
>>> + new_ct_ft->zone = params->zone;
>>> + spin_lock_bh(&zones_lock);
>>> + ct_ft = rhashtable_lookup_get_insert_fast(&zones_ht, &new_ct_ft->node,
>>> + zones_params);
>>> + if (IS_ERR(ct_ft)) {
>>> + err = PTR_ERR(ct_ft);
>>> + goto err_insert;
>>> + } else if (ct_ft) {
>>> + /* Already exists */
>>> + kfree(new_ct_ft);
>>> + goto take_ref;
>>> + }
>>> +
>>> + ct_ft = new_ct_ft;
>>> + ct_ft->nf_ft.type = &flowtable_ct;
>>> + err = nf_flow_table_init(&ct_ft->nf_ft);
>>> + if (err)
>>> + goto err_init;
>>> +
>>> + __module_get(THIS_MODULE);
>>> +take_ref:
>>> + params->ct_ft = ct_ft;
>>> + ct_ft->ref++;
>>> + spin_unlock_bh(&zones_lock);
>>> +
>>> + return 0;
>>> +
>>> +err_init:
>>> + rhashtable_remove_fast(&zones_ht, &new_ct_ft->node, zones_params);
>>> +err_insert:
>>> + spin_unlock_bh(&zones_lock);
>>> + kfree(new_ct_ft);
>>> + return err;
>>> +}
>>> +
>>> +static void tcf_ct_flow_table_cleanup_work(struct work_struct *work)
>>> +{
>>> + struct tcf_ct_flow_table *ct_ft;
>>> +
>>> + ct_ft = container_of(to_rcu_work(work), struct tcf_ct_flow_table,
>>> + rwork);
>>> + nf_flow_table_free(&ct_ft->nf_ft);
>>> + kfree(ct_ft);
>>> +
>>> + module_put(THIS_MODULE);
>>> +}
>>> +
>>> +static void tcf_ct_flow_table_put(struct tcf_ct_params *params)
>>> +{
>>> + struct tcf_ct_flow_table *ct_ft = params->ct_ft;
>>> +
>>> + spin_lock_bh(&zones_lock);
>>> + if (--params->ct_ft->ref == 0) {
>>> + rhashtable_remove_fast(&zones_ht, &ct_ft->node, zones_params);
>>> + INIT_RCU_WORK(&ct_ft->rwork, tcf_ct_flow_table_cleanup_work);
>>> + queue_rcu_work(act_ct_wq, &ct_ft->rwork);
>>> + }
>>> + spin_unlock_bh(&zones_lock);
>>> +}
>>> +
>>> +static int tcf_ct_flow_tables_init(void)
>>> +{
>>> + return rhashtable_init(&zones_ht, &zones_params);
>>> +}
>>> +
>>> +static void tcf_ct_flow_tables_uninit(void)
>>> +{
>>> + rhashtable_destroy(&zones_ht);
>>> +}
>>> +
>>> static struct tc_action_ops act_ct_ops;
>>> static unsigned int ct_net_id;
>>>
>>> @@ -207,6 +320,8 @@ static void tcf_ct_params_free(struct rcu_head *head)
>>> struct tcf_ct_params *params = container_of(head,
>>> struct tcf_ct_params, rcu);
>>>
>>> + tcf_ct_flow_table_put(params);
>>> +
>>> if (params->tmpl)
>>> nf_conntrack_put(¶ms->tmpl->ct_general);
>>> kfree(params);
>>> @@ -730,6 +845,10 @@ static int tcf_ct_init(struct net *net, struct nlattr *nla,
>>> if (err)
>>> goto cleanup;
>>>
>>> + err = tcf_ct_flow_table_get(params);
>>> + if (err)
>>> + goto cleanup;
>>> +
>>> spin_lock_bh(&c->tcf_lock);
>>> goto_ch = tcf_action_set_ctrlact(*a, parm->action, goto_ch);
>>> params = rcu_replace_pointer(c->params, params,
>>> @@ -974,12 +1093,34 @@ static void __net_exit ct_exit_net(struct list_head *net_list)
>>>
>>> static int __init ct_init_module(void)
>>> {
>>> - return tcf_register_action(&act_ct_ops, &ct_net_ops);
>>> + int err;
>>> +
>>> + act_ct_wq = alloc_ordered_workqueue("act_ct_workqueue", 0);
>>> + if (!act_ct_wq)
>>> + return -ENOMEM;
>>> +
>>> + err = tcf_ct_flow_tables_init();
>>> + if (err)
>>> + goto err_tbl_init;
>>> +
>>> + err = tcf_register_action(&act_ct_ops, &ct_net_ops);
>>> + if (err)
>>> + goto err_register;
>>> +
>>> + return 0;
>>> +
>>> +err_tbl_init:
>>> + destroy_workqueue(act_ct_wq);
>>> +err_register:
>>> + tcf_ct_flow_tables_uninit();
>>> + return err;
>>> }
>>>
>>> static void __exit ct_cleanup_module(void)
>>> {
>>> tcf_unregister_action(&act_ct_ops, &ct_net_ops);
>>> + tcf_ct_flow_tables_uninit();
>>> + destroy_workqueue(act_ct_wq);
>>> }
>>>
>>> module_init(ct_init_module);
>>> --
>>> 1.8.3.1
>>>
^ permalink raw reply [flat|nested] 7+ messages in thread
* [PATCH net-next v2 2/3] net/sched: act_ct: Offload established connections to flow table
2020-03-01 12:58 [PATCH net-next v2 0/3] act_ct: Software offload of conntrack_in Paul Blakey
2020-03-01 12:58 ` [PATCH net-next v2 1/3] net/sched: act_ct: Create nf flow table per zone Paul Blakey
@ 2020-03-01 12:58 ` Paul Blakey
2020-03-01 12:58 ` [PATCH net-next v2 3/3] net/sched: act_ct: Software offload of established flows Paul Blakey
2 siblings, 0 replies; 7+ messages in thread
From: Paul Blakey @ 2020-03-01 12:58 UTC (permalink / raw)
To: Paul Blakey, Saeed Mahameed, Oz Shlomo, Jakub Kicinski,
Vlad Buslov, David Miller, netdev@vger.kernel.org, Jiri Pirko,
Roi Dayan
Add a ft entry when connections enter an established state and delete
the connections when they leave the established state.
The flow table assumes ownership of the connection. In the following
patch act_ct will lookup the ct state from the FT. In future patches,
drivers will register for callbacks for ft add/del events and will be
able to use the information to offload the connections.
Note that connection aging is managed by the FT.
Signed-off-by: Paul Blakey <paulb@mellanox.com>
Acked-by: Jiri Pirko <jiri@mellanox.com>
---
net/sched/act_ct.c | 63 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 63 insertions(+)
diff --git a/net/sched/act_ct.c b/net/sched/act_ct.c
index 43dfdd1..b512f20 100644
--- a/net/sched/act_ct.c
+++ b/net/sched/act_ct.c
@@ -134,6 +134,67 @@ static void tcf_ct_flow_table_put(struct tcf_ct_params *params)
spin_unlock_bh(&zones_lock);
}
+static void tcf_ct_flow_table_add(struct tcf_ct_flow_table *ct_ft,
+ struct nf_conn *ct,
+ bool tcp)
+{
+ struct flow_offload *entry;
+ int err;
+
+ if (test_and_set_bit(IPS_OFFLOAD_BIT, &ct->status))
+ return;
+
+ entry = flow_offload_alloc(ct);
+ if (!entry) {
+ WARN_ON_ONCE(1);
+ goto err_alloc;
+ }
+
+ if (tcp) {
+ ct->proto.tcp.seen[0].flags |= IP_CT_TCP_FLAG_BE_LIBERAL;
+ ct->proto.tcp.seen[1].flags |= IP_CT_TCP_FLAG_BE_LIBERAL;
+ }
+
+ err = flow_offload_add(&ct_ft->nf_ft, entry);
+ if (err)
+ goto err_add;
+
+ return;
+
+err_add:
+ flow_offload_free(entry);
+err_alloc:
+ clear_bit(IPS_OFFLOAD_BIT, &ct->status);
+}
+
+static void tcf_ct_flow_table_process_conn(struct tcf_ct_flow_table *ct_ft,
+ struct nf_conn *ct,
+ enum ip_conntrack_info ctinfo)
+{
+ bool tcp = false;
+
+ if (ctinfo != IP_CT_ESTABLISHED && ctinfo != IP_CT_ESTABLISHED_REPLY)
+ return;
+
+ switch (nf_ct_protonum(ct)) {
+ case IPPROTO_TCP:
+ tcp = true;
+ if (ct->proto.tcp.state != TCP_CONNTRACK_ESTABLISHED)
+ return;
+ break;
+ case IPPROTO_UDP:
+ break;
+ default:
+ return;
+ }
+
+ if (nf_ct_ext_exist(ct, NF_CT_EXT_HELPER) ||
+ ct->status & IPS_SEQ_ADJUST)
+ return;
+
+ tcf_ct_flow_table_add(ct_ft, ct, tcp);
+}
+
static int tcf_ct_flow_tables_init(void)
{
return rhashtable_init(&zones_ht, &zones_params);
@@ -587,6 +648,8 @@ static int tcf_ct_act(struct sk_buff *skb, const struct tc_action *a,
nf_conntrack_confirm(skb);
}
+ tcf_ct_flow_table_process_conn(p->ct_ft, ct, ctinfo);
+
out_push:
skb_push_rcsum(skb, nh_ofs);
--
1.8.3.1
^ permalink raw reply related [flat|nested] 7+ messages in thread* [PATCH net-next v2 3/3] net/sched: act_ct: Software offload of established flows
2020-03-01 12:58 [PATCH net-next v2 0/3] act_ct: Software offload of conntrack_in Paul Blakey
2020-03-01 12:58 ` [PATCH net-next v2 1/3] net/sched: act_ct: Create nf flow table per zone Paul Blakey
2020-03-01 12:58 ` [PATCH net-next v2 2/3] net/sched: act_ct: Offload established connections to flow table Paul Blakey
@ 2020-03-01 12:58 ` Paul Blakey
2 siblings, 0 replies; 7+ messages in thread
From: Paul Blakey @ 2020-03-01 12:58 UTC (permalink / raw)
To: Paul Blakey, Saeed Mahameed, Oz Shlomo, Jakub Kicinski,
Vlad Buslov, David Miller, netdev@vger.kernel.org, Jiri Pirko,
Roi Dayan
Offload nf conntrack processing by looking up the 5-tuple in the
zone's flow table.
The nf conntrack module will process the packets until a connection is
in established state. Once in established state, the ct state pointer
(nf_conn) will be restored on the skb from a successful ft lookup.
Signed-off-by: Paul Blakey <paulb@mellanox.com>
Acked-by: Jiri Pirko <jiri@mellanox.com>
---
Changelog:
v1->v2:
Add !skip_add curly braces
Removed extra setting thoff again
Check tcp proto outside of tcf_ct_flow_table_check_tcp
net/sched/act_ct.c | 160 ++++++++++++++++++++++++++++++++++++++++++++++++++++-
1 file changed, 158 insertions(+), 2 deletions(-)
diff --git a/net/sched/act_ct.c b/net/sched/act_ct.c
index b512f20..c8b9837 100644
--- a/net/sched/act_ct.c
+++ b/net/sched/act_ct.c
@@ -195,6 +195,155 @@ static void tcf_ct_flow_table_process_conn(struct tcf_ct_flow_table *ct_ft,
tcf_ct_flow_table_add(ct_ft, ct, tcp);
}
+static bool
+tcf_ct_flow_table_fill_tuple_ipv4(struct sk_buff *skb,
+ struct flow_offload_tuple *tuple)
+{
+ struct flow_ports *ports;
+ unsigned int thoff;
+ struct iphdr *iph;
+
+ if (!pskb_may_pull(skb, sizeof(*iph)))
+ return false;
+
+ iph = ip_hdr(skb);
+ thoff = iph->ihl * 4;
+
+ if (ip_is_fragment(iph) ||
+ unlikely(thoff != sizeof(struct iphdr)))
+ return false;
+
+ if (iph->protocol != IPPROTO_TCP &&
+ iph->protocol != IPPROTO_UDP)
+ return false;
+
+ if (iph->ttl <= 1)
+ return false;
+
+ if (!pskb_may_pull(skb, thoff + sizeof(*ports)))
+ return false;
+
+ ports = (struct flow_ports *)(skb_network_header(skb) + thoff);
+
+ tuple->src_v4.s_addr = iph->saddr;
+ tuple->dst_v4.s_addr = iph->daddr;
+ tuple->src_port = ports->source;
+ tuple->dst_port = ports->dest;
+ tuple->l3proto = AF_INET;
+ tuple->l4proto = iph->protocol;
+
+ return true;
+}
+
+static bool
+tcf_ct_flow_table_fill_tuple_ipv6(struct sk_buff *skb,
+ struct flow_offload_tuple *tuple)
+{
+ struct flow_ports *ports;
+ struct ipv6hdr *ip6h;
+ unsigned int thoff;
+
+ if (!pskb_may_pull(skb, sizeof(*ip6h)))
+ return false;
+
+ ip6h = ipv6_hdr(skb);
+
+ if (ip6h->nexthdr != IPPROTO_TCP &&
+ ip6h->nexthdr != IPPROTO_UDP)
+ return false;
+
+ if (ip6h->hop_limit <= 1)
+ return false;
+
+ thoff = sizeof(*ip6h);
+ if (!pskb_may_pull(skb, thoff + sizeof(*ports)))
+ return false;
+
+ ports = (struct flow_ports *)(skb_network_header(skb) + thoff);
+
+ tuple->src_v6 = ip6h->saddr;
+ tuple->dst_v6 = ip6h->daddr;
+ tuple->src_port = ports->source;
+ tuple->dst_port = ports->dest;
+ tuple->l3proto = AF_INET6;
+ tuple->l4proto = ip6h->nexthdr;
+
+ return true;
+}
+
+static bool tcf_ct_flow_table_check_tcp(struct flow_offload *flow,
+ struct sk_buff *skb,
+ unsigned int thoff)
+{
+ struct tcphdr *tcph;
+
+ if (!pskb_may_pull(skb, thoff + sizeof(*tcph)))
+ return false;
+
+ tcph = (void *)(skb_network_header(skb) + thoff);
+ if (unlikely(tcph->fin || tcph->rst)) {
+ flow_offload_teardown(flow);
+ return false;
+ }
+
+ return true;
+}
+
+static bool tcf_ct_flow_table_lookup(struct tcf_ct_params *p,
+ struct sk_buff *skb,
+ u8 family)
+{
+ struct nf_flowtable *nf_ft = &p->ct_ft->nf_ft;
+ struct flow_offload_tuple_rhash *tuplehash;
+ struct flow_offload_tuple tuple = {};
+ enum ip_conntrack_info ctinfo;
+ struct flow_offload *flow;
+ struct nf_conn *ct;
+ unsigned int thoff;
+ int ip_proto;
+ u8 dir;
+
+ /* Previously seen or loopback */
+ ct = nf_ct_get(skb, &ctinfo);
+ if ((ct && !nf_ct_is_template(ct)) || ctinfo == IP_CT_UNTRACKED)
+ return false;
+
+ switch (family) {
+ case NFPROTO_IPV4:
+ if (!tcf_ct_flow_table_fill_tuple_ipv4(skb, &tuple))
+ return false;
+ break;
+ case NFPROTO_IPV6:
+ if (!tcf_ct_flow_table_fill_tuple_ipv6(skb, &tuple))
+ return false;
+ break;
+ default:
+ return false;
+ }
+
+ tuplehash = flow_offload_lookup(nf_ft, &tuple);
+ if (!tuplehash)
+ return false;
+
+ dir = tuplehash->tuple.dir;
+ flow = container_of(tuplehash, struct flow_offload, tuplehash[dir]);
+ ct = flow->ct;
+
+ ctinfo = dir == FLOW_OFFLOAD_DIR_ORIGINAL ? IP_CT_ESTABLISHED :
+ IP_CT_ESTABLISHED_REPLY;
+
+ thoff = ip_hdr(skb)->ihl * 4;
+ ip_proto = ip_hdr(skb)->protocol;
+ if (ip_proto == IPPROTO_TCP &&
+ !tcf_ct_flow_table_check_tcp(flow, skb, thoff))
+ return false;
+
+ nf_conntrack_get(&ct->ct_general);
+ nf_ct_set(skb, ct, ctinfo);
+
+ return true;
+}
+
static int tcf_ct_flow_tables_init(void)
{
return rhashtable_init(&zones_ht, &zones_params);
@@ -563,6 +712,7 @@ static int tcf_ct_act(struct sk_buff *skb, const struct tc_action *a,
struct nf_hook_state state;
int nh_ofs, err, retval;
struct tcf_ct_params *p;
+ bool skip_add = false;
struct nf_conn *ct;
u8 family;
@@ -612,6 +762,11 @@ static int tcf_ct_act(struct sk_buff *skb, const struct tc_action *a,
*/
cached = tcf_ct_skb_nfct_cached(net, skb, p->zone, force);
if (!cached) {
+ if (!commit && tcf_ct_flow_table_lookup(p, skb, family)) {
+ skip_add = true;
+ goto do_nat;
+ }
+
/* Associate skb with specified zone. */
if (tmpl) {
ct = nf_ct_get(skb, &ctinfo);
@@ -629,6 +784,7 @@ static int tcf_ct_act(struct sk_buff *skb, const struct tc_action *a,
goto out_push;
}
+do_nat:
ct = nf_ct_get(skb, &ctinfo);
if (!ct)
goto out_push;
@@ -646,10 +802,10 @@ static int tcf_ct_act(struct sk_buff *skb, const struct tc_action *a,
* even if the connection is already confirmed.
*/
nf_conntrack_confirm(skb);
+ } else if (!skip_add) {
+ tcf_ct_flow_table_process_conn(p->ct_ft, ct, ctinfo);
}
- tcf_ct_flow_table_process_conn(p->ct_ft, ct, ctinfo);
-
out_push:
skb_push_rcsum(skb, nh_ofs);
--
1.8.3.1
^ permalink raw reply related [flat|nested] 7+ messages in thread