* [PATCH net-next v6 1/3] net/sched: act_ct: Create nf flow table per zone
2020-03-03 15:57 [PATCH net-next v6 0/3] act_ct: Software offload of conntrack_in Paul Blakey
@ 2020-03-03 15:57 ` Paul Blakey
2020-03-03 16:51 ` Marcelo Ricardo Leitner
2020-03-07 20:12 ` Eric Dumazet
2020-03-03 15:57 ` [PATCH net-next v6 2/3] net/sched: act_ct: Offload established connections to flow table Paul Blakey
2020-03-03 15:57 ` [PATCH net-next v6 3/3] net/sched: act_ct: Software offload of established flows Paul Blakey
2 siblings, 2 replies; 14+ messages in thread
From: Paul Blakey @ 2020-03-03 15:57 UTC (permalink / raw)
To: Paul Blakey, Saeed Mahameed, Oz Shlomo, Jakub Kicinski,
Vlad Buslov, David Miller, netdev@vger.kernel.org, Jiri Pirko,
Roi Dayan
Use the NF flow tables infrastructure for CT offload.
Create a nf flow table per zone.
Next patches will add FT entries to this table, and do
the software offload.
Signed-off-by: Paul Blakey <paulb@mellanox.com>
Reviewed-by: Jiri Pirko <jiri@mellanox.com>
---
v4->v5:
Added reviewed by Jiri, thanks!
v3->v4:
Alloc GFP_ATOMIC
v2->v3:
Ditch re-locking to alloc, and use atomic allocation
v1->v2:
Use spin_lock_bh instead of spin_lock, and unlock for alloc (as it can sleep)
Free ft on last tc act instance instead of last instance + last offloaded tuple,
this removes cleanup cb and netfilter patches, and is simpler
Removed accidental mlx5/core/en_tc.c change
Removed reviewed by Jiri - patch changed
include/net/tc_act/tc_ct.h | 2 +
net/sched/Kconfig | 2 +-
net/sched/act_ct.c | 134 ++++++++++++++++++++++++++++++++++++++++++++-
3 files changed, 136 insertions(+), 2 deletions(-)
diff --git a/include/net/tc_act/tc_ct.h b/include/net/tc_act/tc_ct.h
index a8b1564..cf3492e 100644
--- a/include/net/tc_act/tc_ct.h
+++ b/include/net/tc_act/tc_ct.h
@@ -25,6 +25,8 @@ struct tcf_ct_params {
u16 ct_action;
struct rcu_head rcu;
+
+ struct tcf_ct_flow_table *ct_ft;
};
struct tcf_ct {
diff --git a/net/sched/Kconfig b/net/sched/Kconfig
index edde0e5..bfbefb7 100644
--- a/net/sched/Kconfig
+++ b/net/sched/Kconfig
@@ -972,7 +972,7 @@ config NET_ACT_TUNNEL_KEY
config NET_ACT_CT
tristate "connection tracking tc action"
- depends on NET_CLS_ACT && NF_CONNTRACK && NF_NAT
+ depends on NET_CLS_ACT && NF_CONNTRACK && NF_NAT && NF_FLOW_TABLE
help
Say Y here to allow sending the packets to conntrack module.
diff --git a/net/sched/act_ct.c b/net/sched/act_ct.c
index f685c0d..3321087 100644
--- a/net/sched/act_ct.c
+++ b/net/sched/act_ct.c
@@ -15,6 +15,7 @@
#include <linux/pkt_cls.h>
#include <linux/ip.h>
#include <linux/ipv6.h>
+#include <linux/rhashtable.h>
#include <net/netlink.h>
#include <net/pkt_sched.h>
#include <net/pkt_cls.h>
@@ -24,6 +25,7 @@
#include <uapi/linux/tc_act/tc_ct.h>
#include <net/tc_act/tc_ct.h>
+#include <net/netfilter/nf_flow_table.h>
#include <net/netfilter/nf_conntrack.h>
#include <net/netfilter/nf_conntrack_core.h>
#include <net/netfilter/nf_conntrack_zones.h>
@@ -31,6 +33,108 @@
#include <net/netfilter/ipv6/nf_defrag_ipv6.h>
#include <uapi/linux/netfilter/nf_nat.h>
+static struct workqueue_struct *act_ct_wq;
+static struct rhashtable zones_ht;
+static DEFINE_SPINLOCK(zones_lock);
+
+struct tcf_ct_flow_table {
+ struct rhash_head node; /* In zones tables */
+
+ struct rcu_work rwork;
+ struct nf_flowtable nf_ft;
+ u16 zone;
+ u32 ref;
+
+ bool dying;
+};
+
+static const struct rhashtable_params zones_params = {
+ .head_offset = offsetof(struct tcf_ct_flow_table, node),
+ .key_offset = offsetof(struct tcf_ct_flow_table, zone),
+ .key_len = sizeof_field(struct tcf_ct_flow_table, zone),
+ .automatic_shrinking = true,
+};
+
+static struct nf_flowtable_type flowtable_ct = {
+ .owner = THIS_MODULE,
+};
+
+static int tcf_ct_flow_table_get(struct tcf_ct_params *params)
+{
+ struct tcf_ct_flow_table *ct_ft;
+ int err = -ENOMEM;
+
+ spin_lock_bh(&zones_lock);
+ ct_ft = rhashtable_lookup_fast(&zones_ht, ¶ms->zone, zones_params);
+ if (ct_ft)
+ goto take_ref;
+
+ ct_ft = kzalloc(sizeof(*ct_ft), GFP_ATOMIC);
+ if (!ct_ft)
+ goto err_alloc;
+
+ ct_ft->zone = params->zone;
+ err = rhashtable_insert_fast(&zones_ht, &ct_ft->node, zones_params);
+ if (err)
+ goto err_insert;
+
+ ct_ft->nf_ft.type = &flowtable_ct;
+ err = nf_flow_table_init(&ct_ft->nf_ft);
+ if (err)
+ goto err_init;
+
+ __module_get(THIS_MODULE);
+take_ref:
+ params->ct_ft = ct_ft;
+ ct_ft->ref++;
+ spin_unlock_bh(&zones_lock);
+
+ return 0;
+
+err_init:
+ rhashtable_remove_fast(&zones_ht, &ct_ft->node, zones_params);
+err_insert:
+ kfree(ct_ft);
+err_alloc:
+ spin_unlock_bh(&zones_lock);
+ return err;
+}
+
+static void tcf_ct_flow_table_cleanup_work(struct work_struct *work)
+{
+ struct tcf_ct_flow_table *ct_ft;
+
+ ct_ft = container_of(to_rcu_work(work), struct tcf_ct_flow_table,
+ rwork);
+ nf_flow_table_free(&ct_ft->nf_ft);
+ kfree(ct_ft);
+
+ module_put(THIS_MODULE);
+}
+
+static void tcf_ct_flow_table_put(struct tcf_ct_params *params)
+{
+ struct tcf_ct_flow_table *ct_ft = params->ct_ft;
+
+ spin_lock_bh(&zones_lock);
+ if (--params->ct_ft->ref == 0) {
+ rhashtable_remove_fast(&zones_ht, &ct_ft->node, zones_params);
+ INIT_RCU_WORK(&ct_ft->rwork, tcf_ct_flow_table_cleanup_work);
+ queue_rcu_work(act_ct_wq, &ct_ft->rwork);
+ }
+ spin_unlock_bh(&zones_lock);
+}
+
+static int tcf_ct_flow_tables_init(void)
+{
+ return rhashtable_init(&zones_ht, &zones_params);
+}
+
+static void tcf_ct_flow_tables_uninit(void)
+{
+ rhashtable_destroy(&zones_ht);
+}
+
static struct tc_action_ops act_ct_ops;
static unsigned int ct_net_id;
@@ -207,6 +311,8 @@ static void tcf_ct_params_free(struct rcu_head *head)
struct tcf_ct_params *params = container_of(head,
struct tcf_ct_params, rcu);
+ tcf_ct_flow_table_put(params);
+
if (params->tmpl)
nf_conntrack_put(¶ms->tmpl->ct_general);
kfree(params);
@@ -730,6 +836,10 @@ static int tcf_ct_init(struct net *net, struct nlattr *nla,
if (err)
goto cleanup;
+ err = tcf_ct_flow_table_get(params);
+ if (err)
+ goto cleanup;
+
spin_lock_bh(&c->tcf_lock);
goto_ch = tcf_action_set_ctrlact(*a, parm->action, goto_ch);
params = rcu_replace_pointer(c->params, params,
@@ -974,12 +1084,34 @@ static void __net_exit ct_exit_net(struct list_head *net_list)
static int __init ct_init_module(void)
{
- return tcf_register_action(&act_ct_ops, &ct_net_ops);
+ int err;
+
+ act_ct_wq = alloc_ordered_workqueue("act_ct_workqueue", 0);
+ if (!act_ct_wq)
+ return -ENOMEM;
+
+ err = tcf_ct_flow_tables_init();
+ if (err)
+ goto err_tbl_init;
+
+ err = tcf_register_action(&act_ct_ops, &ct_net_ops);
+ if (err)
+ goto err_register;
+
+ return 0;
+
+err_tbl_init:
+ destroy_workqueue(act_ct_wq);
+err_register:
+ tcf_ct_flow_tables_uninit();
+ return err;
}
static void __exit ct_cleanup_module(void)
{
tcf_unregister_action(&act_ct_ops, &ct_net_ops);
+ tcf_ct_flow_tables_uninit();
+ destroy_workqueue(act_ct_wq);
}
module_init(ct_init_module);
--
1.8.3.1
^ permalink raw reply related [flat|nested] 14+ messages in thread* Re: [PATCH net-next v6 1/3] net/sched: act_ct: Create nf flow table per zone
2020-03-03 15:57 ` [PATCH net-next v6 1/3] net/sched: act_ct: Create nf flow table per zone Paul Blakey
@ 2020-03-03 16:51 ` Marcelo Ricardo Leitner
2020-03-07 20:12 ` Eric Dumazet
1 sibling, 0 replies; 14+ messages in thread
From: Marcelo Ricardo Leitner @ 2020-03-03 16:51 UTC (permalink / raw)
To: Paul Blakey
Cc: Saeed Mahameed, Oz Shlomo, Jakub Kicinski, Vlad Buslov,
David Miller, netdev@vger.kernel.org, Jiri Pirko, Roi Dayan
On Tue, Mar 03, 2020 at 05:57:50PM +0200, Paul Blakey wrote:
> Use the NF flow tables infrastructure for CT offload.
>
> Create a nf flow table per zone.
>
> Next patches will add FT entries to this table, and do
> the software offload.
>
> Signed-off-by: Paul Blakey <paulb@mellanox.com>
> Reviewed-by: Jiri Pirko <jiri@mellanox.com>
Acked-by: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
> ---
> v4->v5:
> Added reviewed by Jiri, thanks!
> v3->v4:
> Alloc GFP_ATOMIC
> v2->v3:
> Ditch re-locking to alloc, and use atomic allocation
> v1->v2:
> Use spin_lock_bh instead of spin_lock, and unlock for alloc (as it can sleep)
> Free ft on last tc act instance instead of last instance + last offloaded tuple,
> this removes cleanup cb and netfilter patches, and is simpler
> Removed accidental mlx5/core/en_tc.c change
> Removed reviewed by Jiri - patch changed
>
> include/net/tc_act/tc_ct.h | 2 +
> net/sched/Kconfig | 2 +-
> net/sched/act_ct.c | 134 ++++++++++++++++++++++++++++++++++++++++++++-
> 3 files changed, 136 insertions(+), 2 deletions(-)
>
> diff --git a/include/net/tc_act/tc_ct.h b/include/net/tc_act/tc_ct.h
> index a8b1564..cf3492e 100644
> --- a/include/net/tc_act/tc_ct.h
> +++ b/include/net/tc_act/tc_ct.h
> @@ -25,6 +25,8 @@ struct tcf_ct_params {
> u16 ct_action;
>
> struct rcu_head rcu;
> +
> + struct tcf_ct_flow_table *ct_ft;
> };
>
> struct tcf_ct {
> diff --git a/net/sched/Kconfig b/net/sched/Kconfig
> index edde0e5..bfbefb7 100644
> --- a/net/sched/Kconfig
> +++ b/net/sched/Kconfig
> @@ -972,7 +972,7 @@ config NET_ACT_TUNNEL_KEY
>
> config NET_ACT_CT
> tristate "connection tracking tc action"
> - depends on NET_CLS_ACT && NF_CONNTRACK && NF_NAT
> + depends on NET_CLS_ACT && NF_CONNTRACK && NF_NAT && NF_FLOW_TABLE
> help
> Say Y here to allow sending the packets to conntrack module.
>
> diff --git a/net/sched/act_ct.c b/net/sched/act_ct.c
> index f685c0d..3321087 100644
> --- a/net/sched/act_ct.c
> +++ b/net/sched/act_ct.c
> @@ -15,6 +15,7 @@
> #include <linux/pkt_cls.h>
> #include <linux/ip.h>
> #include <linux/ipv6.h>
> +#include <linux/rhashtable.h>
> #include <net/netlink.h>
> #include <net/pkt_sched.h>
> #include <net/pkt_cls.h>
> @@ -24,6 +25,7 @@
> #include <uapi/linux/tc_act/tc_ct.h>
> #include <net/tc_act/tc_ct.h>
>
> +#include <net/netfilter/nf_flow_table.h>
> #include <net/netfilter/nf_conntrack.h>
> #include <net/netfilter/nf_conntrack_core.h>
> #include <net/netfilter/nf_conntrack_zones.h>
> @@ -31,6 +33,108 @@
> #include <net/netfilter/ipv6/nf_defrag_ipv6.h>
> #include <uapi/linux/netfilter/nf_nat.h>
>
> +static struct workqueue_struct *act_ct_wq;
> +static struct rhashtable zones_ht;
> +static DEFINE_SPINLOCK(zones_lock);
> +
> +struct tcf_ct_flow_table {
> + struct rhash_head node; /* In zones tables */
> +
> + struct rcu_work rwork;
> + struct nf_flowtable nf_ft;
> + u16 zone;
> + u32 ref;
> +
> + bool dying;
> +};
> +
> +static const struct rhashtable_params zones_params = {
> + .head_offset = offsetof(struct tcf_ct_flow_table, node),
> + .key_offset = offsetof(struct tcf_ct_flow_table, zone),
> + .key_len = sizeof_field(struct tcf_ct_flow_table, zone),
> + .automatic_shrinking = true,
> +};
> +
> +static struct nf_flowtable_type flowtable_ct = {
> + .owner = THIS_MODULE,
> +};
> +
> +static int tcf_ct_flow_table_get(struct tcf_ct_params *params)
> +{
> + struct tcf_ct_flow_table *ct_ft;
> + int err = -ENOMEM;
> +
> + spin_lock_bh(&zones_lock);
> + ct_ft = rhashtable_lookup_fast(&zones_ht, ¶ms->zone, zones_params);
> + if (ct_ft)
> + goto take_ref;
> +
> + ct_ft = kzalloc(sizeof(*ct_ft), GFP_ATOMIC);
> + if (!ct_ft)
> + goto err_alloc;
> +
> + ct_ft->zone = params->zone;
> + err = rhashtable_insert_fast(&zones_ht, &ct_ft->node, zones_params);
> + if (err)
> + goto err_insert;
> +
> + ct_ft->nf_ft.type = &flowtable_ct;
> + err = nf_flow_table_init(&ct_ft->nf_ft);
> + if (err)
> + goto err_init;
> +
> + __module_get(THIS_MODULE);
> +take_ref:
> + params->ct_ft = ct_ft;
> + ct_ft->ref++;
> + spin_unlock_bh(&zones_lock);
> +
> + return 0;
> +
> +err_init:
> + rhashtable_remove_fast(&zones_ht, &ct_ft->node, zones_params);
> +err_insert:
> + kfree(ct_ft);
> +err_alloc:
> + spin_unlock_bh(&zones_lock);
> + return err;
> +}
> +
> +static void tcf_ct_flow_table_cleanup_work(struct work_struct *work)
> +{
> + struct tcf_ct_flow_table *ct_ft;
> +
> + ct_ft = container_of(to_rcu_work(work), struct tcf_ct_flow_table,
> + rwork);
> + nf_flow_table_free(&ct_ft->nf_ft);
> + kfree(ct_ft);
> +
> + module_put(THIS_MODULE);
> +}
> +
> +static void tcf_ct_flow_table_put(struct tcf_ct_params *params)
> +{
> + struct tcf_ct_flow_table *ct_ft = params->ct_ft;
> +
> + spin_lock_bh(&zones_lock);
> + if (--params->ct_ft->ref == 0) {
> + rhashtable_remove_fast(&zones_ht, &ct_ft->node, zones_params);
> + INIT_RCU_WORK(&ct_ft->rwork, tcf_ct_flow_table_cleanup_work);
> + queue_rcu_work(act_ct_wq, &ct_ft->rwork);
> + }
> + spin_unlock_bh(&zones_lock);
> +}
> +
> +static int tcf_ct_flow_tables_init(void)
> +{
> + return rhashtable_init(&zones_ht, &zones_params);
> +}
> +
> +static void tcf_ct_flow_tables_uninit(void)
> +{
> + rhashtable_destroy(&zones_ht);
> +}
> +
> static struct tc_action_ops act_ct_ops;
> static unsigned int ct_net_id;
>
> @@ -207,6 +311,8 @@ static void tcf_ct_params_free(struct rcu_head *head)
> struct tcf_ct_params *params = container_of(head,
> struct tcf_ct_params, rcu);
>
> + tcf_ct_flow_table_put(params);
> +
> if (params->tmpl)
> nf_conntrack_put(¶ms->tmpl->ct_general);
> kfree(params);
> @@ -730,6 +836,10 @@ static int tcf_ct_init(struct net *net, struct nlattr *nla,
> if (err)
> goto cleanup;
>
> + err = tcf_ct_flow_table_get(params);
> + if (err)
> + goto cleanup;
> +
> spin_lock_bh(&c->tcf_lock);
> goto_ch = tcf_action_set_ctrlact(*a, parm->action, goto_ch);
> params = rcu_replace_pointer(c->params, params,
> @@ -974,12 +1084,34 @@ static void __net_exit ct_exit_net(struct list_head *net_list)
>
> static int __init ct_init_module(void)
> {
> - return tcf_register_action(&act_ct_ops, &ct_net_ops);
> + int err;
> +
> + act_ct_wq = alloc_ordered_workqueue("act_ct_workqueue", 0);
> + if (!act_ct_wq)
> + return -ENOMEM;
> +
> + err = tcf_ct_flow_tables_init();
> + if (err)
> + goto err_tbl_init;
> +
> + err = tcf_register_action(&act_ct_ops, &ct_net_ops);
> + if (err)
> + goto err_register;
> +
> + return 0;
> +
> +err_tbl_init:
> + destroy_workqueue(act_ct_wq);
> +err_register:
> + tcf_ct_flow_tables_uninit();
> + return err;
> }
>
> static void __exit ct_cleanup_module(void)
> {
> tcf_unregister_action(&act_ct_ops, &ct_net_ops);
> + tcf_ct_flow_tables_uninit();
> + destroy_workqueue(act_ct_wq);
> }
>
> module_init(ct_init_module);
> --
> 1.8.3.1
>
^ permalink raw reply [flat|nested] 14+ messages in thread* Re: [PATCH net-next v6 1/3] net/sched: act_ct: Create nf flow table per zone
2020-03-03 15:57 ` [PATCH net-next v6 1/3] net/sched: act_ct: Create nf flow table per zone Paul Blakey
2020-03-03 16:51 ` Marcelo Ricardo Leitner
@ 2020-03-07 20:12 ` Eric Dumazet
2020-03-07 20:53 ` Eric Dumazet
1 sibling, 1 reply; 14+ messages in thread
From: Eric Dumazet @ 2020-03-07 20:12 UTC (permalink / raw)
To: Paul Blakey, Saeed Mahameed, Oz Shlomo, Jakub Kicinski,
Vlad Buslov, David Miller, netdev@vger.kernel.org, Jiri Pirko,
Roi Dayan
On 3/3/20 7:57 AM, Paul Blakey wrote:
> Use the NF flow tables infrastructure for CT offload.
>
> Create a nf flow table per zone.
>
> Next patches will add FT entries to this table, and do
> the software offload.
>
> Signed-off-by: Paul Blakey <paulb@mellanox.com>
> Reviewed-by: Jiri Pirko <jiri@mellanox.com>
> ---
> v4->v5:
> Added reviewed by Jiri, thanks!
> v3->v4:
> Alloc GFP_ATOMIC
> v2->v3:
> Ditch re-locking to alloc, and use atomic allocation
> v1->v2:
> Use spin_lock_bh instead of spin_lock, and unlock for alloc (as it can sleep)
> Free ft on last tc act instance instead of last instance + last offloaded tuple,
> this removes cleanup cb and netfilter patches, and is simpler
> Removed accidental mlx5/core/en_tc.c change
> Removed reviewed by Jiri - patch changed
>
> include/net/tc_act/tc_ct.h | 2 +
> net/sched/Kconfig | 2 +-
> net/sched/act_ct.c | 134 ++++++++++++++++++++++++++++++++++++++++++++-
> 3 files changed, 136 insertions(+), 2 deletions(-)
>
> diff --git a/include/net/tc_act/tc_ct.h b/include/net/tc_act/tc_ct.h
> index a8b1564..cf3492e 100644
> --- a/include/net/tc_act/tc_ct.h
> +++ b/include/net/tc_act/tc_ct.h
> @@ -25,6 +25,8 @@ struct tcf_ct_params {
> u16 ct_action;
>
> struct rcu_head rcu;
> +
> + struct tcf_ct_flow_table *ct_ft;
> };
>
> struct tcf_ct {
> diff --git a/net/sched/Kconfig b/net/sched/Kconfig
> index edde0e5..bfbefb7 100644
> --- a/net/sched/Kconfig
> +++ b/net/sched/Kconfig
> @@ -972,7 +972,7 @@ config NET_ACT_TUNNEL_KEY
>
> config NET_ACT_CT
> tristate "connection tracking tc action"
> - depends on NET_CLS_ACT && NF_CONNTRACK && NF_NAT
> + depends on NET_CLS_ACT && NF_CONNTRACK && NF_NAT && NF_FLOW_TABLE
> help
> Say Y here to allow sending the packets to conntrack module.
>
> diff --git a/net/sched/act_ct.c b/net/sched/act_ct.c
> index f685c0d..3321087 100644
> --- a/net/sched/act_ct.c
> +++ b/net/sched/act_ct.c
> @@ -15,6 +15,7 @@
> #include <linux/pkt_cls.h>
> #include <linux/ip.h>
> #include <linux/ipv6.h>
> +#include <linux/rhashtable.h>
> #include <net/netlink.h>
> #include <net/pkt_sched.h>
> #include <net/pkt_cls.h>
> @@ -24,6 +25,7 @@
> #include <uapi/linux/tc_act/tc_ct.h>
> #include <net/tc_act/tc_ct.h>
>
> +#include <net/netfilter/nf_flow_table.h>
> #include <net/netfilter/nf_conntrack.h>
> #include <net/netfilter/nf_conntrack_core.h>
> #include <net/netfilter/nf_conntrack_zones.h>
> @@ -31,6 +33,108 @@
> #include <net/netfilter/ipv6/nf_defrag_ipv6.h>
> #include <uapi/linux/netfilter/nf_nat.h>
>
> +static struct workqueue_struct *act_ct_wq;
> +static struct rhashtable zones_ht;
> +static DEFINE_SPINLOCK(zones_lock);
> +
> +struct tcf_ct_flow_table {
> + struct rhash_head node; /* In zones tables */
> +
> + struct rcu_work rwork;
> + struct nf_flowtable nf_ft;
> + u16 zone;
> + u32 ref;
> +
> + bool dying;
> +};
> +
> +static const struct rhashtable_params zones_params = {
> + .head_offset = offsetof(struct tcf_ct_flow_table, node),
> + .key_offset = offsetof(struct tcf_ct_flow_table, zone),
> + .key_len = sizeof_field(struct tcf_ct_flow_table, zone),
> + .automatic_shrinking = true,
> +};
> +
> +static struct nf_flowtable_type flowtable_ct = {
> + .owner = THIS_MODULE,
> +};
> +
> +static int tcf_ct_flow_table_get(struct tcf_ct_params *params)
> +{
> + struct tcf_ct_flow_table *ct_ft;
> + int err = -ENOMEM;
> +
> + spin_lock_bh(&zones_lock);
> + ct_ft = rhashtable_lookup_fast(&zones_ht, ¶ms->zone, zones_params);
> + if (ct_ft)
> + goto take_ref;
> +
> + ct_ft = kzalloc(sizeof(*ct_ft), GFP_ATOMIC);
> + if (!ct_ft)
> + goto err_alloc;
> +
> + ct_ft->zone = params->zone;
> + err = rhashtable_insert_fast(&zones_ht, &ct_ft->node, zones_params);
> + if (err)
> + goto err_insert;
> +
> + ct_ft->nf_ft.type = &flowtable_ct;
> + err = nf_flow_table_init(&ct_ft->nf_ft);
This call is going to allocate a rhashtable (GFP_KERNEL allocations that might sleep)
Since you still hold zones_lock spinlock, a splat should occur.
"BUG: sleeping function called from invalid context in ..."
DEBUG_ATOMIC_SLEEP=y is your friend.
And it is always a good thing to make sure a patch does not trigger a lockdep splat
CONFIG_PROVE_LOCKING=y
> + if (err)
> + goto err_init;
> +
> + __module_get(THIS_MODULE);
> +take_ref:
> + params->ct_ft = ct_ft;
> + ct_ft->ref++;
> + spin_unlock_bh(&zones_lock);
> +
> + return 0;
> +
> +err_init:
> + rhashtable_remove_fast(&zones_ht, &ct_ft->node, zones_params);
> +err_insert:
> + kfree(ct_ft);
> +err_alloc:
> + spin_unlock_bh(&zones_lock);
> + return err;
> +}
> +
> +static void tcf_ct_flow_table_cleanup_work(struct work_struct *work)
> +{
> + struct tcf_ct_flow_table *ct_ft;
> +
> + ct_ft = container_of(to_rcu_work(work), struct tcf_ct_flow_table,
> + rwork);
> + nf_flow_table_free(&ct_ft->nf_ft);
> + kfree(ct_ft);
> +
> + module_put(THIS_MODULE);
> +}
> +
> +static void tcf_ct_flow_table_put(struct tcf_ct_params *params)
> +{
> + struct tcf_ct_flow_table *ct_ft = params->ct_ft;
> +
> + spin_lock_bh(&zones_lock);
> + if (--params->ct_ft->ref == 0) {
> + rhashtable_remove_fast(&zones_ht, &ct_ft->node, zones_params);
> + INIT_RCU_WORK(&ct_ft->rwork, tcf_ct_flow_table_cleanup_work);
> + queue_rcu_work(act_ct_wq, &ct_ft->rwork);
> + }
> + spin_unlock_bh(&zones_lock);
> +}
> +
> +static int tcf_ct_flow_tables_init(void)
> +{
> + return rhashtable_init(&zones_ht, &zones_params);
> +}
> +
> +static void tcf_ct_flow_tables_uninit(void)
> +{
> + rhashtable_destroy(&zones_ht);
> +}
> +
> static struct tc_action_ops act_ct_ops;
> static unsigned int ct_net_id;
>
> @@ -207,6 +311,8 @@ static void tcf_ct_params_free(struct rcu_head *head)
> struct tcf_ct_params *params = container_of(head,
> struct tcf_ct_params, rcu);
>
> + tcf_ct_flow_table_put(params);
> +
> if (params->tmpl)
> nf_conntrack_put(¶ms->tmpl->ct_general);
> kfree(params);
> @@ -730,6 +836,10 @@ static int tcf_ct_init(struct net *net, struct nlattr *nla,
> if (err)
> goto cleanup;
>
> + err = tcf_ct_flow_table_get(params);
> + if (err)
> + goto cleanup;
> +
> spin_lock_bh(&c->tcf_lock);
> goto_ch = tcf_action_set_ctrlact(*a, parm->action, goto_ch);
> params = rcu_replace_pointer(c->params, params,
> @@ -974,12 +1084,34 @@ static void __net_exit ct_exit_net(struct list_head *net_list)
>
> static int __init ct_init_module(void)
> {
> - return tcf_register_action(&act_ct_ops, &ct_net_ops);
> + int err;
> +
> + act_ct_wq = alloc_ordered_workqueue("act_ct_workqueue", 0);
> + if (!act_ct_wq)
> + return -ENOMEM;
> +
> + err = tcf_ct_flow_tables_init();
> + if (err)
> + goto err_tbl_init;
> +
> + err = tcf_register_action(&act_ct_ops, &ct_net_ops);
> + if (err)
> + goto err_register;
> +
> + return 0;
> +
> +err_tbl_init:
> + destroy_workqueue(act_ct_wq);
> +err_register:
> + tcf_ct_flow_tables_uninit();
> + return err;
> }
>
> static void __exit ct_cleanup_module(void)
> {
> tcf_unregister_action(&act_ct_ops, &ct_net_ops);
> + tcf_ct_flow_tables_uninit();
> + destroy_workqueue(act_ct_wq);
> }
>
> module_init(ct_init_module);
>
^ permalink raw reply [flat|nested] 14+ messages in thread* Re: [PATCH net-next v6 1/3] net/sched: act_ct: Create nf flow table per zone
2020-03-07 20:12 ` Eric Dumazet
@ 2020-03-07 20:53 ` Eric Dumazet
2020-03-08 8:11 ` Paul Blakey
0 siblings, 1 reply; 14+ messages in thread
From: Eric Dumazet @ 2020-03-07 20:53 UTC (permalink / raw)
To: Paul Blakey, Saeed Mahameed, Oz Shlomo, Jakub Kicinski,
Vlad Buslov, David Miller, netdev@vger.kernel.org, Jiri Pirko,
Roi Dayan
On 3/7/20 12:12 PM, Eric Dumazet wrote:
>
>
> On 3/3/20 7:57 AM, Paul Blakey wrote:
>> Use the NF flow tables infrastructure for CT offload.
>>
>> Create a nf flow table per zone.
>>
>> Next patches will add FT entries to this table, and do
>> the software offload.
>>
>> Signed-off-by: Paul Blakey <paulb@mellanox.com>
>> Reviewed-by: Jiri Pirko <jiri@mellanox.com>
>> ---
>> v4->v5:
>> Added reviewed by Jiri, thanks!
>> v3->v4:
>> Alloc GFP_ATOMIC
>> v2->v3:
>> Ditch re-locking to alloc, and use atomic allocation
>> v1->v2:
>> Use spin_lock_bh instead of spin_lock, and unlock for alloc (as it can sleep)
>> Free ft on last tc act instance instead of last instance + last offloaded tuple,
>> this removes cleanup cb and netfilter patches, and is simpler
>> Removed accidental mlx5/core/en_tc.c change
>> Removed reviewed by Jiri - patch changed
>>
>> + err = nf_flow_table_init(&ct_ft->nf_ft);
>
> This call is going to allocate a rhashtable (GFP_KERNEL allocations that might sleep)
>
> Since you still hold zones_lock spinlock, a splat should occur.
>
> "BUG: sleeping function called from invalid context in ..."
>
> DEBUG_ATOMIC_SLEEP=y is your friend.
>
> And it is always a good thing to make sure a patch does not trigger a lockdep splat
>
> CONFIG_PROVE_LOCKING=y
Also abusing a spinlock and GFP_ATOMIC allocations in control path is highly discouraged.
I can not test the following fix, any objections before I submit this officially ?
diff --git a/net/sched/act_ct.c b/net/sched/act_ct.c
index 23eba61f0f819212a3522c3c63b938d0b8d997e2..3d9e678d7d5336f1746035745b091bea0dcb5fdd 100644
--- a/net/sched/act_ct.c
+++ b/net/sched/act_ct.c
@@ -35,15 +35,15 @@
static struct workqueue_struct *act_ct_wq;
static struct rhashtable zones_ht;
-static DEFINE_SPINLOCK(zones_lock);
+static DEFINE_MUTEX(zones_mutex);
struct tcf_ct_flow_table {
struct rhash_head node; /* In zones tables */
struct rcu_work rwork;
struct nf_flowtable nf_ft;
+ refcount_t ref;
u16 zone;
- u32 ref;
bool dying;
};
@@ -64,14 +64,15 @@ static int tcf_ct_flow_table_get(struct tcf_ct_params *params)
struct tcf_ct_flow_table *ct_ft;
int err = -ENOMEM;
- spin_lock_bh(&zones_lock);
+ mutex_lock(&zones_mutex);
ct_ft = rhashtable_lookup_fast(&zones_ht, ¶ms->zone, zones_params);
- if (ct_ft)
- goto take_ref;
+ if (ct_ft && refcount_inc_not_zero(&ct_ft->ref))
+ goto out_unlock;
- ct_ft = kzalloc(sizeof(*ct_ft), GFP_ATOMIC);
+ ct_ft = kzalloc(sizeof(*ct_ft), GFP_KERNEL);
if (!ct_ft)
goto err_alloc;
+ refcount_set(&ct_ft->ref, 1);
ct_ft->zone = params->zone;
err = rhashtable_insert_fast(&zones_ht, &ct_ft->node, zones_params);
@@ -84,10 +85,9 @@ static int tcf_ct_flow_table_get(struct tcf_ct_params *params)
goto err_init;
__module_get(THIS_MODULE);
-take_ref:
+out_unlock:
params->ct_ft = ct_ft;
- ct_ft->ref++;
- spin_unlock_bh(&zones_lock);
+ mutex_unlock(&zones_mutex);
return 0;
@@ -96,7 +96,7 @@ static int tcf_ct_flow_table_get(struct tcf_ct_params *params)
err_insert:
kfree(ct_ft);
err_alloc:
- spin_unlock_bh(&zones_lock);
+ mutex_unlock(&zones_mutex);
return err;
}
@@ -116,13 +116,11 @@ static void tcf_ct_flow_table_put(struct tcf_ct_params *params)
{
struct tcf_ct_flow_table *ct_ft = params->ct_ft;
- spin_lock_bh(&zones_lock);
- if (--params->ct_ft->ref == 0) {
+ if (refcount_dec_and_test(¶ms->ct_ft->ref)) {
rhashtable_remove_fast(&zones_ht, &ct_ft->node, zones_params);
INIT_RCU_WORK(&ct_ft->rwork, tcf_ct_flow_table_cleanup_work);
queue_rcu_work(act_ct_wq, &ct_ft->rwork);
}
- spin_unlock_bh(&zones_lock);
}
static void tcf_ct_flow_table_add(struct tcf_ct_flow_table *ct_ft,
^ permalink raw reply related [flat|nested] 14+ messages in thread* Re: [PATCH net-next v6 1/3] net/sched: act_ct: Create nf flow table per zone
2020-03-07 20:53 ` Eric Dumazet
@ 2020-03-08 8:11 ` Paul Blakey
2020-03-08 8:15 ` Paul Blakey
0 siblings, 1 reply; 14+ messages in thread
From: Paul Blakey @ 2020-03-08 8:11 UTC (permalink / raw)
To: Eric Dumazet, Saeed Mahameed, Oz Shlomo, Jakub Kicinski,
Vlad Buslov, David Miller, netdev@vger.kernel.org, Jiri Pirko,
Roi Dayan
iirc I did the spin lock bh because we can be called from queue work rcu handler , so I wanted to disable soft irq.
I got a possible deadlock splat for that.
On 3/7/2020 10:53 PM, Eric Dumazet wrote:
>
> On 3/7/20 12:12 PM, Eric Dumazet wrote:
>>
>> On 3/3/20 7:57 AM, Paul Blakey wrote:
>>> Use the NF flow tables infrastructure for CT offload.
>>>
>>> Create a nf flow table per zone.
>>>
>>> Next patches will add FT entries to this table, and do
>>> the software offload.
>>>
>>> Signed-off-by: Paul Blakey <paulb@mellanox.com>
>>> Reviewed-by: Jiri Pirko <jiri@mellanox.com>
>>> ---
>>> v4->v5:
>>> Added reviewed by Jiri, thanks!
>>> v3->v4:
>>> Alloc GFP_ATOMIC
>>> v2->v3:
>>> Ditch re-locking to alloc, and use atomic allocation
>>> v1->v2:
>>> Use spin_lock_bh instead of spin_lock, and unlock for alloc (as it can sleep)
>>> Free ft on last tc act instance instead of last instance + last offloaded tuple,
>>> this removes cleanup cb and netfilter patches, and is simpler
>>> Removed accidental mlx5/core/en_tc.c change
>>> Removed reviewed by Jiri - patch changed
>>>
>>> + err = nf_flow_table_init(&ct_ft->nf_ft);
>> This call is going to allocate a rhashtable (GFP_KERNEL allocations that might sleep)
>>
>> Since you still hold zones_lock spinlock, a splat should occur.
>>
>> "BUG: sleeping function called from invalid context in ..."
>>
>> DEBUG_ATOMIC_SLEEP=y is your friend.
>>
>> And it is always a good thing to make sure a patch does not trigger a lockdep splat
>>
>> CONFIG_PROVE_LOCKING=y
> Also abusing a spinlock and GFP_ATOMIC allocations in control path is highly discouraged.
>
> I can not test the following fix, any objections before I submit this officially ?
>
> diff --git a/net/sched/act_ct.c b/net/sched/act_ct.c
> index 23eba61f0f819212a3522c3c63b938d0b8d997e2..3d9e678d7d5336f1746035745b091bea0dcb5fdd 100644
> --- a/net/sched/act_ct.c
> +++ b/net/sched/act_ct.c
> @@ -35,15 +35,15 @@
>
> static struct workqueue_struct *act_ct_wq;
> static struct rhashtable zones_ht;
> -static DEFINE_SPINLOCK(zones_lock);
> +static DEFINE_MUTEX(zones_mutex);
>
> struct tcf_ct_flow_table {
> struct rhash_head node; /* In zones tables */
>
> struct rcu_work rwork;
> struct nf_flowtable nf_ft;
> + refcount_t ref;
> u16 zone;
> - u32 ref;
>
> bool dying;
> };
> @@ -64,14 +64,15 @@ static int tcf_ct_flow_table_get(struct tcf_ct_params *params)
> struct tcf_ct_flow_table *ct_ft;
> int err = -ENOMEM;
>
> - spin_lock_bh(&zones_lock);
> + mutex_lock(&zones_mutex);
> ct_ft = rhashtable_lookup_fast(&zones_ht, ¶ms->zone, zones_params);
> - if (ct_ft)
> - goto take_ref;
> + if (ct_ft && refcount_inc_not_zero(&ct_ft->ref))
> + goto out_unlock;
>
> - ct_ft = kzalloc(sizeof(*ct_ft), GFP_ATOMIC);
> + ct_ft = kzalloc(sizeof(*ct_ft), GFP_KERNEL);
> if (!ct_ft)
> goto err_alloc;
> + refcount_set(&ct_ft->ref, 1);
>
> ct_ft->zone = params->zone;
> err = rhashtable_insert_fast(&zones_ht, &ct_ft->node, zones_params);
> @@ -84,10 +85,9 @@ static int tcf_ct_flow_table_get(struct tcf_ct_params *params)
> goto err_init;
>
> __module_get(THIS_MODULE);
> -take_ref:
> +out_unlock:
> params->ct_ft = ct_ft;
> - ct_ft->ref++;
> - spin_unlock_bh(&zones_lock);
> + mutex_unlock(&zones_mutex);
>
> return 0;
>
> @@ -96,7 +96,7 @@ static int tcf_ct_flow_table_get(struct tcf_ct_params *params)
> err_insert:
> kfree(ct_ft);
> err_alloc:
> - spin_unlock_bh(&zones_lock);
> + mutex_unlock(&zones_mutex);
> return err;
> }
>
> @@ -116,13 +116,11 @@ static void tcf_ct_flow_table_put(struct tcf_ct_params *params)
> {
> struct tcf_ct_flow_table *ct_ft = params->ct_ft;
>
> - spin_lock_bh(&zones_lock);
> - if (--params->ct_ft->ref == 0) {
> + if (refcount_dec_and_test(¶ms->ct_ft->ref)) {
> rhashtable_remove_fast(&zones_ht, &ct_ft->node, zones_params);
> INIT_RCU_WORK(&ct_ft->rwork, tcf_ct_flow_table_cleanup_work);
> queue_rcu_work(act_ct_wq, &ct_ft->rwork);
> }
> - spin_unlock_bh(&zones_lock);
> }
>
> static void tcf_ct_flow_table_add(struct tcf_ct_flow_table *ct_ft,
>
^ permalink raw reply [flat|nested] 14+ messages in thread* Re: [PATCH net-next v6 1/3] net/sched: act_ct: Create nf flow table per zone
2020-03-08 8:11 ` Paul Blakey
@ 2020-03-08 8:15 ` Paul Blakey
2020-03-08 20:42 ` Eric Dumazet
0 siblings, 1 reply; 14+ messages in thread
From: Paul Blakey @ 2020-03-08 8:15 UTC (permalink / raw)
To: Eric Dumazet, Saeed Mahameed, Oz Shlomo, Jakub Kicinski,
Vlad Buslov, David Miller, netdev@vger.kernel.org, Jiri Pirko,
Roi Dayan
On 3/8/2020 10:11 AM, Paul Blakey wrote:
> iirc I did the spin lock bh because we can be called from queue work rcu handler , so I wanted to disable soft irq.
>
> I got a possible deadlock splat for that.
Here I meant this call rcu:
static void tcf_ct_cleanup(struct tc_action *a)
{
>-------struct tcf_ct_params *params;
>-------struct tcf_ct *c = to_ct(a);
>-------params = rcu_dereference_protected(c->params, 1);
>-------if (params)
>------->-------call_rcu(¶ms->rcu, tcf_ct_params_free);
}
static void tcf_ct_params_free(struct rcu_head *head)
{
>-------struct tcf_ct_params *params = container_of(head,
>------->------->------->------->------->------- struct tcf_ct_params, rcu);
>-------tcf_ct_flow_table_put(params);
...
>
>
> On 3/7/2020 10:53 PM, Eric Dumazet wrote:
>
>> On 3/7/20 12:12 PM, Eric Dumazet wrote:
>>> On 3/3/20 7:57 AM, Paul Blakey wrote:
>>>> Use the NF flow tables infrastructure for CT offload.
>>>>
>>>> Create a nf flow table per zone.
>>>>
>>>> Next patches will add FT entries to this table, and do
>>>> the software offload.
>>>>
>>>> Signed-off-by: Paul Blakey <paulb@mellanox.com>
>>>> Reviewed-by: Jiri Pirko <jiri@mellanox.com>
>>>> ---
>>>> v4->v5:
>>>> Added reviewed by Jiri, thanks!
>>>> v3->v4:
>>>> Alloc GFP_ATOMIC
>>>> v2->v3:
>>>> Ditch re-locking to alloc, and use atomic allocation
>>>> v1->v2:
>>>> Use spin_lock_bh instead of spin_lock, and unlock for alloc (as it can sleep)
>>>> Free ft on last tc act instance instead of last instance + last offloaded tuple,
>>>> this removes cleanup cb and netfilter patches, and is simpler
>>>> Removed accidental mlx5/core/en_tc.c change
>>>> Removed reviewed by Jiri - patch changed
>>>>
>>>> + err = nf_flow_table_init(&ct_ft->nf_ft);
>>> This call is going to allocate a rhashtable (GFP_KERNEL allocations that might sleep)
>>>
>>> Since you still hold zones_lock spinlock, a splat should occur.
>>>
>>> "BUG: sleeping function called from invalid context in ..."
>>>
>>> DEBUG_ATOMIC_SLEEP=y is your friend.
>>>
>>> And it is always a good thing to make sure a patch does not trigger a lockdep splat
>>>
>>> CONFIG_PROVE_LOCKING=y
>> Also abusing a spinlock and GFP_ATOMIC allocations in control path is highly discouraged.
>>
>> I can not test the following fix, any objections before I submit this officially ?
>>
>> diff --git a/net/sched/act_ct.c b/net/sched/act_ct.c
>> index 23eba61f0f819212a3522c3c63b938d0b8d997e2..3d9e678d7d5336f1746035745b091bea0dcb5fdd 100644
>> --- a/net/sched/act_ct.c
>> +++ b/net/sched/act_ct.c
>> @@ -35,15 +35,15 @@
>>
>> static struct workqueue_struct *act_ct_wq;
>> static struct rhashtable zones_ht;
>> -static DEFINE_SPINLOCK(zones_lock);
>> +static DEFINE_MUTEX(zones_mutex);
>>
>> struct tcf_ct_flow_table {
>> struct rhash_head node; /* In zones tables */
>>
>> struct rcu_work rwork;
>> struct nf_flowtable nf_ft;
>> + refcount_t ref;
>> u16 zone;
>> - u32 ref;
>>
>> bool dying;
>> };
>> @@ -64,14 +64,15 @@ static int tcf_ct_flow_table_get(struct tcf_ct_params *params)
>> struct tcf_ct_flow_table *ct_ft;
>> int err = -ENOMEM;
>>
>> - spin_lock_bh(&zones_lock);
>> + mutex_lock(&zones_mutex);
>> ct_ft = rhashtable_lookup_fast(&zones_ht, ¶ms->zone, zones_params);
>> - if (ct_ft)
>> - goto take_ref;
>> + if (ct_ft && refcount_inc_not_zero(&ct_ft->ref))
>> + goto out_unlock;
>>
>> - ct_ft = kzalloc(sizeof(*ct_ft), GFP_ATOMIC);
>> + ct_ft = kzalloc(sizeof(*ct_ft), GFP_KERNEL);
>> if (!ct_ft)
>> goto err_alloc;
>> + refcount_set(&ct_ft->ref, 1);
>>
>> ct_ft->zone = params->zone;
>> err = rhashtable_insert_fast(&zones_ht, &ct_ft->node, zones_params);
>> @@ -84,10 +85,9 @@ static int tcf_ct_flow_table_get(struct tcf_ct_params *params)
>> goto err_init;
>>
>> __module_get(THIS_MODULE);
>> -take_ref:
>> +out_unlock:
>> params->ct_ft = ct_ft;
>> - ct_ft->ref++;
>> - spin_unlock_bh(&zones_lock);
>> + mutex_unlock(&zones_mutex);
>>
>> return 0;
>>
>> @@ -96,7 +96,7 @@ static int tcf_ct_flow_table_get(struct tcf_ct_params *params)
>> err_insert:
>> kfree(ct_ft);
>> err_alloc:
>> - spin_unlock_bh(&zones_lock);
>> + mutex_unlock(&zones_mutex);
>> return err;
>> }
>>
>> @@ -116,13 +116,11 @@ static void tcf_ct_flow_table_put(struct tcf_ct_params *params)
>> {
>> struct tcf_ct_flow_table *ct_ft = params->ct_ft;
>>
>> - spin_lock_bh(&zones_lock);
>> - if (--params->ct_ft->ref == 0) {
>> + if (refcount_dec_and_test(¶ms->ct_ft->ref)) {
>> rhashtable_remove_fast(&zones_ht, &ct_ft->node, zones_params);
>> INIT_RCU_WORK(&ct_ft->rwork, tcf_ct_flow_table_cleanup_work);
>> queue_rcu_work(act_ct_wq, &ct_ft->rwork);
>> }
>> - spin_unlock_bh(&zones_lock);
>> }
>>
>> static void tcf_ct_flow_table_add(struct tcf_ct_flow_table *ct_ft,
>>
^ permalink raw reply [flat|nested] 14+ messages in thread* Re: [PATCH net-next v6 1/3] net/sched: act_ct: Create nf flow table per zone
2020-03-08 8:15 ` Paul Blakey
@ 2020-03-08 20:42 ` Eric Dumazet
2020-03-09 8:02 ` Paul Blakey
0 siblings, 1 reply; 14+ messages in thread
From: Eric Dumazet @ 2020-03-08 20:42 UTC (permalink / raw)
To: Paul Blakey, Eric Dumazet, Saeed Mahameed, Oz Shlomo,
Jakub Kicinski, Vlad Buslov, David Miller, netdev@vger.kernel.org,
Jiri Pirko, Roi Dayan
On 3/8/20 12:15 AM, Paul Blakey wrote:
> On 3/8/2020 10:11 AM, Paul Blakey wrote:
>
>> iirc I did the spin lock bh because we can be called from queue work rcu handler , so I wanted to disable soft irq.
>>
>> I got a possible deadlock splat for that.
>
> Here I meant this call rcu:
>
> static void tcf_ct_cleanup(struct tc_action *a)
> {
>> -------struct tcf_ct_params *params;
>> -------struct tcf_ct *c = to_ct(a);
>
>> -------params = rcu_dereference_protected(c->params, 1);
>> -------if (params)
>> ------->-------call_rcu(¶ms->rcu, tcf_ct_params_free);
> }
>
Yes, understood, but to solve this problem we had many other choices,
and still keeping GFP_KERNEL allocations and a mutex for control path.
Have you read my patch ?
By not even trying to get a spinlock in tcf_ct_flow_table_put(),
and instead use a refcount for ->ref, we avoid having this issue in the first place.
static void tcf_ct_flow_table_put(struct tcf_ct_params *params)
{
struct tcf_ct_flow_table *ct_ft = params->ct_ft;
if (refcount_dec_and_test(¶ms->ct_ft->ref)) {
rhashtable_remove_fast(&zones_ht, &ct_ft->node, zones_params);
INIT_RCU_WORK(&ct_ft->rwork, tcf_ct_flow_table_cleanup_work);
queue_rcu_work(act_ct_wq, &ct_ft->rwork);
}
}
> static void tcf_ct_params_free(struct rcu_head *head)
> {
>> -------struct tcf_ct_params *params = container_of(head,
>> ------->------->------->------->------->------- struct tcf_ct_params, rcu);
>
>> -------tcf_ct_flow_table_put(params);
>
> ...
>
>
>>
>>
>> On 3/7/2020 10:53 PM, Eric Dumazet wrote:
>>
>>> On 3/7/20 12:12 PM, Eric Dumazet wrote:
>>>> On 3/3/20 7:57 AM, Paul Blakey wrote:
>>>>> Use the NF flow tables infrastructure for CT offload.
>>>>>
>>>>> Create a nf flow table per zone.
>>>>>
>>>>> Next patches will add FT entries to this table, and do
>>>>> the software offload.
>>>>>
>>>>> Signed-off-by: Paul Blakey <paulb@mellanox.com>
>>>>> Reviewed-by: Jiri Pirko <jiri@mellanox.com>
>>>>> ---
>>>>> v4->v5:
>>>>> Added reviewed by Jiri, thanks!
>>>>> v3->v4:
>>>>> Alloc GFP_ATOMIC
>>>>> v2->v3:
>>>>> Ditch re-locking to alloc, and use atomic allocation
>>>>> v1->v2:
>>>>> Use spin_lock_bh instead of spin_lock, and unlock for alloc (as it can sleep)
>>>>> Free ft on last tc act instance instead of last instance + last offloaded tuple,
>>>>> this removes cleanup cb and netfilter patches, and is simpler
>>>>> Removed accidental mlx5/core/en_tc.c change
>>>>> Removed reviewed by Jiri - patch changed
>>>>>
>>>>> + err = nf_flow_table_init(&ct_ft->nf_ft);
>>>> This call is going to allocate a rhashtable (GFP_KERNEL allocations that might sleep)
>>>>
>>>> Since you still hold zones_lock spinlock, a splat should occur.
>>>>
>>>> "BUG: sleeping function called from invalid context in ..."
>>>>
>>>> DEBUG_ATOMIC_SLEEP=y is your friend.
>>>>
>>>> And it is always a good thing to make sure a patch does not trigger a lockdep splat
>>>>
>>>> CONFIG_PROVE_LOCKING=y
>>> Also abusing a spinlock and GFP_ATOMIC allocations in control path is highly discouraged.
>>>
>>> I can not test the following fix, any objections before I submit this officially ?
>>>
>>> diff --git a/net/sched/act_ct.c b/net/sched/act_ct.c
>>> index 23eba61f0f819212a3522c3c63b938d0b8d997e2..3d9e678d7d5336f1746035745b091bea0dcb5fdd 100644
>>> --- a/net/sched/act_ct.c
>>> +++ b/net/sched/act_ct.c
>>> @@ -35,15 +35,15 @@
>>>
>>> static struct workqueue_struct *act_ct_wq;
>>> static struct rhashtable zones_ht;
>>> -static DEFINE_SPINLOCK(zones_lock);
>>> +static DEFINE_MUTEX(zones_mutex);
>>>
>>> struct tcf_ct_flow_table {
>>> struct rhash_head node; /* In zones tables */
>>>
>>> struct rcu_work rwork;
>>> struct nf_flowtable nf_ft;
>>> + refcount_t ref;
>>> u16 zone;
>>> - u32 ref;
>>>
>>> bool dying;
>>> };
>>> @@ -64,14 +64,15 @@ static int tcf_ct_flow_table_get(struct tcf_ct_params *params)
>>> struct tcf_ct_flow_table *ct_ft;
>>> int err = -ENOMEM;
>>>
>>> - spin_lock_bh(&zones_lock);
>>> + mutex_lock(&zones_mutex);
>>> ct_ft = rhashtable_lookup_fast(&zones_ht, ¶ms->zone, zones_params);
>>> - if (ct_ft)
>>> - goto take_ref;
>>> + if (ct_ft && refcount_inc_not_zero(&ct_ft->ref))
>>> + goto out_unlock;
>>>
>>> - ct_ft = kzalloc(sizeof(*ct_ft), GFP_ATOMIC);
>>> + ct_ft = kzalloc(sizeof(*ct_ft), GFP_KERNEL);
>>> if (!ct_ft)
>>> goto err_alloc;
>>> + refcount_set(&ct_ft->ref, 1);
>>>
>>> ct_ft->zone = params->zone;
>>> err = rhashtable_insert_fast(&zones_ht, &ct_ft->node, zones_params);
>>> @@ -84,10 +85,9 @@ static int tcf_ct_flow_table_get(struct tcf_ct_params *params)
>>> goto err_init;
>>>
>>> __module_get(THIS_MODULE);
>>> -take_ref:
>>> +out_unlock:
>>> params->ct_ft = ct_ft;
>>> - ct_ft->ref++;
>>> - spin_unlock_bh(&zones_lock);
>>> + mutex_unlock(&zones_mutex);
>>>
>>> return 0;
>>>
>>> @@ -96,7 +96,7 @@ static int tcf_ct_flow_table_get(struct tcf_ct_params *params)
>>> err_insert:
>>> kfree(ct_ft);
>>> err_alloc:
>>> - spin_unlock_bh(&zones_lock);
>>> + mutex_unlock(&zones_mutex);
>>> return err;
>>> }
>>>
>>> @@ -116,13 +116,11 @@ static void tcf_ct_flow_table_put(struct tcf_ct_params *params)
>>> {
>>> struct tcf_ct_flow_table *ct_ft = params->ct_ft;
>>>
>>> - spin_lock_bh(&zones_lock);
>>> - if (--params->ct_ft->ref == 0) {
>>> + if (refcount_dec_and_test(¶ms->ct_ft->ref)) {
>>> rhashtable_remove_fast(&zones_ht, &ct_ft->node, zones_params);
>>> INIT_RCU_WORK(&ct_ft->rwork, tcf_ct_flow_table_cleanup_work);
>>> queue_rcu_work(act_ct_wq, &ct_ft->rwork);
>>> }
>>> - spin_unlock_bh(&zones_lock);
>>> }
>>>
>>> static void tcf_ct_flow_table_add(struct tcf_ct_flow_table *ct_ft,
>>>
^ permalink raw reply [flat|nested] 14+ messages in thread* Re: [PATCH net-next v6 1/3] net/sched: act_ct: Create nf flow table per zone
2020-03-08 20:42 ` Eric Dumazet
@ 2020-03-09 8:02 ` Paul Blakey
0 siblings, 0 replies; 14+ messages in thread
From: Paul Blakey @ 2020-03-09 8:02 UTC (permalink / raw)
To: Eric Dumazet, Saeed Mahameed, Oz Shlomo, Jakub Kicinski,
Vlad Buslov, David Miller, netdev@vger.kernel.org, Jiri Pirko,
Roi Dayan
On 3/8/2020 10:42 PM, Eric Dumazet wrote:
>
> On 3/8/20 12:15 AM, Paul Blakey wrote:
>> On 3/8/2020 10:11 AM, Paul Blakey wrote:
>>
>>> iirc I did the spin lock bh because we can be called from queue work rcu handler , so I wanted to disable soft irq.
>>>
>>> I got a possible deadlock splat for that.
>> Here I meant this call rcu:
>>
>> static void tcf_ct_cleanup(struct tc_action *a)
>> {
>>> -------struct tcf_ct_params *params;
>>> -------struct tcf_ct *c = to_ct(a);
>>> -------params = rcu_dereference_protected(c->params, 1);
>>> -------if (params)
>>> ------->-------call_rcu(¶ms->rcu, tcf_ct_params_free);
>> }
>>
> Yes, understood, but to solve this problem we had many other choices,
> and still keeping GFP_KERNEL allocations and a mutex for control path.
>
> Have you read my patch ?
>
> By not even trying to get a spinlock in tcf_ct_flow_table_put(),
> and instead use a refcount for ->ref, we avoid having this issue in the first place.
>
> static void tcf_ct_flow_table_put(struct tcf_ct_params *params)
> {
> struct tcf_ct_flow_table *ct_ft = params->ct_ft;
>
> if (refcount_dec_and_test(¶ms->ct_ft->ref)) {
> rhashtable_remove_fast(&zones_ht, &ct_ft->node, zones_params);
> INIT_RCU_WORK(&ct_ft->rwork, tcf_ct_flow_table_cleanup_work);
> queue_rcu_work(act_ct_wq, &ct_ft->rwork);
> }
> }
Sorry missed that, thanks for the fix.
>> static void tcf_ct_params_free(struct rcu_head *head)
>> {
>>> -------struct tcf_ct_params *params = container_of(head,
>>> ------->------->------->------->------->------- struct tcf_ct_params, rcu);
>>> -------tcf_ct_flow_table_put(params);
>> ...
>>
>>
>>>
>>> On 3/7/2020 10:53 PM, Eric Dumazet wrote:
>>>
>>>> On 3/7/20 12:12 PM, Eric Dumazet wrote:
>>>>> On 3/3/20 7:57 AM, Paul Blakey wrote:
>>>>>> Use the NF flow tables infrastructure for CT offload.
>>>>>>
>>>>>> Create a nf flow table per zone.
>>>>>>
>>>>>> Next patches will add FT entries to this table, and do
>>>>>> the software offload.
>>>>>>
>>>>>> Signed-off-by: Paul Blakey <paulb@mellanox.com>
>>>>>> Reviewed-by: Jiri Pirko <jiri@mellanox.com>
>>>>>> ---
>>>>>> v4->v5:
>>>>>> Added reviewed by Jiri, thanks!
>>>>>> v3->v4:
>>>>>> Alloc GFP_ATOMIC
>>>>>> v2->v3:
>>>>>> Ditch re-locking to alloc, and use atomic allocation
>>>>>> v1->v2:
>>>>>> Use spin_lock_bh instead of spin_lock, and unlock for alloc (as it can sleep)
>>>>>> Free ft on last tc act instance instead of last instance + last offloaded tuple,
>>>>>> this removes cleanup cb and netfilter patches, and is simpler
>>>>>> Removed accidental mlx5/core/en_tc.c change
>>>>>> Removed reviewed by Jiri - patch changed
>>>>>>
>>>>>> + err = nf_flow_table_init(&ct_ft->nf_ft);
>>>>> This call is going to allocate a rhashtable (GFP_KERNEL allocations that might sleep)
>>>>>
>>>>> Since you still hold zones_lock spinlock, a splat should occur.
>>>>>
>>>>> "BUG: sleeping function called from invalid context in ..."
>>>>>
>>>>> DEBUG_ATOMIC_SLEEP=y is your friend.
>>>>>
>>>>> And it is always a good thing to make sure a patch does not trigger a lockdep splat
>>>>>
>>>>> CONFIG_PROVE_LOCKING=y
>>>> Also abusing a spinlock and GFP_ATOMIC allocations in control path is highly discouraged.
>>>>
>>>> I can not test the following fix, any objections before I submit this officially ?
>>>>
>>>> diff --git a/net/sched/act_ct.c b/net/sched/act_ct.c
>>>> index 23eba61f0f819212a3522c3c63b938d0b8d997e2..3d9e678d7d5336f1746035745b091bea0dcb5fdd 100644
>>>> --- a/net/sched/act_ct.c
>>>> +++ b/net/sched/act_ct.c
>>>> @@ -35,15 +35,15 @@
>>>>
>>>> static struct workqueue_struct *act_ct_wq;
>>>> static struct rhashtable zones_ht;
>>>> -static DEFINE_SPINLOCK(zones_lock);
>>>> +static DEFINE_MUTEX(zones_mutex);
>>>>
>>>> struct tcf_ct_flow_table {
>>>> struct rhash_head node; /* In zones tables */
>>>>
>>>> struct rcu_work rwork;
>>>> struct nf_flowtable nf_ft;
>>>> + refcount_t ref;
>>>> u16 zone;
>>>> - u32 ref;
>>>>
>>>> bool dying;
>>>> };
>>>> @@ -64,14 +64,15 @@ static int tcf_ct_flow_table_get(struct tcf_ct_params *params)
>>>> struct tcf_ct_flow_table *ct_ft;
>>>> int err = -ENOMEM;
>>>>
>>>> - spin_lock_bh(&zones_lock);
>>>> + mutex_lock(&zones_mutex);
>>>> ct_ft = rhashtable_lookup_fast(&zones_ht, ¶ms->zone, zones_params);
>>>> - if (ct_ft)
>>>> - goto take_ref;
>>>> + if (ct_ft && refcount_inc_not_zero(&ct_ft->ref))
>>>> + goto out_unlock;
>>>>
>>>> - ct_ft = kzalloc(sizeof(*ct_ft), GFP_ATOMIC);
>>>> + ct_ft = kzalloc(sizeof(*ct_ft), GFP_KERNEL);
>>>> if (!ct_ft)
>>>> goto err_alloc;
>>>> + refcount_set(&ct_ft->ref, 1);
>>>>
>>>> ct_ft->zone = params->zone;
>>>> err = rhashtable_insert_fast(&zones_ht, &ct_ft->node, zones_params);
>>>> @@ -84,10 +85,9 @@ static int tcf_ct_flow_table_get(struct tcf_ct_params *params)
>>>> goto err_init;
>>>>
>>>> __module_get(THIS_MODULE);
>>>> -take_ref:
>>>> +out_unlock:
>>>> params->ct_ft = ct_ft;
>>>> - ct_ft->ref++;
>>>> - spin_unlock_bh(&zones_lock);
>>>> + mutex_unlock(&zones_mutex);
>>>>
>>>> return 0;
>>>>
>>>> @@ -96,7 +96,7 @@ static int tcf_ct_flow_table_get(struct tcf_ct_params *params)
>>>> err_insert:
>>>> kfree(ct_ft);
>>>> err_alloc:
>>>> - spin_unlock_bh(&zones_lock);
>>>> + mutex_unlock(&zones_mutex);
>>>> return err;
>>>> }
>>>>
>>>> @@ -116,13 +116,11 @@ static void tcf_ct_flow_table_put(struct tcf_ct_params *params)
>>>> {
>>>> struct tcf_ct_flow_table *ct_ft = params->ct_ft;
>>>>
>>>> - spin_lock_bh(&zones_lock);
>>>> - if (--params->ct_ft->ref == 0) {
>>>> + if (refcount_dec_and_test(¶ms->ct_ft->ref)) {
>>>> rhashtable_remove_fast(&zones_ht, &ct_ft->node, zones_params);
>>>> INIT_RCU_WORK(&ct_ft->rwork, tcf_ct_flow_table_cleanup_work);
>>>> queue_rcu_work(act_ct_wq, &ct_ft->rwork);
>>>> }
>>>> - spin_unlock_bh(&zones_lock);
>>>> }
>>>>
>>>> static void tcf_ct_flow_table_add(struct tcf_ct_flow_table *ct_ft,
>>>>
^ permalink raw reply [flat|nested] 14+ messages in thread
* [PATCH net-next v6 2/3] net/sched: act_ct: Offload established connections to flow table
2020-03-03 15:57 [PATCH net-next v6 0/3] act_ct: Software offload of conntrack_in Paul Blakey
2020-03-03 15:57 ` [PATCH net-next v6 1/3] net/sched: act_ct: Create nf flow table per zone Paul Blakey
@ 2020-03-03 15:57 ` Paul Blakey
2020-03-03 16:51 ` Marcelo Ricardo Leitner
2020-03-03 15:57 ` [PATCH net-next v6 3/3] net/sched: act_ct: Software offload of established flows Paul Blakey
2 siblings, 1 reply; 14+ messages in thread
From: Paul Blakey @ 2020-03-03 15:57 UTC (permalink / raw)
To: Paul Blakey, Saeed Mahameed, Oz Shlomo, Jakub Kicinski,
Vlad Buslov, David Miller, netdev@vger.kernel.org, Jiri Pirko,
Roi Dayan
Add a ft entry when connections enter an established state and delete
the connections when they leave the established state.
The flow table assumes ownership of the connection. In the following
patch act_ct will lookup the ct state from the FT. In future patches,
drivers will register for callbacks for ft add/del events and will be
able to use the information to offload the connections.
Note that connection aging is managed by the FT.
Signed-off-by: Paul Blakey <paulb@mellanox.com>
Acked-by: Jiri Pirko <jiri@mellanox.com>
---
net/sched/act_ct.c | 63 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 63 insertions(+)
diff --git a/net/sched/act_ct.c b/net/sched/act_ct.c
index 3321087..2ab38431 100644
--- a/net/sched/act_ct.c
+++ b/net/sched/act_ct.c
@@ -125,6 +125,67 @@ static void tcf_ct_flow_table_put(struct tcf_ct_params *params)
spin_unlock_bh(&zones_lock);
}
+static void tcf_ct_flow_table_add(struct tcf_ct_flow_table *ct_ft,
+ struct nf_conn *ct,
+ bool tcp)
+{
+ struct flow_offload *entry;
+ int err;
+
+ if (test_and_set_bit(IPS_OFFLOAD_BIT, &ct->status))
+ return;
+
+ entry = flow_offload_alloc(ct);
+ if (!entry) {
+ WARN_ON_ONCE(1);
+ goto err_alloc;
+ }
+
+ if (tcp) {
+ ct->proto.tcp.seen[0].flags |= IP_CT_TCP_FLAG_BE_LIBERAL;
+ ct->proto.tcp.seen[1].flags |= IP_CT_TCP_FLAG_BE_LIBERAL;
+ }
+
+ err = flow_offload_add(&ct_ft->nf_ft, entry);
+ if (err)
+ goto err_add;
+
+ return;
+
+err_add:
+ flow_offload_free(entry);
+err_alloc:
+ clear_bit(IPS_OFFLOAD_BIT, &ct->status);
+}
+
+static void tcf_ct_flow_table_process_conn(struct tcf_ct_flow_table *ct_ft,
+ struct nf_conn *ct,
+ enum ip_conntrack_info ctinfo)
+{
+ bool tcp = false;
+
+ if (ctinfo != IP_CT_ESTABLISHED && ctinfo != IP_CT_ESTABLISHED_REPLY)
+ return;
+
+ switch (nf_ct_protonum(ct)) {
+ case IPPROTO_TCP:
+ tcp = true;
+ if (ct->proto.tcp.state != TCP_CONNTRACK_ESTABLISHED)
+ return;
+ break;
+ case IPPROTO_UDP:
+ break;
+ default:
+ return;
+ }
+
+ if (nf_ct_ext_exist(ct, NF_CT_EXT_HELPER) ||
+ ct->status & IPS_SEQ_ADJUST)
+ return;
+
+ tcf_ct_flow_table_add(ct_ft, ct, tcp);
+}
+
static int tcf_ct_flow_tables_init(void)
{
return rhashtable_init(&zones_ht, &zones_params);
@@ -578,6 +639,8 @@ static int tcf_ct_act(struct sk_buff *skb, const struct tc_action *a,
nf_conntrack_confirm(skb);
}
+ tcf_ct_flow_table_process_conn(p->ct_ft, ct, ctinfo);
+
out_push:
skb_push_rcsum(skb, nh_ofs);
--
1.8.3.1
^ permalink raw reply related [flat|nested] 14+ messages in thread* Re: [PATCH net-next v6 2/3] net/sched: act_ct: Offload established connections to flow table
2020-03-03 15:57 ` [PATCH net-next v6 2/3] net/sched: act_ct: Offload established connections to flow table Paul Blakey
@ 2020-03-03 16:51 ` Marcelo Ricardo Leitner
0 siblings, 0 replies; 14+ messages in thread
From: Marcelo Ricardo Leitner @ 2020-03-03 16:51 UTC (permalink / raw)
To: Paul Blakey
Cc: Saeed Mahameed, Oz Shlomo, Jakub Kicinski, Vlad Buslov,
David Miller, netdev@vger.kernel.org, Jiri Pirko, Roi Dayan
On Tue, Mar 03, 2020 at 05:57:51PM +0200, Paul Blakey wrote:
> Add a ft entry when connections enter an established state and delete
> the connections when they leave the established state.
>
> The flow table assumes ownership of the connection. In the following
> patch act_ct will lookup the ct state from the FT. In future patches,
> drivers will register for callbacks for ft add/del events and will be
> able to use the information to offload the connections.
>
> Note that connection aging is managed by the FT.
>
> Signed-off-by: Paul Blakey <paulb@mellanox.com>
> Acked-by: Jiri Pirko <jiri@mellanox.com>
Acked-by: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
> ---
> net/sched/act_ct.c | 63 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
> 1 file changed, 63 insertions(+)
>
> diff --git a/net/sched/act_ct.c b/net/sched/act_ct.c
> index 3321087..2ab38431 100644
> --- a/net/sched/act_ct.c
> +++ b/net/sched/act_ct.c
> @@ -125,6 +125,67 @@ static void tcf_ct_flow_table_put(struct tcf_ct_params *params)
> spin_unlock_bh(&zones_lock);
> }
>
> +static void tcf_ct_flow_table_add(struct tcf_ct_flow_table *ct_ft,
> + struct nf_conn *ct,
> + bool tcp)
> +{
> + struct flow_offload *entry;
> + int err;
> +
> + if (test_and_set_bit(IPS_OFFLOAD_BIT, &ct->status))
> + return;
> +
> + entry = flow_offload_alloc(ct);
> + if (!entry) {
> + WARN_ON_ONCE(1);
> + goto err_alloc;
> + }
> +
> + if (tcp) {
> + ct->proto.tcp.seen[0].flags |= IP_CT_TCP_FLAG_BE_LIBERAL;
> + ct->proto.tcp.seen[1].flags |= IP_CT_TCP_FLAG_BE_LIBERAL;
> + }
> +
> + err = flow_offload_add(&ct_ft->nf_ft, entry);
> + if (err)
> + goto err_add;
> +
> + return;
> +
> +err_add:
> + flow_offload_free(entry);
> +err_alloc:
> + clear_bit(IPS_OFFLOAD_BIT, &ct->status);
> +}
> +
> +static void tcf_ct_flow_table_process_conn(struct tcf_ct_flow_table *ct_ft,
> + struct nf_conn *ct,
> + enum ip_conntrack_info ctinfo)
> +{
> + bool tcp = false;
> +
> + if (ctinfo != IP_CT_ESTABLISHED && ctinfo != IP_CT_ESTABLISHED_REPLY)
> + return;
> +
> + switch (nf_ct_protonum(ct)) {
> + case IPPROTO_TCP:
> + tcp = true;
> + if (ct->proto.tcp.state != TCP_CONNTRACK_ESTABLISHED)
> + return;
> + break;
> + case IPPROTO_UDP:
> + break;
> + default:
> + return;
> + }
> +
> + if (nf_ct_ext_exist(ct, NF_CT_EXT_HELPER) ||
> + ct->status & IPS_SEQ_ADJUST)
> + return;
> +
> + tcf_ct_flow_table_add(ct_ft, ct, tcp);
> +}
> +
> static int tcf_ct_flow_tables_init(void)
> {
> return rhashtable_init(&zones_ht, &zones_params);
> @@ -578,6 +639,8 @@ static int tcf_ct_act(struct sk_buff *skb, const struct tc_action *a,
> nf_conntrack_confirm(skb);
> }
>
> + tcf_ct_flow_table_process_conn(p->ct_ft, ct, ctinfo);
> +
> out_push:
> skb_push_rcsum(skb, nh_ofs);
>
> --
> 1.8.3.1
>
^ permalink raw reply [flat|nested] 14+ messages in thread
* [PATCH net-next v6 3/3] net/sched: act_ct: Software offload of established flows
2020-03-03 15:57 [PATCH net-next v6 0/3] act_ct: Software offload of conntrack_in Paul Blakey
2020-03-03 15:57 ` [PATCH net-next v6 1/3] net/sched: act_ct: Create nf flow table per zone Paul Blakey
2020-03-03 15:57 ` [PATCH net-next v6 2/3] net/sched: act_ct: Offload established connections to flow table Paul Blakey
@ 2020-03-03 15:57 ` Paul Blakey
2020-03-03 16:51 ` Marcelo Ricardo Leitner
2020-03-03 18:32 ` Nikolay Aleksandrov
2 siblings, 2 replies; 14+ messages in thread
From: Paul Blakey @ 2020-03-03 15:57 UTC (permalink / raw)
To: Paul Blakey, Saeed Mahameed, Oz Shlomo, Jakub Kicinski,
Vlad Buslov, David Miller, netdev@vger.kernel.org, Jiri Pirko,
Roi Dayan
Offload nf conntrack processing by looking up the 5-tuple in the
zone's flow table.
The nf conntrack module will process the packets until a connection is
in established state. Once in established state, the ct state pointer
(nf_conn) will be restored on the skb from a successful ft lookup.
Signed-off-by: Paul Blakey <paulb@mellanox.com>
Acked-by: Jiri Pirko <jiri@mellanox.com>
---
Changelog:
v5->v6:
Refactor tcp fin/rst check, get tcp header inside filler, and test in caller
Pull tcp with ports instead of two pulls
v4->v5:
Re-read ip/ip6 header after pulling as skb ptrs may change
Use pskb_network_may_pull instaed of pskb_may_pull
v1->v2:
Add !skip_add curly braces
Removed extra setting thoff again
Check tcp proto outside of tcf_ct_flow_table_check_tcp
net/sched/act_ct.c | 152 ++++++++++++++++++++++++++++++++++++++++++++++++++++-
1 file changed, 150 insertions(+), 2 deletions(-)
diff --git a/net/sched/act_ct.c b/net/sched/act_ct.c
index 2ab38431..23eba61 100644
--- a/net/sched/act_ct.c
+++ b/net/sched/act_ct.c
@@ -186,6 +186,147 @@ static void tcf_ct_flow_table_process_conn(struct tcf_ct_flow_table *ct_ft,
tcf_ct_flow_table_add(ct_ft, ct, tcp);
}
+static bool
+tcf_ct_flow_table_fill_tuple_ipv4(struct sk_buff *skb,
+ struct flow_offload_tuple *tuple,
+ struct tcphdr **tcph)
+{
+ struct flow_ports *ports;
+ unsigned int thoff;
+ struct iphdr *iph;
+
+ if (!pskb_network_may_pull(skb, sizeof(*iph)))
+ return false;
+
+ iph = ip_hdr(skb);
+ thoff = iph->ihl * 4;
+
+ if (ip_is_fragment(iph) ||
+ unlikely(thoff != sizeof(struct iphdr)))
+ return false;
+
+ if (iph->protocol != IPPROTO_TCP &&
+ iph->protocol != IPPROTO_UDP)
+ return false;
+
+ if (iph->ttl <= 1)
+ return false;
+
+ if (!pskb_network_may_pull(skb, iph->protocol == IPPROTO_TCP ?
+ thoff + sizeof(struct tcphdr) :
+ thoff + sizeof(*ports)))
+ return false;
+
+ iph = ip_hdr(skb);
+ if (iph->protocol == IPPROTO_TCP)
+ *tcph = (void *)(skb_network_header(skb) + thoff);
+
+ ports = (struct flow_ports *)(skb_network_header(skb) + thoff);
+ tuple->src_v4.s_addr = iph->saddr;
+ tuple->dst_v4.s_addr = iph->daddr;
+ tuple->src_port = ports->source;
+ tuple->dst_port = ports->dest;
+ tuple->l3proto = AF_INET;
+ tuple->l4proto = iph->protocol;
+
+ return true;
+}
+
+static bool
+tcf_ct_flow_table_fill_tuple_ipv6(struct sk_buff *skb,
+ struct flow_offload_tuple *tuple,
+ struct tcphdr **tcph)
+{
+ struct flow_ports *ports;
+ struct ipv6hdr *ip6h;
+ unsigned int thoff;
+
+ if (!pskb_network_may_pull(skb, sizeof(*ip6h)))
+ return false;
+
+ ip6h = ipv6_hdr(skb);
+
+ if (ip6h->nexthdr != IPPROTO_TCP &&
+ ip6h->nexthdr != IPPROTO_UDP)
+ return false;
+
+ if (ip6h->hop_limit <= 1)
+ return false;
+
+ thoff = sizeof(*ip6h);
+ if (!pskb_network_may_pull(skb, ip6h->nexthdr == IPPROTO_TCP ?
+ thoff + sizeof(struct tcphdr) :
+ thoff + sizeof(*ports)))
+ return false;
+
+ ip6h = ipv6_hdr(skb);
+ if (ip6h->nexthdr == IPPROTO_TCP)
+ *tcph = (void *)(skb_network_header(skb) + thoff);
+
+ ports = (struct flow_ports *)(skb_network_header(skb) + thoff);
+ tuple->src_v6 = ip6h->saddr;
+ tuple->dst_v6 = ip6h->daddr;
+ tuple->src_port = ports->source;
+ tuple->dst_port = ports->dest;
+ tuple->l3proto = AF_INET6;
+ tuple->l4proto = ip6h->nexthdr;
+
+ return true;
+}
+
+static bool tcf_ct_flow_table_lookup(struct tcf_ct_params *p,
+ struct sk_buff *skb,
+ u8 family)
+{
+ struct nf_flowtable *nf_ft = &p->ct_ft->nf_ft;
+ struct flow_offload_tuple_rhash *tuplehash;
+ struct flow_offload_tuple tuple = {};
+ enum ip_conntrack_info ctinfo;
+ struct tcphdr *tcph = NULL;
+ struct flow_offload *flow;
+ struct nf_conn *ct;
+ u8 dir;
+
+ /* Previously seen or loopback */
+ ct = nf_ct_get(skb, &ctinfo);
+ if ((ct && !nf_ct_is_template(ct)) || ctinfo == IP_CT_UNTRACKED)
+ return false;
+
+ switch (family) {
+ case NFPROTO_IPV4:
+ if (!tcf_ct_flow_table_fill_tuple_ipv4(skb, &tuple, &tcph))
+ return false;
+ break;
+ case NFPROTO_IPV6:
+ if (!tcf_ct_flow_table_fill_tuple_ipv6(skb, &tuple, &tcph))
+ return false;
+ break;
+ default:
+ return false;
+ }
+
+ tuplehash = flow_offload_lookup(nf_ft, &tuple);
+ if (!tuplehash)
+ return false;
+
+ dir = tuplehash->tuple.dir;
+ flow = container_of(tuplehash, struct flow_offload, tuplehash[dir]);
+ ct = flow->ct;
+
+ if (tcph && (unlikely(tcph->fin || tcph->rst))) {
+ flow_offload_teardown(flow);
+ return false;
+ }
+
+ ctinfo = dir == FLOW_OFFLOAD_DIR_ORIGINAL ? IP_CT_ESTABLISHED :
+ IP_CT_ESTABLISHED_REPLY;
+
+ nf_conntrack_get(&ct->ct_general);
+ nf_ct_set(skb, ct, ctinfo);
+
+ return true;
+}
+
static int tcf_ct_flow_tables_init(void)
{
return rhashtable_init(&zones_ht, &zones_params);
@@ -554,6 +695,7 @@ static int tcf_ct_act(struct sk_buff *skb, const struct tc_action *a,
struct nf_hook_state state;
int nh_ofs, err, retval;
struct tcf_ct_params *p;
+ bool skip_add = false;
struct nf_conn *ct;
u8 family;
@@ -603,6 +745,11 @@ static int tcf_ct_act(struct sk_buff *skb, const struct tc_action *a,
*/
cached = tcf_ct_skb_nfct_cached(net, skb, p->zone, force);
if (!cached) {
+ if (!commit && tcf_ct_flow_table_lookup(p, skb, family)) {
+ skip_add = true;
+ goto do_nat;
+ }
+
/* Associate skb with specified zone. */
if (tmpl) {
ct = nf_ct_get(skb, &ctinfo);
@@ -620,6 +767,7 @@ static int tcf_ct_act(struct sk_buff *skb, const struct tc_action *a,
goto out_push;
}
+do_nat:
ct = nf_ct_get(skb, &ctinfo);
if (!ct)
goto out_push;
@@ -637,10 +785,10 @@ static int tcf_ct_act(struct sk_buff *skb, const struct tc_action *a,
* even if the connection is already confirmed.
*/
nf_conntrack_confirm(skb);
+ } else if (!skip_add) {
+ tcf_ct_flow_table_process_conn(p->ct_ft, ct, ctinfo);
}
- tcf_ct_flow_table_process_conn(p->ct_ft, ct, ctinfo);
-
out_push:
skb_push_rcsum(skb, nh_ofs);
--
1.8.3.1
^ permalink raw reply related [flat|nested] 14+ messages in thread* Re: [PATCH net-next v6 3/3] net/sched: act_ct: Software offload of established flows
2020-03-03 15:57 ` [PATCH net-next v6 3/3] net/sched: act_ct: Software offload of established flows Paul Blakey
@ 2020-03-03 16:51 ` Marcelo Ricardo Leitner
2020-03-03 18:32 ` Nikolay Aleksandrov
1 sibling, 0 replies; 14+ messages in thread
From: Marcelo Ricardo Leitner @ 2020-03-03 16:51 UTC (permalink / raw)
To: Paul Blakey
Cc: Saeed Mahameed, Oz Shlomo, Jakub Kicinski, Vlad Buslov,
David Miller, netdev@vger.kernel.org, Jiri Pirko, Roi Dayan
On Tue, Mar 03, 2020 at 05:57:52PM +0200, Paul Blakey wrote:
> Offload nf conntrack processing by looking up the 5-tuple in the
> zone's flow table.
>
> The nf conntrack module will process the packets until a connection is
> in established state. Once in established state, the ct state pointer
> (nf_conn) will be restored on the skb from a successful ft lookup.
>
> Signed-off-by: Paul Blakey <paulb@mellanox.com>
> Acked-by: Jiri Pirko <jiri@mellanox.com>
Acked-by: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
> ---
> Changelog:
> v5->v6:
> Refactor tcp fin/rst check, get tcp header inside filler, and test in caller
> Pull tcp with ports instead of two pulls
> v4->v5:
> Re-read ip/ip6 header after pulling as skb ptrs may change
> Use pskb_network_may_pull instaed of pskb_may_pull
> v1->v2:
> Add !skip_add curly braces
> Removed extra setting thoff again
> Check tcp proto outside of tcf_ct_flow_table_check_tcp
>
> net/sched/act_ct.c | 152 ++++++++++++++++++++++++++++++++++++++++++++++++++++-
> 1 file changed, 150 insertions(+), 2 deletions(-)
>
> diff --git a/net/sched/act_ct.c b/net/sched/act_ct.c
> index 2ab38431..23eba61 100644
> --- a/net/sched/act_ct.c
> +++ b/net/sched/act_ct.c
> @@ -186,6 +186,147 @@ static void tcf_ct_flow_table_process_conn(struct tcf_ct_flow_table *ct_ft,
> tcf_ct_flow_table_add(ct_ft, ct, tcp);
> }
>
> +static bool
> +tcf_ct_flow_table_fill_tuple_ipv4(struct sk_buff *skb,
> + struct flow_offload_tuple *tuple,
> + struct tcphdr **tcph)
> +{
> + struct flow_ports *ports;
> + unsigned int thoff;
> + struct iphdr *iph;
> +
> + if (!pskb_network_may_pull(skb, sizeof(*iph)))
> + return false;
> +
> + iph = ip_hdr(skb);
> + thoff = iph->ihl * 4;
> +
> + if (ip_is_fragment(iph) ||
> + unlikely(thoff != sizeof(struct iphdr)))
> + return false;
> +
> + if (iph->protocol != IPPROTO_TCP &&
> + iph->protocol != IPPROTO_UDP)
> + return false;
> +
> + if (iph->ttl <= 1)
> + return false;
> +
> + if (!pskb_network_may_pull(skb, iph->protocol == IPPROTO_TCP ?
> + thoff + sizeof(struct tcphdr) :
> + thoff + sizeof(*ports)))
> + return false;
> +
> + iph = ip_hdr(skb);
> + if (iph->protocol == IPPROTO_TCP)
> + *tcph = (void *)(skb_network_header(skb) + thoff);
> +
> + ports = (struct flow_ports *)(skb_network_header(skb) + thoff);
> + tuple->src_v4.s_addr = iph->saddr;
> + tuple->dst_v4.s_addr = iph->daddr;
> + tuple->src_port = ports->source;
> + tuple->dst_port = ports->dest;
> + tuple->l3proto = AF_INET;
> + tuple->l4proto = iph->protocol;
> +
> + return true;
> +}
> +
> +static bool
> +tcf_ct_flow_table_fill_tuple_ipv6(struct sk_buff *skb,
> + struct flow_offload_tuple *tuple,
> + struct tcphdr **tcph)
> +{
> + struct flow_ports *ports;
> + struct ipv6hdr *ip6h;
> + unsigned int thoff;
> +
> + if (!pskb_network_may_pull(skb, sizeof(*ip6h)))
> + return false;
> +
> + ip6h = ipv6_hdr(skb);
> +
> + if (ip6h->nexthdr != IPPROTO_TCP &&
> + ip6h->nexthdr != IPPROTO_UDP)
> + return false;
> +
> + if (ip6h->hop_limit <= 1)
> + return false;
> +
> + thoff = sizeof(*ip6h);
> + if (!pskb_network_may_pull(skb, ip6h->nexthdr == IPPROTO_TCP ?
> + thoff + sizeof(struct tcphdr) :
> + thoff + sizeof(*ports)))
> + return false;
> +
> + ip6h = ipv6_hdr(skb);
> + if (ip6h->nexthdr == IPPROTO_TCP)
> + *tcph = (void *)(skb_network_header(skb) + thoff);
> +
> + ports = (struct flow_ports *)(skb_network_header(skb) + thoff);
> + tuple->src_v6 = ip6h->saddr;
> + tuple->dst_v6 = ip6h->daddr;
> + tuple->src_port = ports->source;
> + tuple->dst_port = ports->dest;
> + tuple->l3proto = AF_INET6;
> + tuple->l4proto = ip6h->nexthdr;
> +
> + return true;
> +}
> +
> +static bool tcf_ct_flow_table_lookup(struct tcf_ct_params *p,
> + struct sk_buff *skb,
> + u8 family)
> +{
> + struct nf_flowtable *nf_ft = &p->ct_ft->nf_ft;
> + struct flow_offload_tuple_rhash *tuplehash;
> + struct flow_offload_tuple tuple = {};
> + enum ip_conntrack_info ctinfo;
> + struct tcphdr *tcph = NULL;
> + struct flow_offload *flow;
> + struct nf_conn *ct;
> + u8 dir;
> +
> + /* Previously seen or loopback */
> + ct = nf_ct_get(skb, &ctinfo);
> + if ((ct && !nf_ct_is_template(ct)) || ctinfo == IP_CT_UNTRACKED)
> + return false;
> +
> + switch (family) {
> + case NFPROTO_IPV4:
> + if (!tcf_ct_flow_table_fill_tuple_ipv4(skb, &tuple, &tcph))
> + return false;
> + break;
> + case NFPROTO_IPV6:
> + if (!tcf_ct_flow_table_fill_tuple_ipv6(skb, &tuple, &tcph))
> + return false;
> + break;
> + default:
> + return false;
> + }
> +
> + tuplehash = flow_offload_lookup(nf_ft, &tuple);
> + if (!tuplehash)
> + return false;
> +
> + dir = tuplehash->tuple.dir;
> + flow = container_of(tuplehash, struct flow_offload, tuplehash[dir]);
> + ct = flow->ct;
> +
> + if (tcph && (unlikely(tcph->fin || tcph->rst))) {
> + flow_offload_teardown(flow);
> + return false;
> + }
> +
> + ctinfo = dir == FLOW_OFFLOAD_DIR_ORIGINAL ? IP_CT_ESTABLISHED :
> + IP_CT_ESTABLISHED_REPLY;
> +
> + nf_conntrack_get(&ct->ct_general);
> + nf_ct_set(skb, ct, ctinfo);
> +
> + return true;
> +}
> +
> static int tcf_ct_flow_tables_init(void)
> {
> return rhashtable_init(&zones_ht, &zones_params);
> @@ -554,6 +695,7 @@ static int tcf_ct_act(struct sk_buff *skb, const struct tc_action *a,
> struct nf_hook_state state;
> int nh_ofs, err, retval;
> struct tcf_ct_params *p;
> + bool skip_add = false;
> struct nf_conn *ct;
> u8 family;
>
> @@ -603,6 +745,11 @@ static int tcf_ct_act(struct sk_buff *skb, const struct tc_action *a,
> */
> cached = tcf_ct_skb_nfct_cached(net, skb, p->zone, force);
> if (!cached) {
> + if (!commit && tcf_ct_flow_table_lookup(p, skb, family)) {
> + skip_add = true;
> + goto do_nat;
> + }
> +
> /* Associate skb with specified zone. */
> if (tmpl) {
> ct = nf_ct_get(skb, &ctinfo);
> @@ -620,6 +767,7 @@ static int tcf_ct_act(struct sk_buff *skb, const struct tc_action *a,
> goto out_push;
> }
>
> +do_nat:
> ct = nf_ct_get(skb, &ctinfo);
> if (!ct)
> goto out_push;
> @@ -637,10 +785,10 @@ static int tcf_ct_act(struct sk_buff *skb, const struct tc_action *a,
> * even if the connection is already confirmed.
> */
> nf_conntrack_confirm(skb);
> + } else if (!skip_add) {
> + tcf_ct_flow_table_process_conn(p->ct_ft, ct, ctinfo);
> }
>
> - tcf_ct_flow_table_process_conn(p->ct_ft, ct, ctinfo);
> -
> out_push:
> skb_push_rcsum(skb, nh_ofs);
>
> --
> 1.8.3.1
>
^ permalink raw reply [flat|nested] 14+ messages in thread* Re: [PATCH net-next v6 3/3] net/sched: act_ct: Software offload of established flows
2020-03-03 15:57 ` [PATCH net-next v6 3/3] net/sched: act_ct: Software offload of established flows Paul Blakey
2020-03-03 16:51 ` Marcelo Ricardo Leitner
@ 2020-03-03 18:32 ` Nikolay Aleksandrov
1 sibling, 0 replies; 14+ messages in thread
From: Nikolay Aleksandrov @ 2020-03-03 18:32 UTC (permalink / raw)
To: Paul Blakey, Saeed Mahameed, Oz Shlomo, Jakub Kicinski,
Vlad Buslov, David Miller, netdev@vger.kernel.org, Jiri Pirko,
Roi Dayan
On 03/03/2020 17:57, Paul Blakey wrote:
> Offload nf conntrack processing by looking up the 5-tuple in the
> zone's flow table.
>
> The nf conntrack module will process the packets until a connection is
> in established state. Once in established state, the ct state pointer
> (nf_conn) will be restored on the skb from a successful ft lookup.
>
> Signed-off-by: Paul Blakey <paulb@mellanox.com>
> Acked-by: Jiri Pirko <jiri@mellanox.com>
> ---
> Changelog:
> v5->v6:
> Refactor tcp fin/rst check, get tcp header inside filler, and test in caller
> Pull tcp with ports instead of two pulls
> v4->v5:
> Re-read ip/ip6 header after pulling as skb ptrs may change
> Use pskb_network_may_pull instaed of pskb_may_pull
> v1->v2:
> Add !skip_add curly braces
> Removed extra setting thoff again
> Check tcp proto outside of tcf_ct_flow_table_check_tcp
>
Looks good to me, thanks! In the future please add all reviewers to the CC list.
Acked-by: Nikolay Aleksandrov <nikolay@cumulusnetworks.com>
> net/sched/act_ct.c | 152 ++++++++++++++++++++++++++++++++++++++++++++++++++++-
> 1 file changed, 150 insertions(+), 2 deletions(-)
>
> diff --git a/net/sched/act_ct.c b/net/sched/act_ct.c
> index 2ab38431..23eba61 100644
> --- a/net/sched/act_ct.c
> +++ b/net/sched/act_ct.c
> @@ -186,6 +186,147 @@ static void tcf_ct_flow_table_process_conn(struct tcf_ct_flow_table *ct_ft,
> tcf_ct_flow_table_add(ct_ft, ct, tcp);
> }
>
> +static bool
> +tcf_ct_flow_table_fill_tuple_ipv4(struct sk_buff *skb,
> + struct flow_offload_tuple *tuple,
> + struct tcphdr **tcph)
> +{
> + struct flow_ports *ports;
> + unsigned int thoff;
> + struct iphdr *iph;
> +
> + if (!pskb_network_may_pull(skb, sizeof(*iph)))
> + return false;
> +
> + iph = ip_hdr(skb);
> + thoff = iph->ihl * 4;
> +
> + if (ip_is_fragment(iph) ||
> + unlikely(thoff != sizeof(struct iphdr)))
> + return false;
> +
> + if (iph->protocol != IPPROTO_TCP &&
> + iph->protocol != IPPROTO_UDP)
> + return false;
> +
> + if (iph->ttl <= 1)
> + return false;
> +
> + if (!pskb_network_may_pull(skb, iph->protocol == IPPROTO_TCP ?
> + thoff + sizeof(struct tcphdr) :
> + thoff + sizeof(*ports)))
> + return false;
> +
> + iph = ip_hdr(skb);
> + if (iph->protocol == IPPROTO_TCP)
> + *tcph = (void *)(skb_network_header(skb) + thoff);
> +
> + ports = (struct flow_ports *)(skb_network_header(skb) + thoff);
> + tuple->src_v4.s_addr = iph->saddr;
> + tuple->dst_v4.s_addr = iph->daddr;
> + tuple->src_port = ports->source;
> + tuple->dst_port = ports->dest;
> + tuple->l3proto = AF_INET;
> + tuple->l4proto = iph->protocol;
> +
> + return true;
> +}
> +
> +static bool
> +tcf_ct_flow_table_fill_tuple_ipv6(struct sk_buff *skb,
> + struct flow_offload_tuple *tuple,
> + struct tcphdr **tcph)
> +{
> + struct flow_ports *ports;
> + struct ipv6hdr *ip6h;
> + unsigned int thoff;
> +
> + if (!pskb_network_may_pull(skb, sizeof(*ip6h)))
> + return false;
> +
> + ip6h = ipv6_hdr(skb);
> +
> + if (ip6h->nexthdr != IPPROTO_TCP &&
> + ip6h->nexthdr != IPPROTO_UDP)
> + return false;
> +
> + if (ip6h->hop_limit <= 1)
> + return false;
> +
> + thoff = sizeof(*ip6h);
> + if (!pskb_network_may_pull(skb, ip6h->nexthdr == IPPROTO_TCP ?
> + thoff + sizeof(struct tcphdr) :
> + thoff + sizeof(*ports)))
> + return false;
> +
> + ip6h = ipv6_hdr(skb);
> + if (ip6h->nexthdr == IPPROTO_TCP)
> + *tcph = (void *)(skb_network_header(skb) + thoff);
> +
> + ports = (struct flow_ports *)(skb_network_header(skb) + thoff);
> + tuple->src_v6 = ip6h->saddr;
> + tuple->dst_v6 = ip6h->daddr;
> + tuple->src_port = ports->source;
> + tuple->dst_port = ports->dest;
> + tuple->l3proto = AF_INET6;
> + tuple->l4proto = ip6h->nexthdr;
> +
> + return true;
> +}
> +
> +static bool tcf_ct_flow_table_lookup(struct tcf_ct_params *p,
> + struct sk_buff *skb,
> + u8 family)
> +{
> + struct nf_flowtable *nf_ft = &p->ct_ft->nf_ft;
> + struct flow_offload_tuple_rhash *tuplehash;
> + struct flow_offload_tuple tuple = {};
> + enum ip_conntrack_info ctinfo;
> + struct tcphdr *tcph = NULL;
> + struct flow_offload *flow;
> + struct nf_conn *ct;
> + u8 dir;
> +
> + /* Previously seen or loopback */
> + ct = nf_ct_get(skb, &ctinfo);
> + if ((ct && !nf_ct_is_template(ct)) || ctinfo == IP_CT_UNTRACKED)
> + return false;
> +
> + switch (family) {
> + case NFPROTO_IPV4:
> + if (!tcf_ct_flow_table_fill_tuple_ipv4(skb, &tuple, &tcph))
> + return false;
> + break;
> + case NFPROTO_IPV6:
> + if (!tcf_ct_flow_table_fill_tuple_ipv6(skb, &tuple, &tcph))
> + return false;
> + break;
> + default:
> + return false;
> + }
> +
> + tuplehash = flow_offload_lookup(nf_ft, &tuple);
> + if (!tuplehash)
> + return false;
> +
> + dir = tuplehash->tuple.dir;
> + flow = container_of(tuplehash, struct flow_offload, tuplehash[dir]);
> + ct = flow->ct;
> +
> + if (tcph && (unlikely(tcph->fin || tcph->rst))) {
> + flow_offload_teardown(flow);
> + return false;
> + }
> +
> + ctinfo = dir == FLOW_OFFLOAD_DIR_ORIGINAL ? IP_CT_ESTABLISHED :
> + IP_CT_ESTABLISHED_REPLY;
> +
> + nf_conntrack_get(&ct->ct_general);
> + nf_ct_set(skb, ct, ctinfo);
> +
> + return true;
> +}
> +
> static int tcf_ct_flow_tables_init(void)
> {
> return rhashtable_init(&zones_ht, &zones_params);
> @@ -554,6 +695,7 @@ static int tcf_ct_act(struct sk_buff *skb, const struct tc_action *a,
> struct nf_hook_state state;
> int nh_ofs, err, retval;
> struct tcf_ct_params *p;
> + bool skip_add = false;
> struct nf_conn *ct;
> u8 family;
>
> @@ -603,6 +745,11 @@ static int tcf_ct_act(struct sk_buff *skb, const struct tc_action *a,
> */
> cached = tcf_ct_skb_nfct_cached(net, skb, p->zone, force);
> if (!cached) {
> + if (!commit && tcf_ct_flow_table_lookup(p, skb, family)) {
> + skip_add = true;
> + goto do_nat;
> + }
> +
> /* Associate skb with specified zone. */
> if (tmpl) {
> ct = nf_ct_get(skb, &ctinfo);
> @@ -620,6 +767,7 @@ static int tcf_ct_act(struct sk_buff *skb, const struct tc_action *a,
> goto out_push;
> }
>
> +do_nat:
> ct = nf_ct_get(skb, &ctinfo);
> if (!ct)
> goto out_push;
> @@ -637,10 +785,10 @@ static int tcf_ct_act(struct sk_buff *skb, const struct tc_action *a,
> * even if the connection is already confirmed.
> */
> nf_conntrack_confirm(skb);
> + } else if (!skip_add) {
> + tcf_ct_flow_table_process_conn(p->ct_ft, ct, ctinfo);
> }
>
> - tcf_ct_flow_table_process_conn(p->ct_ft, ct, ctinfo);
> -
> out_push:
> skb_push_rcsum(skb, nh_ofs);
>
>
^ permalink raw reply [flat|nested] 14+ messages in thread