netdev.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCHv2 net-next] openvswitch: switch to per-action label counting in conntrack
@ 2024-08-12 17:17 Xin Long
  2024-08-13 17:57 ` Aaron Conole
                   ` (3 more replies)
  0 siblings, 4 replies; 17+ messages in thread
From: Xin Long @ 2024-08-12 17:17 UTC (permalink / raw)
  To: network dev, dev, ovs-dev
  Cc: davem, kuba, Eric Dumazet, Paolo Abeni, Pravin B Shelar,
	Ilya Maximets, Aaron Conole, Florian Westphal

Similar to commit 70f06c115bcc ("sched: act_ct: switch to per-action
label counting"), we should also switch to per-action label counting
in openvswitch conntrack, as Florian suggested.

The difference is that nf_connlabels_get() is called unconditionally
when creating an ct action in ovs_ct_copy_action(). As with these
flows:

  table=0,ip,actions=ct(commit,table=1)
  table=1,ip,actions=ct(commit,exec(set_field:0xac->ct_label),table=2)

it needs to make sure the label ext is created in the 1st flow before
the ct is committed in ovs_ct_commit(). Otherwise, the warning in
nf_ct_ext_add() when creating the label ext in the 2nd flow will
be triggered:

   WARN_ON(nf_ct_is_confirmed(ct));

Signed-off-by: Xin Long <lucien.xin@gmail.com>
---
v2: move ovs_net into #if in ovs_ct_exit() as Jakub noticed.
---
 net/openvswitch/conntrack.c | 30 ++++++++++++------------------
 net/openvswitch/datapath.h  |  3 ---
 2 files changed, 12 insertions(+), 21 deletions(-)

diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c
index 8eb1d644b741..a3da5ee34f92 100644
--- a/net/openvswitch/conntrack.c
+++ b/net/openvswitch/conntrack.c
@@ -1368,11 +1368,8 @@ bool ovs_ct_verify(struct net *net, enum ovs_key_attr attr)
 	    attr == OVS_KEY_ATTR_CT_MARK)
 		return true;
 	if (IS_ENABLED(CONFIG_NF_CONNTRACK_LABELS) &&
-	    attr == OVS_KEY_ATTR_CT_LABELS) {
-		struct ovs_net *ovs_net = net_generic(net, ovs_net_id);
-
-		return ovs_net->xt_label;
-	}
+	    attr == OVS_KEY_ATTR_CT_LABELS)
+		return true;
 
 	return false;
 }
@@ -1381,6 +1378,7 @@ int ovs_ct_copy_action(struct net *net, const struct nlattr *attr,
 		       const struct sw_flow_key *key,
 		       struct sw_flow_actions **sfa,  bool log)
 {
+	unsigned int n_bits = sizeof(struct ovs_key_ct_labels) * BITS_PER_BYTE;
 	struct ovs_conntrack_info ct_info;
 	const char *helper = NULL;
 	u16 family;
@@ -1409,6 +1407,12 @@ int ovs_ct_copy_action(struct net *net, const struct nlattr *attr,
 		return -ENOMEM;
 	}
 
+	if (nf_connlabels_get(net, n_bits - 1)) {
+		nf_ct_tmpl_free(ct_info.ct);
+		OVS_NLERR(log, "Failed to set connlabel length");
+		return -EOPNOTSUPP;
+	}
+
 	if (ct_info.timeout[0]) {
 		if (nf_ct_set_timeout(net, ct_info.ct, family, key->ip.proto,
 				      ct_info.timeout))
@@ -1577,6 +1581,7 @@ static void __ovs_ct_free_action(struct ovs_conntrack_info *ct_info)
 	if (ct_info->ct) {
 		if (ct_info->timeout[0])
 			nf_ct_destroy_timeout(ct_info->ct);
+		nf_connlabels_put(nf_ct_net(ct_info->ct));
 		nf_ct_tmpl_free(ct_info->ct);
 	}
 }
@@ -2002,17 +2007,9 @@ struct genl_family dp_ct_limit_genl_family __ro_after_init = {
 
 int ovs_ct_init(struct net *net)
 {
-	unsigned int n_bits = sizeof(struct ovs_key_ct_labels) * BITS_PER_BYTE;
+#if	IS_ENABLED(CONFIG_NETFILTER_CONNCOUNT)
 	struct ovs_net *ovs_net = net_generic(net, ovs_net_id);
 
-	if (nf_connlabels_get(net, n_bits - 1)) {
-		ovs_net->xt_label = false;
-		OVS_NLERR(true, "Failed to set connlabel length");
-	} else {
-		ovs_net->xt_label = true;
-	}
-
-#if	IS_ENABLED(CONFIG_NETFILTER_CONNCOUNT)
 	return ovs_ct_limit_init(net, ovs_net);
 #else
 	return 0;
@@ -2021,12 +2018,9 @@ int ovs_ct_init(struct net *net)
 
 void ovs_ct_exit(struct net *net)
 {
+#if	IS_ENABLED(CONFIG_NETFILTER_CONNCOUNT)
 	struct ovs_net *ovs_net = net_generic(net, ovs_net_id);
 
-#if	IS_ENABLED(CONFIG_NETFILTER_CONNCOUNT)
 	ovs_ct_limit_exit(net, ovs_net);
 #endif
-
-	if (ovs_net->xt_label)
-		nf_connlabels_put(net);
 }
diff --git a/net/openvswitch/datapath.h b/net/openvswitch/datapath.h
index 9ca6231ea647..365b9bb7f546 100644
--- a/net/openvswitch/datapath.h
+++ b/net/openvswitch/datapath.h
@@ -160,9 +160,6 @@ struct ovs_net {
 #if	IS_ENABLED(CONFIG_NETFILTER_CONNCOUNT)
 	struct ovs_ct_limit_info *ct_limit_info;
 #endif
-
-	/* Module reference for configuring conntrack. */
-	bool xt_label;
 };
 
 /**
-- 
2.43.0


^ permalink raw reply related	[flat|nested] 17+ messages in thread

* Re: [PATCHv2 net-next] openvswitch: switch to per-action label counting in conntrack
  2024-08-12 17:17 [PATCHv2 net-next] openvswitch: switch to per-action label counting in conntrack Xin Long
@ 2024-08-13 17:57 ` Aaron Conole
  2024-08-13 18:58 ` Florian Westphal
                   ` (2 subsequent siblings)
  3 siblings, 0 replies; 17+ messages in thread
From: Aaron Conole @ 2024-08-13 17:57 UTC (permalink / raw)
  To: Xin Long
  Cc: network dev, dev, ovs-dev, davem, kuba, Eric Dumazet, Paolo Abeni,
	Pravin B Shelar, Ilya Maximets, Florian Westphal

Xin Long <lucien.xin@gmail.com> writes:

> Similar to commit 70f06c115bcc ("sched: act_ct: switch to per-action
> label counting"), we should also switch to per-action label counting
> in openvswitch conntrack, as Florian suggested.
>
> The difference is that nf_connlabels_get() is called unconditionally
> when creating an ct action in ovs_ct_copy_action(). As with these
> flows:
>
>   table=0,ip,actions=ct(commit,table=1)
>   table=1,ip,actions=ct(commit,exec(set_field:0xac->ct_label),table=2)
>
> it needs to make sure the label ext is created in the 1st flow before
> the ct is committed in ovs_ct_commit(). Otherwise, the warning in
> nf_ct_ext_add() when creating the label ext in the 2nd flow will
> be triggered:
>
>    WARN_ON(nf_ct_is_confirmed(ct));
>
> Signed-off-by: Xin Long <lucien.xin@gmail.com>
> ---

Reviewed-by: Aaron Conole <aconole@redhat.com>


^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCHv2 net-next] openvswitch: switch to per-action label counting in conntrack
  2024-08-12 17:17 [PATCHv2 net-next] openvswitch: switch to per-action label counting in conntrack Xin Long
  2024-08-13 17:57 ` Aaron Conole
@ 2024-08-13 18:58 ` Florian Westphal
  2024-08-16  2:20 ` patchwork-bot+netdevbpf
  2025-02-24  9:01 ` Jianbo Liu
  3 siblings, 0 replies; 17+ messages in thread
From: Florian Westphal @ 2024-08-13 18:58 UTC (permalink / raw)
  To: Xin Long
  Cc: network dev, dev, ovs-dev, davem, kuba, Eric Dumazet, Paolo Abeni,
	Pravin B Shelar, Ilya Maximets, Aaron Conole, Florian Westphal

Xin Long <lucien.xin@gmail.com> wrote:
> Similar to commit 70f06c115bcc ("sched: act_ct: switch to per-action
> label counting"), we should also switch to per-action label counting
> in openvswitch conntrack, as Florian suggested.
> 
> The difference is that nf_connlabels_get() is called unconditionally
> when creating an ct action in ovs_ct_copy_action(). As with these
> flows:
> 
>   table=0,ip,actions=ct(commit,table=1)
>   table=1,ip,actions=ct(commit,exec(set_field:0xac->ct_label),table=2)
> 
> it needs to make sure the label ext is created in the 1st flow before
> the ct is committed in ovs_ct_commit(). Otherwise, the warning in
> nf_ct_ext_add() when creating the label ext in the 2nd flow will
> be triggered:

With this and https://patchwork.ozlabs.org/project/netfilter-devel/patch/7380c37e2d58a93164b7f2212c90cd23f9d910f8.1721268584.git.lucien.xin@gmail.com/
applied new netns doesn't have conntrack enabled anymore, so

Acked-by: Florian Westphal <fw@strlen.de>

Thanks Xinlong!

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCHv2 net-next] openvswitch: switch to per-action label counting in conntrack
  2024-08-12 17:17 [PATCHv2 net-next] openvswitch: switch to per-action label counting in conntrack Xin Long
  2024-08-13 17:57 ` Aaron Conole
  2024-08-13 18:58 ` Florian Westphal
@ 2024-08-16  2:20 ` patchwork-bot+netdevbpf
  2025-02-24  9:01 ` Jianbo Liu
  3 siblings, 0 replies; 17+ messages in thread
From: patchwork-bot+netdevbpf @ 2024-08-16  2:20 UTC (permalink / raw)
  To: Xin Long
  Cc: netdev, dev, ovs-dev, davem, kuba, edumazet, pabeni, pshelar,
	i.maximets, aconole, fw

Hello:

This patch was applied to netdev/net-next.git (main)
by Jakub Kicinski <kuba@kernel.org>:

On Mon, 12 Aug 2024 13:17:53 -0400 you wrote:
> Similar to commit 70f06c115bcc ("sched: act_ct: switch to per-action
> label counting"), we should also switch to per-action label counting
> in openvswitch conntrack, as Florian suggested.
> 
> The difference is that nf_connlabels_get() is called unconditionally
> when creating an ct action in ovs_ct_copy_action(). As with these
> flows:
> 
> [...]

Here is the summary with links:
  - [PATCHv2,net-next] openvswitch: switch to per-action label counting in conntrack
    https://git.kernel.org/netdev/net-next/c/fcb1aa5163b1

You are awesome, thank you!
-- 
Deet-doot-dot, I am a bot.
https://korg.docs.kernel.org/patchwork/pwbot.html



^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCHv2 net-next] openvswitch: switch to per-action label counting in conntrack
  2024-08-12 17:17 [PATCHv2 net-next] openvswitch: switch to per-action label counting in conntrack Xin Long
                   ` (2 preceding siblings ...)
  2024-08-16  2:20 ` patchwork-bot+netdevbpf
@ 2025-02-24  9:01 ` Jianbo Liu
  2025-02-24 19:55   ` Xin Long
  3 siblings, 1 reply; 17+ messages in thread
From: Jianbo Liu @ 2025-02-24  9:01 UTC (permalink / raw)
  To: Xin Long, network dev, dev, ovs-dev
  Cc: davem, kuba, Eric Dumazet, Paolo Abeni, Pravin B Shelar,
	Ilya Maximets, Aaron Conole, Florian Westphal



On 8/13/2024 1:17 AM, Xin Long wrote:
> Similar to commit 70f06c115bcc ("sched: act_ct: switch to per-action
> label counting"), we should also switch to per-action label counting
> in openvswitch conntrack, as Florian suggested.
> 
> The difference is that nf_connlabels_get() is called unconditionally
> when creating an ct action in ovs_ct_copy_action(). As with these
> flows:
> 
>    table=0,ip,actions=ct(commit,table=1)
>    table=1,ip,actions=ct(commit,exec(set_field:0xac->ct_label),table=2)
> 
> it needs to make sure the label ext is created in the 1st flow before
> the ct is committed in ovs_ct_commit(). Otherwise, the warning in
> nf_ct_ext_add() when creating the label ext in the 2nd flow will
> be triggered:
> 
>     WARN_ON(nf_ct_is_confirmed(ct));
> 

Hi Xin Long,

The ct can be committed before openvswitch handles packets with CT 
actions. And we can trigger the warning by creating VF and running ping 
over it with the following configurations:

ovs-vsctl add-br br
ovs-vsctl add-port br eth2
ovs-vsctl add-port br eth4
ovs-ofctl add-flow br "table=0, in_port=eth4,ip,ct_state=-trk 
actions=ct(table=1)"
ovs-ofctl add-flow br "table=1, in_port=eth4,ip,ct_state=+trk+new 
actions=ct(exec(set_field:0xef7d->ct_label), commit), output:eth2"
ovs-ofctl add-flow br "table=1, in_port=eth4,ip,ct_label=0xef7d, 
ct_state=+trk+est actions=output:eth2"

The eth2 is PF, and eth4 is VF's representor.
Would you like to fix it?

Thanks!
Jianbo

> Signed-off-by: Xin Long <lucien.xin@gmail.com>
> ---
> v2: move ovs_net into #if in ovs_ct_exit() as Jakub noticed.
> ---
>   net/openvswitch/conntrack.c | 30 ++++++++++++------------------
>   net/openvswitch/datapath.h  |  3 ---
>   2 files changed, 12 insertions(+), 21 deletions(-)
> 
> diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c
> index 8eb1d644b741..a3da5ee34f92 100644
> --- a/net/openvswitch/conntrack.c
> +++ b/net/openvswitch/conntrack.c
> @@ -1368,11 +1368,8 @@ bool ovs_ct_verify(struct net *net, enum ovs_key_attr attr)
>   	    attr == OVS_KEY_ATTR_CT_MARK)
>   		return true;
>   	if (IS_ENABLED(CONFIG_NF_CONNTRACK_LABELS) &&
> -	    attr == OVS_KEY_ATTR_CT_LABELS) {
> -		struct ovs_net *ovs_net = net_generic(net, ovs_net_id);
> -
> -		return ovs_net->xt_label;
> -	}
> +	    attr == OVS_KEY_ATTR_CT_LABELS)
> +		return true;
>   
>   	return false;
>   }
> @@ -1381,6 +1378,7 @@ int ovs_ct_copy_action(struct net *net, const struct nlattr *attr,
>   		       const struct sw_flow_key *key,
>   		       struct sw_flow_actions **sfa,  bool log)
>   {
> +	unsigned int n_bits = sizeof(struct ovs_key_ct_labels) * BITS_PER_BYTE;
>   	struct ovs_conntrack_info ct_info;
>   	const char *helper = NULL;
>   	u16 family;
> @@ -1409,6 +1407,12 @@ int ovs_ct_copy_action(struct net *net, const struct nlattr *attr,
>   		return -ENOMEM;
>   	}
>   
> +	if (nf_connlabels_get(net, n_bits - 1)) {
> +		nf_ct_tmpl_free(ct_info.ct);
> +		OVS_NLERR(log, "Failed to set connlabel length");
> +		return -EOPNOTSUPP;
> +	}
> +
>   	if (ct_info.timeout[0]) {
>   		if (nf_ct_set_timeout(net, ct_info.ct, family, key->ip.proto,
>   				      ct_info.timeout))
> @@ -1577,6 +1581,7 @@ static void __ovs_ct_free_action(struct ovs_conntrack_info *ct_info)
>   	if (ct_info->ct) {
>   		if (ct_info->timeout[0])
>   			nf_ct_destroy_timeout(ct_info->ct);
> +		nf_connlabels_put(nf_ct_net(ct_info->ct));
>   		nf_ct_tmpl_free(ct_info->ct);
>   	}
>   }
> @@ -2002,17 +2007,9 @@ struct genl_family dp_ct_limit_genl_family __ro_after_init = {
>   
>   int ovs_ct_init(struct net *net)
>   {
> -	unsigned int n_bits = sizeof(struct ovs_key_ct_labels) * BITS_PER_BYTE;
> +#if	IS_ENABLED(CONFIG_NETFILTER_CONNCOUNT)
>   	struct ovs_net *ovs_net = net_generic(net, ovs_net_id);
>   
> -	if (nf_connlabels_get(net, n_bits - 1)) {
> -		ovs_net->xt_label = false;
> -		OVS_NLERR(true, "Failed to set connlabel length");
> -	} else {
> -		ovs_net->xt_label = true;
> -	}
> -
> -#if	IS_ENABLED(CONFIG_NETFILTER_CONNCOUNT)
>   	return ovs_ct_limit_init(net, ovs_net);
>   #else
>   	return 0;
> @@ -2021,12 +2018,9 @@ int ovs_ct_init(struct net *net)
>   
>   void ovs_ct_exit(struct net *net)
>   {
> +#if	IS_ENABLED(CONFIG_NETFILTER_CONNCOUNT)
>   	struct ovs_net *ovs_net = net_generic(net, ovs_net_id);
>   
> -#if	IS_ENABLED(CONFIG_NETFILTER_CONNCOUNT)
>   	ovs_ct_limit_exit(net, ovs_net);
>   #endif
> -
> -	if (ovs_net->xt_label)
> -		nf_connlabels_put(net);
>   }
> diff --git a/net/openvswitch/datapath.h b/net/openvswitch/datapath.h
> index 9ca6231ea647..365b9bb7f546 100644
> --- a/net/openvswitch/datapath.h
> +++ b/net/openvswitch/datapath.h
> @@ -160,9 +160,6 @@ struct ovs_net {
>   #if	IS_ENABLED(CONFIG_NETFILTER_CONNCOUNT)
>   	struct ovs_ct_limit_info *ct_limit_info;
>   #endif
> -
> -	/* Module reference for configuring conntrack. */
> -	bool xt_label;
>   };
>   
>   /**


^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCHv2 net-next] openvswitch: switch to per-action label counting in conntrack
  2025-02-24  9:01 ` Jianbo Liu
@ 2025-02-24 19:55   ` Xin Long
  2025-02-25  1:38     ` Jianbo Liu
  0 siblings, 1 reply; 17+ messages in thread
From: Xin Long @ 2025-02-24 19:55 UTC (permalink / raw)
  To: Jianbo Liu
  Cc: network dev, dev, ovs-dev, davem, kuba, Eric Dumazet, Paolo Abeni,
	Pravin B Shelar, Ilya Maximets, Aaron Conole, Florian Westphal

On Mon, Feb 24, 2025 at 4:01 AM Jianbo Liu <jianbol@nvidia.com> wrote:
>
>
>
> On 8/13/2024 1:17 AM, Xin Long wrote:
> > Similar to commit 70f06c115bcc ("sched: act_ct: switch to per-action
> > label counting"), we should also switch to per-action label counting
> > in openvswitch conntrack, as Florian suggested.
> >
> > The difference is that nf_connlabels_get() is called unconditionally
> > when creating an ct action in ovs_ct_copy_action(). As with these
> > flows:
> >
> >    table=0,ip,actions=ct(commit,table=1)
> >    table=1,ip,actions=ct(commit,exec(set_field:0xac->ct_label),table=2)
> >
> > it needs to make sure the label ext is created in the 1st flow before
> > the ct is committed in ovs_ct_commit(). Otherwise, the warning in
> > nf_ct_ext_add() when creating the label ext in the 2nd flow will
> > be triggered:
> >
> >     WARN_ON(nf_ct_is_confirmed(ct));
> >
>
> Hi Xin Long,
>
> The ct can be committed before openvswitch handles packets with CT
> actions. And we can trigger the warning by creating VF and running ping
> over it with the following configurations:
>
> ovs-vsctl add-br br
> ovs-vsctl add-port br eth2
> ovs-vsctl add-port br eth4
> ovs-ofctl add-flow br "table=0, in_port=eth4,ip,ct_state=-trk
> actions=ct(table=1)"
> ovs-ofctl add-flow br "table=1, in_port=eth4,ip,ct_state=+trk+new
> actions=ct(exec(set_field:0xef7d->ct_label), commit), output:eth2"
> ovs-ofctl add-flow br "table=1, in_port=eth4,ip,ct_label=0xef7d,
> ct_state=+trk+est actions=output:eth2"
>
> The eth2 is PF, and eth4 is VF's representor.
> Would you like to fix it?
Hi, Jianbo,

Sure, we have to attach a new ct to the skb in __ovs_ct_lookup() for
this case, and even delete the one created before ovs_ct.

Can you check if this works on your env?

diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c
index 3bb4810234aa..c599ee013dfe 100644
--- a/net/openvswitch/conntrack.c
+++ b/net/openvswitch/conntrack.c
@@ -595,6 +595,11 @@ static bool skb_nfct_cached(struct net *net,
             rcu_dereference(timeout_ext->timeout))
             return false;
     }
+    if (IS_ENABLED(CONFIG_NF_CONNTRACK_LABELS) && !nf_ct_labels_find(ct)) {
+        if (nf_ct_is_confirmed(ct))
+            nf_ct_delete(ct, 0, 0);
+        return false;
+    }

Thanks.

>
> Thanks!
> Jianbo
>
> > Signed-off-by: Xin Long <lucien.xin@gmail.com>
> > ---
> > v2: move ovs_net into #if in ovs_ct_exit() as Jakub noticed.
> > ---
> >   net/openvswitch/conntrack.c | 30 ++++++++++++------------------
> >   net/openvswitch/datapath.h  |  3 ---
> >   2 files changed, 12 insertions(+), 21 deletions(-)
> >
> > diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c
> > index 8eb1d644b741..a3da5ee34f92 100644
> > --- a/net/openvswitch/conntrack.c
> > +++ b/net/openvswitch/conntrack.c
> > @@ -1368,11 +1368,8 @@ bool ovs_ct_verify(struct net *net, enum ovs_key_attr attr)
> >           attr == OVS_KEY_ATTR_CT_MARK)
> >               return true;
> >       if (IS_ENABLED(CONFIG_NF_CONNTRACK_LABELS) &&
> > -         attr == OVS_KEY_ATTR_CT_LABELS) {
> > -             struct ovs_net *ovs_net = net_generic(net, ovs_net_id);
> > -
> > -             return ovs_net->xt_label;
> > -     }
> > +         attr == OVS_KEY_ATTR_CT_LABELS)
> > +             return true;
> >
> >       return false;
> >   }
> > @@ -1381,6 +1378,7 @@ int ovs_ct_copy_action(struct net *net, const struct nlattr *attr,
> >                      const struct sw_flow_key *key,
> >                      struct sw_flow_actions **sfa,  bool log)
> >   {
> > +     unsigned int n_bits = sizeof(struct ovs_key_ct_labels) * BITS_PER_BYTE;
> >       struct ovs_conntrack_info ct_info;
> >       const char *helper = NULL;
> >       u16 family;
> > @@ -1409,6 +1407,12 @@ int ovs_ct_copy_action(struct net *net, const struct nlattr *attr,
> >               return -ENOMEM;
> >       }
> >
> > +     if (nf_connlabels_get(net, n_bits - 1)) {
> > +             nf_ct_tmpl_free(ct_info.ct);
> > +             OVS_NLERR(log, "Failed to set connlabel length");
> > +             return -EOPNOTSUPP;
> > +     }
> > +
> >       if (ct_info.timeout[0]) {
> >               if (nf_ct_set_timeout(net, ct_info.ct, family, key->ip.proto,
> >                                     ct_info.timeout))
> > @@ -1577,6 +1581,7 @@ static void __ovs_ct_free_action(struct ovs_conntrack_info *ct_info)
> >       if (ct_info->ct) {
> >               if (ct_info->timeout[0])
> >                       nf_ct_destroy_timeout(ct_info->ct);
> > +             nf_connlabels_put(nf_ct_net(ct_info->ct));
> >               nf_ct_tmpl_free(ct_info->ct);
> >       }
> >   }
> > @@ -2002,17 +2007,9 @@ struct genl_family dp_ct_limit_genl_family __ro_after_init = {
> >
> >   int ovs_ct_init(struct net *net)
> >   {
> > -     unsigned int n_bits = sizeof(struct ovs_key_ct_labels) * BITS_PER_BYTE;
> > +#if  IS_ENABLED(CONFIG_NETFILTER_CONNCOUNT)
> >       struct ovs_net *ovs_net = net_generic(net, ovs_net_id);
> >
> > -     if (nf_connlabels_get(net, n_bits - 1)) {
> > -             ovs_net->xt_label = false;
> > -             OVS_NLERR(true, "Failed to set connlabel length");
> > -     } else {
> > -             ovs_net->xt_label = true;
> > -     }
> > -
> > -#if  IS_ENABLED(CONFIG_NETFILTER_CONNCOUNT)
> >       return ovs_ct_limit_init(net, ovs_net);
> >   #else
> >       return 0;
> > @@ -2021,12 +2018,9 @@ int ovs_ct_init(struct net *net)
> >
> >   void ovs_ct_exit(struct net *net)
> >   {
> > +#if  IS_ENABLED(CONFIG_NETFILTER_CONNCOUNT)
> >       struct ovs_net *ovs_net = net_generic(net, ovs_net_id);
> >
> > -#if  IS_ENABLED(CONFIG_NETFILTER_CONNCOUNT)
> >       ovs_ct_limit_exit(net, ovs_net);
> >   #endif
> > -
> > -     if (ovs_net->xt_label)
> > -             nf_connlabels_put(net);
> >   }
> > diff --git a/net/openvswitch/datapath.h b/net/openvswitch/datapath.h
> > index 9ca6231ea647..365b9bb7f546 100644
> > --- a/net/openvswitch/datapath.h
> > +++ b/net/openvswitch/datapath.h
> > @@ -160,9 +160,6 @@ struct ovs_net {
> >   #if IS_ENABLED(CONFIG_NETFILTER_CONNCOUNT)
> >       struct ovs_ct_limit_info *ct_limit_info;
> >   #endif
> > -
> > -     /* Module reference for configuring conntrack. */
> > -     bool xt_label;
> >   };
> >
> >   /**
>

^ permalink raw reply related	[flat|nested] 17+ messages in thread

* Re: [PATCHv2 net-next] openvswitch: switch to per-action label counting in conntrack
  2025-02-24 19:55   ` Xin Long
@ 2025-02-25  1:38     ` Jianbo Liu
  2025-02-25 14:57       ` Xin Long
  0 siblings, 1 reply; 17+ messages in thread
From: Jianbo Liu @ 2025-02-25  1:38 UTC (permalink / raw)
  To: Xin Long
  Cc: network dev, dev, ovs-dev, davem, kuba, Eric Dumazet, Paolo Abeni,
	Pravin B Shelar, Ilya Maximets, Aaron Conole, Florian Westphal



On 2/25/2025 3:55 AM, Xin Long wrote:
> On Mon, Feb 24, 2025 at 4:01 AM Jianbo Liu <jianbol@nvidia.com> wrote:
>>
>>
>>
>> On 8/13/2024 1:17 AM, Xin Long wrote:
>>> Similar to commit 70f06c115bcc ("sched: act_ct: switch to per-action
>>> label counting"), we should also switch to per-action label counting
>>> in openvswitch conntrack, as Florian suggested.
>>>
>>> The difference is that nf_connlabels_get() is called unconditionally
>>> when creating an ct action in ovs_ct_copy_action(). As with these
>>> flows:
>>>
>>>     table=0,ip,actions=ct(commit,table=1)
>>>     table=1,ip,actions=ct(commit,exec(set_field:0xac->ct_label),table=2)
>>>
>>> it needs to make sure the label ext is created in the 1st flow before
>>> the ct is committed in ovs_ct_commit(). Otherwise, the warning in
>>> nf_ct_ext_add() when creating the label ext in the 2nd flow will
>>> be triggered:
>>>
>>>      WARN_ON(nf_ct_is_confirmed(ct));
>>>
>>
>> Hi Xin Long,
>>
>> The ct can be committed before openvswitch handles packets with CT
>> actions. And we can trigger the warning by creating VF and running ping
>> over it with the following configurations:
>>
>> ovs-vsctl add-br br
>> ovs-vsctl add-port br eth2
>> ovs-vsctl add-port br eth4
>> ovs-ofctl add-flow br "table=0, in_port=eth4,ip,ct_state=-trk
>> actions=ct(table=1)"
>> ovs-ofctl add-flow br "table=1, in_port=eth4,ip,ct_state=+trk+new
>> actions=ct(exec(set_field:0xef7d->ct_label), commit), output:eth2"
>> ovs-ofctl add-flow br "table=1, in_port=eth4,ip,ct_label=0xef7d,
>> ct_state=+trk+est actions=output:eth2"
>>
>> The eth2 is PF, and eth4 is VF's representor.
>> Would you like to fix it?
> Hi, Jianbo,
> 
> Sure, we have to attach a new ct to the skb in __ovs_ct_lookup() for
> this case, and even delete the one created before ovs_ct.
> 
> Can you check if this works on your env?

Yes, it works.
Could you please submit the formal patch? Thanks!

> 
> diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c
> index 3bb4810234aa..c599ee013dfe 100644
> --- a/net/openvswitch/conntrack.c
> +++ b/net/openvswitch/conntrack.c
> @@ -595,6 +595,11 @@ static bool skb_nfct_cached(struct net *net,
>               rcu_dereference(timeout_ext->timeout))
>               return false;
>       }
> +    if (IS_ENABLED(CONFIG_NF_CONNTRACK_LABELS) && !nf_ct_labels_find(ct)) {
> +        if (nf_ct_is_confirmed(ct))
> +            nf_ct_delete(ct, 0, 0);
> +        return false;
> +    }
> 
> Thanks.
> 
>>
>> Thanks!
>> Jianbo
>>
>>> Signed-off-by: Xin Long <lucien.xin@gmail.com>
>>> ---
>>> v2: move ovs_net into #if in ovs_ct_exit() as Jakub noticed.
>>> ---
>>>    net/openvswitch/conntrack.c | 30 ++++++++++++------------------
>>>    net/openvswitch/datapath.h  |  3 ---
>>>    2 files changed, 12 insertions(+), 21 deletions(-)
>>>
>>> diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c
>>> index 8eb1d644b741..a3da5ee34f92 100644
>>> --- a/net/openvswitch/conntrack.c
>>> +++ b/net/openvswitch/conntrack.c
>>> @@ -1368,11 +1368,8 @@ bool ovs_ct_verify(struct net *net, enum ovs_key_attr attr)
>>>            attr == OVS_KEY_ATTR_CT_MARK)
>>>                return true;
>>>        if (IS_ENABLED(CONFIG_NF_CONNTRACK_LABELS) &&
>>> -         attr == OVS_KEY_ATTR_CT_LABELS) {
>>> -             struct ovs_net *ovs_net = net_generic(net, ovs_net_id);
>>> -
>>> -             return ovs_net->xt_label;
>>> -     }
>>> +         attr == OVS_KEY_ATTR_CT_LABELS)
>>> +             return true;
>>>
>>>        return false;
>>>    }
>>> @@ -1381,6 +1378,7 @@ int ovs_ct_copy_action(struct net *net, const struct nlattr *attr,
>>>                       const struct sw_flow_key *key,
>>>                       struct sw_flow_actions **sfa,  bool log)
>>>    {
>>> +     unsigned int n_bits = sizeof(struct ovs_key_ct_labels) * BITS_PER_BYTE;
>>>        struct ovs_conntrack_info ct_info;
>>>        const char *helper = NULL;
>>>        u16 family;
>>> @@ -1409,6 +1407,12 @@ int ovs_ct_copy_action(struct net *net, const struct nlattr *attr,
>>>                return -ENOMEM;
>>>        }
>>>
>>> +     if (nf_connlabels_get(net, n_bits - 1)) {
>>> +             nf_ct_tmpl_free(ct_info.ct);
>>> +             OVS_NLERR(log, "Failed to set connlabel length");
>>> +             return -EOPNOTSUPP;
>>> +     }
>>> +
>>>        if (ct_info.timeout[0]) {
>>>                if (nf_ct_set_timeout(net, ct_info.ct, family, key->ip.proto,
>>>                                      ct_info.timeout))
>>> @@ -1577,6 +1581,7 @@ static void __ovs_ct_free_action(struct ovs_conntrack_info *ct_info)
>>>        if (ct_info->ct) {
>>>                if (ct_info->timeout[0])
>>>                        nf_ct_destroy_timeout(ct_info->ct);
>>> +             nf_connlabels_put(nf_ct_net(ct_info->ct));
>>>                nf_ct_tmpl_free(ct_info->ct);
>>>        }
>>>    }
>>> @@ -2002,17 +2007,9 @@ struct genl_family dp_ct_limit_genl_family __ro_after_init = {
>>>
>>>    int ovs_ct_init(struct net *net)
>>>    {
>>> -     unsigned int n_bits = sizeof(struct ovs_key_ct_labels) * BITS_PER_BYTE;
>>> +#if  IS_ENABLED(CONFIG_NETFILTER_CONNCOUNT)
>>>        struct ovs_net *ovs_net = net_generic(net, ovs_net_id);
>>>
>>> -     if (nf_connlabels_get(net, n_bits - 1)) {
>>> -             ovs_net->xt_label = false;
>>> -             OVS_NLERR(true, "Failed to set connlabel length");
>>> -     } else {
>>> -             ovs_net->xt_label = true;
>>> -     }
>>> -
>>> -#if  IS_ENABLED(CONFIG_NETFILTER_CONNCOUNT)
>>>        return ovs_ct_limit_init(net, ovs_net);
>>>    #else
>>>        return 0;
>>> @@ -2021,12 +2018,9 @@ int ovs_ct_init(struct net *net)
>>>
>>>    void ovs_ct_exit(struct net *net)
>>>    {
>>> +#if  IS_ENABLED(CONFIG_NETFILTER_CONNCOUNT)
>>>        struct ovs_net *ovs_net = net_generic(net, ovs_net_id);
>>>
>>> -#if  IS_ENABLED(CONFIG_NETFILTER_CONNCOUNT)
>>>        ovs_ct_limit_exit(net, ovs_net);
>>>    #endif
>>> -
>>> -     if (ovs_net->xt_label)
>>> -             nf_connlabels_put(net);
>>>    }
>>> diff --git a/net/openvswitch/datapath.h b/net/openvswitch/datapath.h
>>> index 9ca6231ea647..365b9bb7f546 100644
>>> --- a/net/openvswitch/datapath.h
>>> +++ b/net/openvswitch/datapath.h
>>> @@ -160,9 +160,6 @@ struct ovs_net {
>>>    #if IS_ENABLED(CONFIG_NETFILTER_CONNCOUNT)
>>>        struct ovs_ct_limit_info *ct_limit_info;
>>>    #endif
>>> -
>>> -     /* Module reference for configuring conntrack. */
>>> -     bool xt_label;
>>>    };
>>>
>>>    /**
>>


^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCHv2 net-next] openvswitch: switch to per-action label counting in conntrack
  2025-02-25  1:38     ` Jianbo Liu
@ 2025-02-25 14:57       ` Xin Long
  2025-03-02 18:22         ` Xin Long
  0 siblings, 1 reply; 17+ messages in thread
From: Xin Long @ 2025-02-25 14:57 UTC (permalink / raw)
  To: Jianbo Liu
  Cc: network dev, dev, ovs-dev, davem, kuba, Eric Dumazet, Paolo Abeni,
	Pravin B Shelar, Ilya Maximets, Aaron Conole, Florian Westphal

On Mon, Feb 24, 2025 at 8:38 PM Jianbo Liu <jianbol@nvidia.com> wrote:
>
>
>
> On 2/25/2025 3:55 AM, Xin Long wrote:
> > On Mon, Feb 24, 2025 at 4:01 AM Jianbo Liu <jianbol@nvidia.com> wrote:
> >>
> >>
> >>
> >> On 8/13/2024 1:17 AM, Xin Long wrote:
> >>> Similar to commit 70f06c115bcc ("sched: act_ct: switch to per-action
> >>> label counting"), we should also switch to per-action label counting
> >>> in openvswitch conntrack, as Florian suggested.
> >>>
> >>> The difference is that nf_connlabels_get() is called unconditionally
> >>> when creating an ct action in ovs_ct_copy_action(). As with these
> >>> flows:
> >>>
> >>>     table=0,ip,actions=ct(commit,table=1)
> >>>     table=1,ip,actions=ct(commit,exec(set_field:0xac->ct_label),table=2)
> >>>
> >>> it needs to make sure the label ext is created in the 1st flow before
> >>> the ct is committed in ovs_ct_commit(). Otherwise, the warning in
> >>> nf_ct_ext_add() when creating the label ext in the 2nd flow will
> >>> be triggered:
> >>>
> >>>      WARN_ON(nf_ct_is_confirmed(ct));
> >>>
> >>
> >> Hi Xin Long,
> >>
> >> The ct can be committed before openvswitch handles packets with CT
> >> actions. And we can trigger the warning by creating VF and running ping
> >> over it with the following configurations:
> >>
> >> ovs-vsctl add-br br
> >> ovs-vsctl add-port br eth2
> >> ovs-vsctl add-port br eth4
> >> ovs-ofctl add-flow br "table=0, in_port=eth4,ip,ct_state=-trk
> >> actions=ct(table=1)"
> >> ovs-ofctl add-flow br "table=1, in_port=eth4,ip,ct_state=+trk+new
> >> actions=ct(exec(set_field:0xef7d->ct_label), commit), output:eth2"
> >> ovs-ofctl add-flow br "table=1, in_port=eth4,ip,ct_label=0xef7d,
> >> ct_state=+trk+est actions=output:eth2"
> >>
> >> The eth2 is PF, and eth4 is VF's representor.
> >> Would you like to fix it?
> > Hi, Jianbo,
> >
> > Sure, we have to attach a new ct to the skb in __ovs_ct_lookup() for
> > this case, and even delete the one created before ovs_ct.
> >
> > Can you check if this works on your env?
>
> Yes, it works.
> Could you please submit the formal patch? Thanks!
Great, I will post after running some of my local tests.

>
> >
> > diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c
> > index 3bb4810234aa..c599ee013dfe 100644
> > --- a/net/openvswitch/conntrack.c
> > +++ b/net/openvswitch/conntrack.c
> > @@ -595,6 +595,11 @@ static bool skb_nfct_cached(struct net *net,
> >               rcu_dereference(timeout_ext->timeout))
> >               return false;
> >       }
> > +    if (IS_ENABLED(CONFIG_NF_CONNTRACK_LABELS) && !nf_ct_labels_find(ct)) {
> > +        if (nf_ct_is_confirmed(ct))
> > +            nf_ct_delete(ct, 0, 0);
> > +        return false;
> > +    }
> >
> > Thanks.
> >
> >>
> >> Thanks!
> >> Jianbo
> >>
> >>> Signed-off-by: Xin Long <lucien.xin@gmail.com>
> >>> ---
> >>> v2: move ovs_net into #if in ovs_ct_exit() as Jakub noticed.
> >>> ---
> >>>    net/openvswitch/conntrack.c | 30 ++++++++++++------------------
> >>>    net/openvswitch/datapath.h  |  3 ---
> >>>    2 files changed, 12 insertions(+), 21 deletions(-)
> >>>
> >>> diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c
> >>> index 8eb1d644b741..a3da5ee34f92 100644
> >>> --- a/net/openvswitch/conntrack.c
> >>> +++ b/net/openvswitch/conntrack.c
> >>> @@ -1368,11 +1368,8 @@ bool ovs_ct_verify(struct net *net, enum ovs_key_attr attr)
> >>>            attr == OVS_KEY_ATTR_CT_MARK)
> >>>                return true;
> >>>        if (IS_ENABLED(CONFIG_NF_CONNTRACK_LABELS) &&
> >>> -         attr == OVS_KEY_ATTR_CT_LABELS) {
> >>> -             struct ovs_net *ovs_net = net_generic(net, ovs_net_id);
> >>> -
> >>> -             return ovs_net->xt_label;
> >>> -     }
> >>> +         attr == OVS_KEY_ATTR_CT_LABELS)
> >>> +             return true;
> >>>
> >>>        return false;
> >>>    }
> >>> @@ -1381,6 +1378,7 @@ int ovs_ct_copy_action(struct net *net, const struct nlattr *attr,
> >>>                       const struct sw_flow_key *key,
> >>>                       struct sw_flow_actions **sfa,  bool log)
> >>>    {
> >>> +     unsigned int n_bits = sizeof(struct ovs_key_ct_labels) * BITS_PER_BYTE;
> >>>        struct ovs_conntrack_info ct_info;
> >>>        const char *helper = NULL;
> >>>        u16 family;
> >>> @@ -1409,6 +1407,12 @@ int ovs_ct_copy_action(struct net *net, const struct nlattr *attr,
> >>>                return -ENOMEM;
> >>>        }
> >>>
> >>> +     if (nf_connlabels_get(net, n_bits - 1)) {
> >>> +             nf_ct_tmpl_free(ct_info.ct);
> >>> +             OVS_NLERR(log, "Failed to set connlabel length");
> >>> +             return -EOPNOTSUPP;
> >>> +     }
> >>> +
> >>>        if (ct_info.timeout[0]) {
> >>>                if (nf_ct_set_timeout(net, ct_info.ct, family, key->ip.proto,
> >>>                                      ct_info.timeout))
> >>> @@ -1577,6 +1581,7 @@ static void __ovs_ct_free_action(struct ovs_conntrack_info *ct_info)
> >>>        if (ct_info->ct) {
> >>>                if (ct_info->timeout[0])
> >>>                        nf_ct_destroy_timeout(ct_info->ct);
> >>> +             nf_connlabels_put(nf_ct_net(ct_info->ct));
> >>>                nf_ct_tmpl_free(ct_info->ct);
> >>>        }
> >>>    }
> >>> @@ -2002,17 +2007,9 @@ struct genl_family dp_ct_limit_genl_family __ro_after_init = {
> >>>
> >>>    int ovs_ct_init(struct net *net)
> >>>    {
> >>> -     unsigned int n_bits = sizeof(struct ovs_key_ct_labels) * BITS_PER_BYTE;
> >>> +#if  IS_ENABLED(CONFIG_NETFILTER_CONNCOUNT)
> >>>        struct ovs_net *ovs_net = net_generic(net, ovs_net_id);
> >>>
> >>> -     if (nf_connlabels_get(net, n_bits - 1)) {
> >>> -             ovs_net->xt_label = false;
> >>> -             OVS_NLERR(true, "Failed to set connlabel length");
> >>> -     } else {
> >>> -             ovs_net->xt_label = true;
> >>> -     }
> >>> -
> >>> -#if  IS_ENABLED(CONFIG_NETFILTER_CONNCOUNT)
> >>>        return ovs_ct_limit_init(net, ovs_net);
> >>>    #else
> >>>        return 0;
> >>> @@ -2021,12 +2018,9 @@ int ovs_ct_init(struct net *net)
> >>>
> >>>    void ovs_ct_exit(struct net *net)
> >>>    {
> >>> +#if  IS_ENABLED(CONFIG_NETFILTER_CONNCOUNT)
> >>>        struct ovs_net *ovs_net = net_generic(net, ovs_net_id);
> >>>
> >>> -#if  IS_ENABLED(CONFIG_NETFILTER_CONNCOUNT)
> >>>        ovs_ct_limit_exit(net, ovs_net);
> >>>    #endif
> >>> -
> >>> -     if (ovs_net->xt_label)
> >>> -             nf_connlabels_put(net);
> >>>    }
> >>> diff --git a/net/openvswitch/datapath.h b/net/openvswitch/datapath.h
> >>> index 9ca6231ea647..365b9bb7f546 100644
> >>> --- a/net/openvswitch/datapath.h
> >>> +++ b/net/openvswitch/datapath.h
> >>> @@ -160,9 +160,6 @@ struct ovs_net {
> >>>    #if IS_ENABLED(CONFIG_NETFILTER_CONNCOUNT)
> >>>        struct ovs_ct_limit_info *ct_limit_info;
> >>>    #endif
> >>> -
> >>> -     /* Module reference for configuring conntrack. */
> >>> -     bool xt_label;
> >>>    };
> >>>
> >>>    /**
> >>
>

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCHv2 net-next] openvswitch: switch to per-action label counting in conntrack
  2025-02-25 14:57       ` Xin Long
@ 2025-03-02 18:22         ` Xin Long
  2025-03-03  2:14           ` Jianbo Liu
  2025-03-03 10:56           ` Ilya Maximets
  0 siblings, 2 replies; 17+ messages in thread
From: Xin Long @ 2025-03-02 18:22 UTC (permalink / raw)
  To: Jianbo Liu
  Cc: network dev, dev, ovs-dev, davem, kuba, Eric Dumazet, Paolo Abeni,
	Pravin B Shelar, Ilya Maximets, Aaron Conole, Florian Westphal

On Tue, Feb 25, 2025 at 9:57 AM Xin Long <lucien.xin@gmail.com> wrote:
>
> On Mon, Feb 24, 2025 at 8:38 PM Jianbo Liu <jianbol@nvidia.com> wrote:
> >
> >
> >
> > On 2/25/2025 3:55 AM, Xin Long wrote:
> > > On Mon, Feb 24, 2025 at 4:01 AM Jianbo Liu <jianbol@nvidia.com> wrote:
> > >>
> > >>
> > >>
> > >> On 8/13/2024 1:17 AM, Xin Long wrote:
> > >>> Similar to commit 70f06c115bcc ("sched: act_ct: switch to per-action
> > >>> label counting"), we should also switch to per-action label counting
> > >>> in openvswitch conntrack, as Florian suggested.
> > >>>
> > >>> The difference is that nf_connlabels_get() is called unconditionally
> > >>> when creating an ct action in ovs_ct_copy_action(). As with these
> > >>> flows:
> > >>>
> > >>>     table=0,ip,actions=ct(commit,table=1)
> > >>>     table=1,ip,actions=ct(commit,exec(set_field:0xac->ct_label),table=2)
> > >>>
> > >>> it needs to make sure the label ext is created in the 1st flow before
> > >>> the ct is committed in ovs_ct_commit(). Otherwise, the warning in
> > >>> nf_ct_ext_add() when creating the label ext in the 2nd flow will
> > >>> be triggered:
> > >>>
> > >>>      WARN_ON(nf_ct_is_confirmed(ct));
> > >>>
> > >>
> > >> Hi Xin Long,
> > >>
> > >> The ct can be committed before openvswitch handles packets with CT
> > >> actions. And we can trigger the warning by creating VF and running ping
> > >> over it with the following configurations:
> > >>
> > >> ovs-vsctl add-br br
> > >> ovs-vsctl add-port br eth2
> > >> ovs-vsctl add-port br eth4
> > >> ovs-ofctl add-flow br "table=0, in_port=eth4,ip,ct_state=-trk
> > >> actions=ct(table=1)"
> > >> ovs-ofctl add-flow br "table=1, in_port=eth4,ip,ct_state=+trk+new
> > >> actions=ct(exec(set_field:0xef7d->ct_label), commit), output:eth2"
> > >> ovs-ofctl add-flow br "table=1, in_port=eth4,ip,ct_label=0xef7d,
> > >> ct_state=+trk+est actions=output:eth2"
> > >>
> > >> The eth2 is PF, and eth4 is VF's representor.
> > >> Would you like to fix it?
> > > Hi, Jianbo,
> > >
> > > Sure, we have to attach a new ct to the skb in __ovs_ct_lookup() for
> > > this case, and even delete the one created before ovs_ct.
> > >
> > > Can you check if this works on your env?
> >
> > Yes, it works.
> > Could you please submit the formal patch? Thanks!
> Great, I will post after running some of my local tests.
>
Hi Jianbo,

I recently ran some tests and observed that the current approach cannot
completely avoid the warning. If an skb enters __ovs_ct_lookup() without
an attached connection tracking (ct) entry, it may still acquire an
existing ct created outside of OVS (possibly by netfilter) through
nf_conntrack_in(). This will trigger the warning in ovs_ct_set_labels().

Deleting a ct created outside OVS and creating a new one within
__ovs_ct_lookup() doesn't seem reasonable and would be difficult to
implement. However, since OVS is not supposed to use ct entries created
externally, I believe ct zones can be used to prevent this issue.
In your case, the following flows should work:

ovs-ofctl add-flow br "table=0, in_port=eth4,ip,ct_state=-trk
actions=ct(table=1,zone=1)"
ovs-ofctl add-flow br "table=1,
in_port=eth4,ip,ct_state=+trk+new,ct_zone=1
actions=ct(exec(set_field:0xef7d->ct_label),commit,zone=1),
output:eth2"
ovs-ofctl add-flow br "table=1,
in_port=eth4,ip,ct_label=0xef7d,ct_state=+trk+est,ct_zone=1
actions=output:eth2"

Regarding the warning triggered by externally created ct entries, I plan
to remove the ovs_ct_get_conn_labels() call from ovs_ct_set_labels() and
I'll let nf_connlabels_replace() return an error in such cases, similar
to how tcf_ct_act_set_labels() handles this scenario in tc act_ct.

Thanks.
> >
> > >
> > > diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c
> > > index 3bb4810234aa..c599ee013dfe 100644
> > > --- a/net/openvswitch/conntrack.c
> > > +++ b/net/openvswitch/conntrack.c
> > > @@ -595,6 +595,11 @@ static bool skb_nfct_cached(struct net *net,
> > >               rcu_dereference(timeout_ext->timeout))
> > >               return false;
> > >       }
> > > +    if (IS_ENABLED(CONFIG_NF_CONNTRACK_LABELS) && !nf_ct_labels_find(ct)) {
> > > +        if (nf_ct_is_confirmed(ct))
> > > +            nf_ct_delete(ct, 0, 0);
> > > +        return false;
> > > +    }
> > >
> > > Thanks.
> > >
> > >>
> > >> Thanks!
> > >> Jianbo
> > >>
> > >>> Signed-off-by: Xin Long <lucien.xin@gmail.com>
> > >>> ---
> > >>> v2: move ovs_net into #if in ovs_ct_exit() as Jakub noticed.
> > >>> ---
> > >>>    net/openvswitch/conntrack.c | 30 ++++++++++++------------------
> > >>>    net/openvswitch/datapath.h  |  3 ---
> > >>>    2 files changed, 12 insertions(+), 21 deletions(-)
> > >>>
> > >>> diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c
> > >>> index 8eb1d644b741..a3da5ee34f92 100644
> > >>> --- a/net/openvswitch/conntrack.c
> > >>> +++ b/net/openvswitch/conntrack.c
> > >>> @@ -1368,11 +1368,8 @@ bool ovs_ct_verify(struct net *net, enum ovs_key_attr attr)
> > >>>            attr == OVS_KEY_ATTR_CT_MARK)
> > >>>                return true;
> > >>>        if (IS_ENABLED(CONFIG_NF_CONNTRACK_LABELS) &&
> > >>> -         attr == OVS_KEY_ATTR_CT_LABELS) {
> > >>> -             struct ovs_net *ovs_net = net_generic(net, ovs_net_id);
> > >>> -
> > >>> -             return ovs_net->xt_label;
> > >>> -     }
> > >>> +         attr == OVS_KEY_ATTR_CT_LABELS)
> > >>> +             return true;
> > >>>
> > >>>        return false;
> > >>>    }
> > >>> @@ -1381,6 +1378,7 @@ int ovs_ct_copy_action(struct net *net, const struct nlattr *attr,
> > >>>                       const struct sw_flow_key *key,
> > >>>                       struct sw_flow_actions **sfa,  bool log)
> > >>>    {
> > >>> +     unsigned int n_bits = sizeof(struct ovs_key_ct_labels) * BITS_PER_BYTE;
> > >>>        struct ovs_conntrack_info ct_info;
> > >>>        const char *helper = NULL;
> > >>>        u16 family;
> > >>> @@ -1409,6 +1407,12 @@ int ovs_ct_copy_action(struct net *net, const struct nlattr *attr,
> > >>>                return -ENOMEM;
> > >>>        }
> > >>>
> > >>> +     if (nf_connlabels_get(net, n_bits - 1)) {
> > >>> +             nf_ct_tmpl_free(ct_info.ct);
> > >>> +             OVS_NLERR(log, "Failed to set connlabel length");
> > >>> +             return -EOPNOTSUPP;
> > >>> +     }
> > >>> +
> > >>>        if (ct_info.timeout[0]) {
> > >>>                if (nf_ct_set_timeout(net, ct_info.ct, family, key->ip.proto,
> > >>>                                      ct_info.timeout))
> > >>> @@ -1577,6 +1581,7 @@ static void __ovs_ct_free_action(struct ovs_conntrack_info *ct_info)
> > >>>        if (ct_info->ct) {
> > >>>                if (ct_info->timeout[0])
> > >>>                        nf_ct_destroy_timeout(ct_info->ct);
> > >>> +             nf_connlabels_put(nf_ct_net(ct_info->ct));
> > >>>                nf_ct_tmpl_free(ct_info->ct);
> > >>>        }
> > >>>    }
> > >>> @@ -2002,17 +2007,9 @@ struct genl_family dp_ct_limit_genl_family __ro_after_init = {
> > >>>
> > >>>    int ovs_ct_init(struct net *net)
> > >>>    {
> > >>> -     unsigned int n_bits = sizeof(struct ovs_key_ct_labels) * BITS_PER_BYTE;
> > >>> +#if  IS_ENABLED(CONFIG_NETFILTER_CONNCOUNT)
> > >>>        struct ovs_net *ovs_net = net_generic(net, ovs_net_id);
> > >>>
> > >>> -     if (nf_connlabels_get(net, n_bits - 1)) {
> > >>> -             ovs_net->xt_label = false;
> > >>> -             OVS_NLERR(true, "Failed to set connlabel length");
> > >>> -     } else {
> > >>> -             ovs_net->xt_label = true;
> > >>> -     }
> > >>> -
> > >>> -#if  IS_ENABLED(CONFIG_NETFILTER_CONNCOUNT)
> > >>>        return ovs_ct_limit_init(net, ovs_net);
> > >>>    #else
> > >>>        return 0;
> > >>> @@ -2021,12 +2018,9 @@ int ovs_ct_init(struct net *net)
> > >>>
> > >>>    void ovs_ct_exit(struct net *net)
> > >>>    {
> > >>> +#if  IS_ENABLED(CONFIG_NETFILTER_CONNCOUNT)
> > >>>        struct ovs_net *ovs_net = net_generic(net, ovs_net_id);
> > >>>
> > >>> -#if  IS_ENABLED(CONFIG_NETFILTER_CONNCOUNT)
> > >>>        ovs_ct_limit_exit(net, ovs_net);
> > >>>    #endif
> > >>> -
> > >>> -     if (ovs_net->xt_label)
> > >>> -             nf_connlabels_put(net);
> > >>>    }
> > >>> diff --git a/net/openvswitch/datapath.h b/net/openvswitch/datapath.h
> > >>> index 9ca6231ea647..365b9bb7f546 100644
> > >>> --- a/net/openvswitch/datapath.h
> > >>> +++ b/net/openvswitch/datapath.h
> > >>> @@ -160,9 +160,6 @@ struct ovs_net {
> > >>>    #if IS_ENABLED(CONFIG_NETFILTER_CONNCOUNT)
> > >>>        struct ovs_ct_limit_info *ct_limit_info;
> > >>>    #endif
> > >>> -
> > >>> -     /* Module reference for configuring conntrack. */
> > >>> -     bool xt_label;
> > >>>    };
> > >>>
> > >>>    /**
> > >>
> >

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCHv2 net-next] openvswitch: switch to per-action label counting in conntrack
  2025-03-02 18:22         ` Xin Long
@ 2025-03-03  2:14           ` Jianbo Liu
  2025-03-03 16:42             ` Xin Long
  2025-03-03 10:56           ` Ilya Maximets
  1 sibling, 1 reply; 17+ messages in thread
From: Jianbo Liu @ 2025-03-03  2:14 UTC (permalink / raw)
  To: Xin Long
  Cc: network dev, dev, ovs-dev, davem, kuba, Eric Dumazet, Paolo Abeni,
	Pravin B Shelar, Ilya Maximets, Aaron Conole, Florian Westphal



On 3/3/2025 2:22 AM, Xin Long wrote:
> On Tue, Feb 25, 2025 at 9:57 AM Xin Long <lucien.xin@gmail.com> wrote:
>>
>> On Mon, Feb 24, 2025 at 8:38 PM Jianbo Liu <jianbol@nvidia.com> wrote:
>>>
>>>
>>>
>>> On 2/25/2025 3:55 AM, Xin Long wrote:
>>>> On Mon, Feb 24, 2025 at 4:01 AM Jianbo Liu <jianbol@nvidia.com> wrote:
>>>>>
>>>>>
>>>>>
>>>>> On 8/13/2024 1:17 AM, Xin Long wrote:
>>>>>> Similar to commit 70f06c115bcc ("sched: act_ct: switch to per-action
>>>>>> label counting"), we should also switch to per-action label counting
>>>>>> in openvswitch conntrack, as Florian suggested.
>>>>>>
>>>>>> The difference is that nf_connlabels_get() is called unconditionally
>>>>>> when creating an ct action in ovs_ct_copy_action(). As with these
>>>>>> flows:
>>>>>>
>>>>>>      table=0,ip,actions=ct(commit,table=1)
>>>>>>      table=1,ip,actions=ct(commit,exec(set_field:0xac->ct_label),table=2)
>>>>>>
>>>>>> it needs to make sure the label ext is created in the 1st flow before
>>>>>> the ct is committed in ovs_ct_commit(). Otherwise, the warning in
>>>>>> nf_ct_ext_add() when creating the label ext in the 2nd flow will
>>>>>> be triggered:
>>>>>>
>>>>>>       WARN_ON(nf_ct_is_confirmed(ct));
>>>>>>
>>>>>
>>>>> Hi Xin Long,
>>>>>
>>>>> The ct can be committed before openvswitch handles packets with CT
>>>>> actions. And we can trigger the warning by creating VF and running ping
>>>>> over it with the following configurations:
>>>>>
>>>>> ovs-vsctl add-br br
>>>>> ovs-vsctl add-port br eth2
>>>>> ovs-vsctl add-port br eth4
>>>>> ovs-ofctl add-flow br "table=0, in_port=eth4,ip,ct_state=-trk
>>>>> actions=ct(table=1)"
>>>>> ovs-ofctl add-flow br "table=1, in_port=eth4,ip,ct_state=+trk+new
>>>>> actions=ct(exec(set_field:0xef7d->ct_label), commit), output:eth2"
>>>>> ovs-ofctl add-flow br "table=1, in_port=eth4,ip,ct_label=0xef7d,
>>>>> ct_state=+trk+est actions=output:eth2"
>>>>>
>>>>> The eth2 is PF, and eth4 is VF's representor.
>>>>> Would you like to fix it?
>>>> Hi, Jianbo,
>>>>
>>>> Sure, we have to attach a new ct to the skb in __ovs_ct_lookup() for
>>>> this case, and even delete the one created before ovs_ct.
>>>>
>>>> Can you check if this works on your env?
>>>
>>> Yes, it works.
>>> Could you please submit the formal patch? Thanks!
>> Great, I will post after running some of my local tests.
>>
> Hi Jianbo,
> 
> I recently ran some tests and observed that the current approach cannot
> completely avoid the warning. If an skb enters __ovs_ct_lookup() without
> an attached connection tracking (ct) entry, it may still acquire an
> existing ct created outside of OVS (possibly by netfilter) through
> nf_conntrack_in(). This will trigger the warning in ovs_ct_set_labels().
> 
> Deleting a ct created outside OVS and creating a new one within
> __ovs_ct_lookup() doesn't seem reasonable and would be difficult to

Yes, I'm also skeptical of your temporary fix, and waiting for your 
formal one.

> implement. However, since OVS is not supposed to use ct entries created
> externally, I believe ct zones can be used to prevent this issue.
> In your case, the following flows should work:
> 
> ovs-ofctl add-flow br "table=0, in_port=eth4,ip,ct_state=-trk
> actions=ct(table=1,zone=1)"
> ovs-ofctl add-flow br "table=1,
> in_port=eth4,ip,ct_state=+trk+new,ct_zone=1
> actions=ct(exec(set_field:0xef7d->ct_label),commit,zone=1),
> output:eth2"
> ovs-ofctl add-flow br "table=1,
> in_port=eth4,ip,ct_label=0xef7d,ct_state=+trk+est,ct_zone=1
> actions=output:eth2"
> 
> Regarding the warning triggered by externally created ct entries, I plan
> to remove the ovs_ct_get_conn_labels() call from ovs_ct_set_labels() and
> I'll let nf_connlabels_replace() return an error in such cases, similar
> to how tcf_ct_act_set_labels() handles this scenario in tc act_ct.
> 

It's a good idea to be consistent with act_ct implementation. But, would 
you like to revert first if it takes long time to work on the fix?

Thanks!

> Thanks.
>>>
>>>>
>>>> diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c
>>>> index 3bb4810234aa..c599ee013dfe 100644
>>>> --- a/net/openvswitch/conntrack.c
>>>> +++ b/net/openvswitch/conntrack.c
>>>> @@ -595,6 +595,11 @@ static bool skb_nfct_cached(struct net *net,
>>>>                rcu_dereference(timeout_ext->timeout))
>>>>                return false;
>>>>        }
>>>> +    if (IS_ENABLED(CONFIG_NF_CONNTRACK_LABELS) && !nf_ct_labels_find(ct)) {
>>>> +        if (nf_ct_is_confirmed(ct))
>>>> +            nf_ct_delete(ct, 0, 0);
>>>> +        return false;
>>>> +    }
>>>>
>>>> Thanks.
>>>>
>>>>>
>>>>> Thanks!
>>>>> Jianbo
>>>>>
>>>>>> Signed-off-by: Xin Long <lucien.xin@gmail.com>
>>>>>> ---
>>>>>> v2: move ovs_net into #if in ovs_ct_exit() as Jakub noticed.
>>>>>> ---
>>>>>>     net/openvswitch/conntrack.c | 30 ++++++++++++------------------
>>>>>>     net/openvswitch/datapath.h  |  3 ---
>>>>>>     2 files changed, 12 insertions(+), 21 deletions(-)
>>>>>>
>>>>>> diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c
>>>>>> index 8eb1d644b741..a3da5ee34f92 100644
>>>>>> --- a/net/openvswitch/conntrack.c
>>>>>> +++ b/net/openvswitch/conntrack.c
>>>>>> @@ -1368,11 +1368,8 @@ bool ovs_ct_verify(struct net *net, enum ovs_key_attr attr)
>>>>>>             attr == OVS_KEY_ATTR_CT_MARK)
>>>>>>                 return true;
>>>>>>         if (IS_ENABLED(CONFIG_NF_CONNTRACK_LABELS) &&
>>>>>> -         attr == OVS_KEY_ATTR_CT_LABELS) {
>>>>>> -             struct ovs_net *ovs_net = net_generic(net, ovs_net_id);
>>>>>> -
>>>>>> -             return ovs_net->xt_label;
>>>>>> -     }
>>>>>> +         attr == OVS_KEY_ATTR_CT_LABELS)
>>>>>> +             return true;
>>>>>>
>>>>>>         return false;
>>>>>>     }
>>>>>> @@ -1381,6 +1378,7 @@ int ovs_ct_copy_action(struct net *net, const struct nlattr *attr,
>>>>>>                        const struct sw_flow_key *key,
>>>>>>                        struct sw_flow_actions **sfa,  bool log)
>>>>>>     {
>>>>>> +     unsigned int n_bits = sizeof(struct ovs_key_ct_labels) * BITS_PER_BYTE;
>>>>>>         struct ovs_conntrack_info ct_info;
>>>>>>         const char *helper = NULL;
>>>>>>         u16 family;
>>>>>> @@ -1409,6 +1407,12 @@ int ovs_ct_copy_action(struct net *net, const struct nlattr *attr,
>>>>>>                 return -ENOMEM;
>>>>>>         }
>>>>>>
>>>>>> +     if (nf_connlabels_get(net, n_bits - 1)) {
>>>>>> +             nf_ct_tmpl_free(ct_info.ct);
>>>>>> +             OVS_NLERR(log, "Failed to set connlabel length");
>>>>>> +             return -EOPNOTSUPP;
>>>>>> +     }
>>>>>> +
>>>>>>         if (ct_info.timeout[0]) {
>>>>>>                 if (nf_ct_set_timeout(net, ct_info.ct, family, key->ip.proto,
>>>>>>                                       ct_info.timeout))
>>>>>> @@ -1577,6 +1581,7 @@ static void __ovs_ct_free_action(struct ovs_conntrack_info *ct_info)
>>>>>>         if (ct_info->ct) {
>>>>>>                 if (ct_info->timeout[0])
>>>>>>                         nf_ct_destroy_timeout(ct_info->ct);
>>>>>> +             nf_connlabels_put(nf_ct_net(ct_info->ct));
>>>>>>                 nf_ct_tmpl_free(ct_info->ct);
>>>>>>         }
>>>>>>     }
>>>>>> @@ -2002,17 +2007,9 @@ struct genl_family dp_ct_limit_genl_family __ro_after_init = {
>>>>>>
>>>>>>     int ovs_ct_init(struct net *net)
>>>>>>     {
>>>>>> -     unsigned int n_bits = sizeof(struct ovs_key_ct_labels) * BITS_PER_BYTE;
>>>>>> +#if  IS_ENABLED(CONFIG_NETFILTER_CONNCOUNT)
>>>>>>         struct ovs_net *ovs_net = net_generic(net, ovs_net_id);
>>>>>>
>>>>>> -     if (nf_connlabels_get(net, n_bits - 1)) {
>>>>>> -             ovs_net->xt_label = false;
>>>>>> -             OVS_NLERR(true, "Failed to set connlabel length");
>>>>>> -     } else {
>>>>>> -             ovs_net->xt_label = true;
>>>>>> -     }
>>>>>> -
>>>>>> -#if  IS_ENABLED(CONFIG_NETFILTER_CONNCOUNT)
>>>>>>         return ovs_ct_limit_init(net, ovs_net);
>>>>>>     #else
>>>>>>         return 0;
>>>>>> @@ -2021,12 +2018,9 @@ int ovs_ct_init(struct net *net)
>>>>>>
>>>>>>     void ovs_ct_exit(struct net *net)
>>>>>>     {
>>>>>> +#if  IS_ENABLED(CONFIG_NETFILTER_CONNCOUNT)
>>>>>>         struct ovs_net *ovs_net = net_generic(net, ovs_net_id);
>>>>>>
>>>>>> -#if  IS_ENABLED(CONFIG_NETFILTER_CONNCOUNT)
>>>>>>         ovs_ct_limit_exit(net, ovs_net);
>>>>>>     #endif
>>>>>> -
>>>>>> -     if (ovs_net->xt_label)
>>>>>> -             nf_connlabels_put(net);
>>>>>>     }
>>>>>> diff --git a/net/openvswitch/datapath.h b/net/openvswitch/datapath.h
>>>>>> index 9ca6231ea647..365b9bb7f546 100644
>>>>>> --- a/net/openvswitch/datapath.h
>>>>>> +++ b/net/openvswitch/datapath.h
>>>>>> @@ -160,9 +160,6 @@ struct ovs_net {
>>>>>>     #if IS_ENABLED(CONFIG_NETFILTER_CONNCOUNT)
>>>>>>         struct ovs_ct_limit_info *ct_limit_info;
>>>>>>     #endif
>>>>>> -
>>>>>> -     /* Module reference for configuring conntrack. */
>>>>>> -     bool xt_label;
>>>>>>     };
>>>>>>
>>>>>>     /**
>>>>>
>>>


^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCHv2 net-next] openvswitch: switch to per-action label counting in conntrack
  2025-03-02 18:22         ` Xin Long
  2025-03-03  2:14           ` Jianbo Liu
@ 2025-03-03 10:56           ` Ilya Maximets
  2025-03-03 16:38             ` Xin Long
  1 sibling, 1 reply; 17+ messages in thread
From: Ilya Maximets @ 2025-03-03 10:56 UTC (permalink / raw)
  To: Xin Long, Jianbo Liu
  Cc: i.maximets, network dev, dev, ovs-dev, davem, kuba, Eric Dumazet,
	Paolo Abeni, Pravin B Shelar, Aaron Conole, Florian Westphal

On 3/2/25 19:22, Xin Long wrote:
> On Tue, Feb 25, 2025 at 9:57 AM Xin Long <lucien.xin@gmail.com> wrote:
>>
>> On Mon, Feb 24, 2025 at 8:38 PM Jianbo Liu <jianbol@nvidia.com> wrote:
>>>
>>>
>>>
>>> On 2/25/2025 3:55 AM, Xin Long wrote:
>>>> On Mon, Feb 24, 2025 at 4:01 AM Jianbo Liu <jianbol@nvidia.com> wrote:
>>>>>
>>>>>
>>>>>
>>>>> On 8/13/2024 1:17 AM, Xin Long wrote:
>>>>>> Similar to commit 70f06c115bcc ("sched: act_ct: switch to per-action
>>>>>> label counting"), we should also switch to per-action label counting
>>>>>> in openvswitch conntrack, as Florian suggested.
>>>>>>
>>>>>> The difference is that nf_connlabels_get() is called unconditionally
>>>>>> when creating an ct action in ovs_ct_copy_action(). As with these
>>>>>> flows:
>>>>>>
>>>>>>     table=0,ip,actions=ct(commit,table=1)
>>>>>>     table=1,ip,actions=ct(commit,exec(set_field:0xac->ct_label),table=2)
>>>>>>
>>>>>> it needs to make sure the label ext is created in the 1st flow before
>>>>>> the ct is committed in ovs_ct_commit(). Otherwise, the warning in
>>>>>> nf_ct_ext_add() when creating the label ext in the 2nd flow will
>>>>>> be triggered:
>>>>>>
>>>>>>      WARN_ON(nf_ct_is_confirmed(ct));
>>>>>>
>>>>>
>>>>> Hi Xin Long,
>>>>>
>>>>> The ct can be committed before openvswitch handles packets with CT
>>>>> actions. And we can trigger the warning by creating VF and running ping
>>>>> over it with the following configurations:
>>>>>
>>>>> ovs-vsctl add-br br
>>>>> ovs-vsctl add-port br eth2
>>>>> ovs-vsctl add-port br eth4
>>>>> ovs-ofctl add-flow br "table=0, in_port=eth4,ip,ct_state=-trk
>>>>> actions=ct(table=1)"
>>>>> ovs-ofctl add-flow br "table=1, in_port=eth4,ip,ct_state=+trk+new
>>>>> actions=ct(exec(set_field:0xef7d->ct_label), commit), output:eth2"
>>>>> ovs-ofctl add-flow br "table=1, in_port=eth4,ip,ct_label=0xef7d,
>>>>> ct_state=+trk+est actions=output:eth2"
>>>>>
>>>>> The eth2 is PF, and eth4 is VF's representor.
>>>>> Would you like to fix it?
>>>> Hi, Jianbo,
>>>>
>>>> Sure, we have to attach a new ct to the skb in __ovs_ct_lookup() for
>>>> this case, and even delete the one created before ovs_ct.
>>>>
>>>> Can you check if this works on your env?
>>>
>>> Yes, it works.
>>> Could you please submit the formal patch? Thanks!
>> Great, I will post after running some of my local tests.
>>
> Hi Jianbo,
> 
> I recently ran some tests and observed that the current approach cannot
> completely avoid the warning. If an skb enters __ovs_ct_lookup() without
> an attached connection tracking (ct) entry, it may still acquire an
> existing ct created outside of OVS (possibly by netfilter) through
> nf_conntrack_in(). This will trigger the warning in ovs_ct_set_labels().
> 
> Deleting a ct created outside OVS and creating a new one within
> __ovs_ct_lookup() doesn't seem reasonable and would be difficult to
> implement. However, since OVS is not supposed to use ct entries created
> externally,

I'm not fully following this conversation, but this statement doesn't
seem right.  OVS should be able to use ct entries created externally.
i.e. if the packet already has some ct entry attached while entering
OVS datapth, OVS should be able to match on the content of that entry.

> I believe ct zones can be used to prevent this issue.
> In your case, the following flows should work:
> 
> ovs-ofctl add-flow br "table=0, in_port=eth4,ip,ct_state=-trk
> actions=ct(table=1,zone=1)"
> ovs-ofctl add-flow br "table=1,
> in_port=eth4,ip,ct_state=+trk+new,ct_zone=1
> actions=ct(exec(set_field:0xef7d->ct_label),commit,zone=1),
> output:eth2"
> ovs-ofctl add-flow br "table=1,
> in_port=eth4,ip,ct_label=0xef7d,ct_state=+trk+est,ct_zone=1
> actions=output:eth2"
> 
> Regarding the warning triggered by externally created ct entries, I plan
> to remove the ovs_ct_get_conn_labels() call from ovs_ct_set_labels() and
> I'll let nf_connlabels_replace() return an error in such cases, similar
> to how tcf_ct_act_set_labels() handles this scenario in tc act_ct.
> 
> Thanks.
>>>
>>>>
>>>> diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c
>>>> index 3bb4810234aa..c599ee013dfe 100644
>>>> --- a/net/openvswitch/conntrack.c
>>>> +++ b/net/openvswitch/conntrack.c
>>>> @@ -595,6 +595,11 @@ static bool skb_nfct_cached(struct net *net,
>>>>               rcu_dereference(timeout_ext->timeout))
>>>>               return false;
>>>>       }
>>>> +    if (IS_ENABLED(CONFIG_NF_CONNTRACK_LABELS) && !nf_ct_labels_find(ct)) {
>>>> +        if (nf_ct_is_confirmed(ct))
>>>> +            nf_ct_delete(ct, 0, 0);
>>>> +        return false;
>>>> +    }
>>>>
>>>> Thanks.
>>>>
>>>>>
>>>>> Thanks!
>>>>> Jianbo
>>>>>
>>>>>> Signed-off-by: Xin Long <lucien.xin@gmail.com>
>>>>>> ---
>>>>>> v2: move ovs_net into #if in ovs_ct_exit() as Jakub noticed.
>>>>>> ---
>>>>>>    net/openvswitch/conntrack.c | 30 ++++++++++++------------------
>>>>>>    net/openvswitch/datapath.h  |  3 ---
>>>>>>    2 files changed, 12 insertions(+), 21 deletions(-)
>>>>>>
>>>>>> diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c
>>>>>> index 8eb1d644b741..a3da5ee34f92 100644
>>>>>> --- a/net/openvswitch/conntrack.c
>>>>>> +++ b/net/openvswitch/conntrack.c
>>>>>> @@ -1368,11 +1368,8 @@ bool ovs_ct_verify(struct net *net, enum ovs_key_attr attr)
>>>>>>            attr == OVS_KEY_ATTR_CT_MARK)
>>>>>>                return true;
>>>>>>        if (IS_ENABLED(CONFIG_NF_CONNTRACK_LABELS) &&
>>>>>> -         attr == OVS_KEY_ATTR_CT_LABELS) {
>>>>>> -             struct ovs_net *ovs_net = net_generic(net, ovs_net_id);
>>>>>> -
>>>>>> -             return ovs_net->xt_label;
>>>>>> -     }
>>>>>> +         attr == OVS_KEY_ATTR_CT_LABELS)
>>>>>> +             return true;
>>>>>>
>>>>>>        return false;
>>>>>>    }
>>>>>> @@ -1381,6 +1378,7 @@ int ovs_ct_copy_action(struct net *net, const struct nlattr *attr,
>>>>>>                       const struct sw_flow_key *key,
>>>>>>                       struct sw_flow_actions **sfa,  bool log)
>>>>>>    {
>>>>>> +     unsigned int n_bits = sizeof(struct ovs_key_ct_labels) * BITS_PER_BYTE;
>>>>>>        struct ovs_conntrack_info ct_info;
>>>>>>        const char *helper = NULL;
>>>>>>        u16 family;
>>>>>> @@ -1409,6 +1407,12 @@ int ovs_ct_copy_action(struct net *net, const struct nlattr *attr,
>>>>>>                return -ENOMEM;
>>>>>>        }
>>>>>>
>>>>>> +     if (nf_connlabels_get(net, n_bits - 1)) {
>>>>>> +             nf_ct_tmpl_free(ct_info.ct);
>>>>>> +             OVS_NLERR(log, "Failed to set connlabel length");
>>>>>> +             return -EOPNOTSUPP;
>>>>>> +     }
>>>>>> +
>>>>>>        if (ct_info.timeout[0]) {
>>>>>>                if (nf_ct_set_timeout(net, ct_info.ct, family, key->ip.proto,
>>>>>>                                      ct_info.timeout))
>>>>>> @@ -1577,6 +1581,7 @@ static void __ovs_ct_free_action(struct ovs_conntrack_info *ct_info)
>>>>>>        if (ct_info->ct) {
>>>>>>                if (ct_info->timeout[0])
>>>>>>                        nf_ct_destroy_timeout(ct_info->ct);
>>>>>> +             nf_connlabels_put(nf_ct_net(ct_info->ct));
>>>>>>                nf_ct_tmpl_free(ct_info->ct);
>>>>>>        }
>>>>>>    }
>>>>>> @@ -2002,17 +2007,9 @@ struct genl_family dp_ct_limit_genl_family __ro_after_init = {
>>>>>>
>>>>>>    int ovs_ct_init(struct net *net)
>>>>>>    {
>>>>>> -     unsigned int n_bits = sizeof(struct ovs_key_ct_labels) * BITS_PER_BYTE;
>>>>>> +#if  IS_ENABLED(CONFIG_NETFILTER_CONNCOUNT)
>>>>>>        struct ovs_net *ovs_net = net_generic(net, ovs_net_id);
>>>>>>
>>>>>> -     if (nf_connlabels_get(net, n_bits - 1)) {
>>>>>> -             ovs_net->xt_label = false;
>>>>>> -             OVS_NLERR(true, "Failed to set connlabel length");
>>>>>> -     } else {
>>>>>> -             ovs_net->xt_label = true;
>>>>>> -     }
>>>>>> -
>>>>>> -#if  IS_ENABLED(CONFIG_NETFILTER_CONNCOUNT)
>>>>>>        return ovs_ct_limit_init(net, ovs_net);
>>>>>>    #else
>>>>>>        return 0;
>>>>>> @@ -2021,12 +2018,9 @@ int ovs_ct_init(struct net *net)
>>>>>>
>>>>>>    void ovs_ct_exit(struct net *net)
>>>>>>    {
>>>>>> +#if  IS_ENABLED(CONFIG_NETFILTER_CONNCOUNT)
>>>>>>        struct ovs_net *ovs_net = net_generic(net, ovs_net_id);
>>>>>>
>>>>>> -#if  IS_ENABLED(CONFIG_NETFILTER_CONNCOUNT)
>>>>>>        ovs_ct_limit_exit(net, ovs_net);
>>>>>>    #endif
>>>>>> -
>>>>>> -     if (ovs_net->xt_label)
>>>>>> -             nf_connlabels_put(net);
>>>>>>    }
>>>>>> diff --git a/net/openvswitch/datapath.h b/net/openvswitch/datapath.h
>>>>>> index 9ca6231ea647..365b9bb7f546 100644
>>>>>> --- a/net/openvswitch/datapath.h
>>>>>> +++ b/net/openvswitch/datapath.h
>>>>>> @@ -160,9 +160,6 @@ struct ovs_net {
>>>>>>    #if IS_ENABLED(CONFIG_NETFILTER_CONNCOUNT)
>>>>>>        struct ovs_ct_limit_info *ct_limit_info;
>>>>>>    #endif
>>>>>> -
>>>>>> -     /* Module reference for configuring conntrack. */
>>>>>> -     bool xt_label;
>>>>>>    };
>>>>>>
>>>>>>    /**
>>>>>
>>>


^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCHv2 net-next] openvswitch: switch to per-action label counting in conntrack
  2025-03-03 10:56           ` Ilya Maximets
@ 2025-03-03 16:38             ` Xin Long
  2025-03-04 13:23               ` Ilya Maximets
  0 siblings, 1 reply; 17+ messages in thread
From: Xin Long @ 2025-03-03 16:38 UTC (permalink / raw)
  To: Ilya Maximets
  Cc: Jianbo Liu, network dev, dev, ovs-dev, davem, kuba, Eric Dumazet,
	Paolo Abeni, Pravin B Shelar, Aaron Conole, Florian Westphal

On Mon, Mar 3, 2025 at 5:56 AM Ilya Maximets <i.maximets@ovn.org> wrote:
>
> On 3/2/25 19:22, Xin Long wrote:
> > On Tue, Feb 25, 2025 at 9:57 AM Xin Long <lucien.xin@gmail.com> wrote:
> >>
> >> On Mon, Feb 24, 2025 at 8:38 PM Jianbo Liu <jianbol@nvidia.com> wrote:
> >>>
> >>>
> >>>
> >>> On 2/25/2025 3:55 AM, Xin Long wrote:
> >>>> On Mon, Feb 24, 2025 at 4:01 AM Jianbo Liu <jianbol@nvidia.com> wrote:
> >>>>>
> >>>>>
> >>>>>
> >>>>> On 8/13/2024 1:17 AM, Xin Long wrote:
> >>>>>> Similar to commit 70f06c115bcc ("sched: act_ct: switch to per-action
> >>>>>> label counting"), we should also switch to per-action label counting
> >>>>>> in openvswitch conntrack, as Florian suggested.
> >>>>>>
> >>>>>> The difference is that nf_connlabels_get() is called unconditionally
> >>>>>> when creating an ct action in ovs_ct_copy_action(). As with these
> >>>>>> flows:
> >>>>>>
> >>>>>>     table=0,ip,actions=ct(commit,table=1)
> >>>>>>     table=1,ip,actions=ct(commit,exec(set_field:0xac->ct_label),table=2)
> >>>>>>
> >>>>>> it needs to make sure the label ext is created in the 1st flow before
> >>>>>> the ct is committed in ovs_ct_commit(). Otherwise, the warning in
> >>>>>> nf_ct_ext_add() when creating the label ext in the 2nd flow will
> >>>>>> be triggered:
> >>>>>>
> >>>>>>      WARN_ON(nf_ct_is_confirmed(ct));
> >>>>>>
> >>>>>
> >>>>> Hi Xin Long,
> >>>>>
> >>>>> The ct can be committed before openvswitch handles packets with CT
> >>>>> actions. And we can trigger the warning by creating VF and running ping
> >>>>> over it with the following configurations:
> >>>>>
> >>>>> ovs-vsctl add-br br
> >>>>> ovs-vsctl add-port br eth2
> >>>>> ovs-vsctl add-port br eth4
> >>>>> ovs-ofctl add-flow br "table=0, in_port=eth4,ip,ct_state=-trk
> >>>>> actions=ct(table=1)"
> >>>>> ovs-ofctl add-flow br "table=1, in_port=eth4,ip,ct_state=+trk+new
> >>>>> actions=ct(exec(set_field:0xef7d->ct_label), commit), output:eth2"
> >>>>> ovs-ofctl add-flow br "table=1, in_port=eth4,ip,ct_label=0xef7d,
> >>>>> ct_state=+trk+est actions=output:eth2"
> >>>>>
> >>>>> The eth2 is PF, and eth4 is VF's representor.
> >>>>> Would you like to fix it?
> >>>> Hi, Jianbo,
> >>>>
> >>>> Sure, we have to attach a new ct to the skb in __ovs_ct_lookup() for
> >>>> this case, and even delete the one created before ovs_ct.
> >>>>
> >>>> Can you check if this works on your env?
> >>>
> >>> Yes, it works.
> >>> Could you please submit the formal patch? Thanks!
> >> Great, I will post after running some of my local tests.
> >>
> > Hi Jianbo,
> >
> > I recently ran some tests and observed that the current approach cannot
> > completely avoid the warning. If an skb enters __ovs_ct_lookup() without
> > an attached connection tracking (ct) entry, it may still acquire an
> > existing ct created outside of OVS (possibly by netfilter) through
> > nf_conntrack_in(). This will trigger the warning in ovs_ct_set_labels().
> >
> > Deleting a ct created outside OVS and creating a new one within
> > __ovs_ct_lookup() doesn't seem reasonable and would be difficult to
> > implement. However, since OVS is not supposed to use ct entries created
> > externally,
>
> I'm not fully following this conversation, but this statement doesn't
> seem right.  OVS should be able to use ct entries created externally.
> i.e. if the packet already has some ct entry attached while entering
> OVS datapth, OVS should be able to match on the content of that entry.
>
Hi Ilya,

Thank you for your comment.

If OVS should use conntrack (ct) entries created externally, features
relying on NF_CT_EXT_XXX may not work on such entries. The NF_CT_EXT_LABELS
extension discussed in this thread is one example. Other extensions like
NF_CT_EXT_HELPER, NF_CT_EXT_ECACHE, and NF_CT_EXT_TIMEOUT may also be
affected.

If the ct entry already exists before the OVS flow with ct actions is
created, the entry might not have the required NF_CT_EXT extension as
it was not created with the tmpl set by OVS, even though the OVS flow
requests it.

As far as I know, using a separate conntrack zone to avoid reusing the
existing ct entry can work around this issue.

Let me know if I missed something, or do you have any better solutions?

Thanks.

> > I believe ct zones can be used to prevent this issue.
> > In your case, the following flows should work:
> >
> > ovs-ofctl add-flow br "table=0, in_port=eth4,ip,ct_state=-trk
> > actions=ct(table=1,zone=1)"
> > ovs-ofctl add-flow br "table=1,
> > in_port=eth4,ip,ct_state=+trk+new,ct_zone=1
> > actions=ct(exec(set_field:0xef7d->ct_label),commit,zone=1),
> > output:eth2"
> > ovs-ofctl add-flow br "table=1,
> > in_port=eth4,ip,ct_label=0xef7d,ct_state=+trk+est,ct_zone=1
> > actions=output:eth2"
> >
> > Regarding the warning triggered by externally created ct entries, I plan
> > to remove the ovs_ct_get_conn_labels() call from ovs_ct_set_labels() and
> > I'll let nf_connlabels_replace() return an error in such cases, similar
> > to how tcf_ct_act_set_labels() handles this scenario in tc act_ct.
> >
> > Thanks.
> >>>
> >>>>
> >>>> diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c
> >>>> index 3bb4810234aa..c599ee013dfe 100644
> >>>> --- a/net/openvswitch/conntrack.c
> >>>> +++ b/net/openvswitch/conntrack.c
> >>>> @@ -595,6 +595,11 @@ static bool skb_nfct_cached(struct net *net,
> >>>>               rcu_dereference(timeout_ext->timeout))
> >>>>               return false;
> >>>>       }
> >>>> +    if (IS_ENABLED(CONFIG_NF_CONNTRACK_LABELS) && !nf_ct_labels_find(ct)) {
> >>>> +        if (nf_ct_is_confirmed(ct))
> >>>> +            nf_ct_delete(ct, 0, 0);
> >>>> +        return false;
> >>>> +    }
> >>>>
> >>>> Thanks.
> >>>>
> >>>>>
> >>>>> Thanks!
> >>>>> Jianbo
> >>>>>
> >>>>>> Signed-off-by: Xin Long <lucien.xin@gmail.com>
> >>>>>> ---
> >>>>>> v2: move ovs_net into #if in ovs_ct_exit() as Jakub noticed.
> >>>>>> ---
> >>>>>>    net/openvswitch/conntrack.c | 30 ++++++++++++------------------
> >>>>>>    net/openvswitch/datapath.h  |  3 ---
> >>>>>>    2 files changed, 12 insertions(+), 21 deletions(-)
> >>>>>>
> >>>>>> diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c
> >>>>>> index 8eb1d644b741..a3da5ee34f92 100644
> >>>>>> --- a/net/openvswitch/conntrack.c
> >>>>>> +++ b/net/openvswitch/conntrack.c
> >>>>>> @@ -1368,11 +1368,8 @@ bool ovs_ct_verify(struct net *net, enum ovs_key_attr attr)
> >>>>>>            attr == OVS_KEY_ATTR_CT_MARK)
> >>>>>>                return true;
> >>>>>>        if (IS_ENABLED(CONFIG_NF_CONNTRACK_LABELS) &&
> >>>>>> -         attr == OVS_KEY_ATTR_CT_LABELS) {
> >>>>>> -             struct ovs_net *ovs_net = net_generic(net, ovs_net_id);
> >>>>>> -
> >>>>>> -             return ovs_net->xt_label;
> >>>>>> -     }
> >>>>>> +         attr == OVS_KEY_ATTR_CT_LABELS)
> >>>>>> +             return true;
> >>>>>>
> >>>>>>        return false;
> >>>>>>    }
> >>>>>> @@ -1381,6 +1378,7 @@ int ovs_ct_copy_action(struct net *net, const struct nlattr *attr,
> >>>>>>                       const struct sw_flow_key *key,
> >>>>>>                       struct sw_flow_actions **sfa,  bool log)
> >>>>>>    {
> >>>>>> +     unsigned int n_bits = sizeof(struct ovs_key_ct_labels) * BITS_PER_BYTE;
> >>>>>>        struct ovs_conntrack_info ct_info;
> >>>>>>        const char *helper = NULL;
> >>>>>>        u16 family;
> >>>>>> @@ -1409,6 +1407,12 @@ int ovs_ct_copy_action(struct net *net, const struct nlattr *attr,
> >>>>>>                return -ENOMEM;
> >>>>>>        }
> >>>>>>
> >>>>>> +     if (nf_connlabels_get(net, n_bits - 1)) {
> >>>>>> +             nf_ct_tmpl_free(ct_info.ct);
> >>>>>> +             OVS_NLERR(log, "Failed to set connlabel length");
> >>>>>> +             return -EOPNOTSUPP;
> >>>>>> +     }
> >>>>>> +
> >>>>>>        if (ct_info.timeout[0]) {
> >>>>>>                if (nf_ct_set_timeout(net, ct_info.ct, family, key->ip.proto,
> >>>>>>                                      ct_info.timeout))
> >>>>>> @@ -1577,6 +1581,7 @@ static void __ovs_ct_free_action(struct ovs_conntrack_info *ct_info)
> >>>>>>        if (ct_info->ct) {
> >>>>>>                if (ct_info->timeout[0])
> >>>>>>                        nf_ct_destroy_timeout(ct_info->ct);
> >>>>>> +             nf_connlabels_put(nf_ct_net(ct_info->ct));
> >>>>>>                nf_ct_tmpl_free(ct_info->ct);
> >>>>>>        }
> >>>>>>    }
> >>>>>> @@ -2002,17 +2007,9 @@ struct genl_family dp_ct_limit_genl_family __ro_after_init = {
> >>>>>>
> >>>>>>    int ovs_ct_init(struct net *net)
> >>>>>>    {
> >>>>>> -     unsigned int n_bits = sizeof(struct ovs_key_ct_labels) * BITS_PER_BYTE;
> >>>>>> +#if  IS_ENABLED(CONFIG_NETFILTER_CONNCOUNT)
> >>>>>>        struct ovs_net *ovs_net = net_generic(net, ovs_net_id);
> >>>>>>
> >>>>>> -     if (nf_connlabels_get(net, n_bits - 1)) {
> >>>>>> -             ovs_net->xt_label = false;
> >>>>>> -             OVS_NLERR(true, "Failed to set connlabel length");
> >>>>>> -     } else {
> >>>>>> -             ovs_net->xt_label = true;
> >>>>>> -     }
> >>>>>> -
> >>>>>> -#if  IS_ENABLED(CONFIG_NETFILTER_CONNCOUNT)
> >>>>>>        return ovs_ct_limit_init(net, ovs_net);
> >>>>>>    #else
> >>>>>>        return 0;
> >>>>>> @@ -2021,12 +2018,9 @@ int ovs_ct_init(struct net *net)
> >>>>>>
> >>>>>>    void ovs_ct_exit(struct net *net)
> >>>>>>    {
> >>>>>> +#if  IS_ENABLED(CONFIG_NETFILTER_CONNCOUNT)
> >>>>>>        struct ovs_net *ovs_net = net_generic(net, ovs_net_id);
> >>>>>>
> >>>>>> -#if  IS_ENABLED(CONFIG_NETFILTER_CONNCOUNT)
> >>>>>>        ovs_ct_limit_exit(net, ovs_net);
> >>>>>>    #endif
> >>>>>> -
> >>>>>> -     if (ovs_net->xt_label)
> >>>>>> -             nf_connlabels_put(net);
> >>>>>>    }
> >>>>>> diff --git a/net/openvswitch/datapath.h b/net/openvswitch/datapath.h
> >>>>>> index 9ca6231ea647..365b9bb7f546 100644
> >>>>>> --- a/net/openvswitch/datapath.h
> >>>>>> +++ b/net/openvswitch/datapath.h
> >>>>>> @@ -160,9 +160,6 @@ struct ovs_net {
> >>>>>>    #if IS_ENABLED(CONFIG_NETFILTER_CONNCOUNT)
> >>>>>>        struct ovs_ct_limit_info *ct_limit_info;
> >>>>>>    #endif
> >>>>>> -
> >>>>>> -     /* Module reference for configuring conntrack. */
> >>>>>> -     bool xt_label;
> >>>>>>    };
> >>>>>>
> >>>>>>    /**
> >>>>>
> >>>
>

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCHv2 net-next] openvswitch: switch to per-action label counting in conntrack
  2025-03-03  2:14           ` Jianbo Liu
@ 2025-03-03 16:42             ` Xin Long
  2025-03-04  1:20               ` Jianbo Liu
  0 siblings, 1 reply; 17+ messages in thread
From: Xin Long @ 2025-03-03 16:42 UTC (permalink / raw)
  To: Jianbo Liu
  Cc: network dev, dev, ovs-dev, davem, kuba, Eric Dumazet, Paolo Abeni,
	Pravin B Shelar, Ilya Maximets, Aaron Conole, Florian Westphal

On Sun, Mar 2, 2025 at 9:14 PM Jianbo Liu <jianbol@nvidia.com> wrote:
>
>
>
> On 3/3/2025 2:22 AM, Xin Long wrote:
> > On Tue, Feb 25, 2025 at 9:57 AM Xin Long <lucien.xin@gmail.com> wrote:
> >>
> >> On Mon, Feb 24, 2025 at 8:38 PM Jianbo Liu <jianbol@nvidia.com> wrote:
> >>>
> >>>
> >>>
> >>> On 2/25/2025 3:55 AM, Xin Long wrote:
> >>>> On Mon, Feb 24, 2025 at 4:01 AM Jianbo Liu <jianbol@nvidia.com> wrote:
> >>>>>
> >>>>>
> >>>>>
> >>>>> On 8/13/2024 1:17 AM, Xin Long wrote:
> >>>>>> Similar to commit 70f06c115bcc ("sched: act_ct: switch to per-action
> >>>>>> label counting"), we should also switch to per-action label counting
> >>>>>> in openvswitch conntrack, as Florian suggested.
> >>>>>>
> >>>>>> The difference is that nf_connlabels_get() is called unconditionally
> >>>>>> when creating an ct action in ovs_ct_copy_action(). As with these
> >>>>>> flows:
> >>>>>>
> >>>>>>      table=0,ip,actions=ct(commit,table=1)
> >>>>>>      table=1,ip,actions=ct(commit,exec(set_field:0xac->ct_label),table=2)
> >>>>>>
> >>>>>> it needs to make sure the label ext is created in the 1st flow before
> >>>>>> the ct is committed in ovs_ct_commit(). Otherwise, the warning in
> >>>>>> nf_ct_ext_add() when creating the label ext in the 2nd flow will
> >>>>>> be triggered:
> >>>>>>
> >>>>>>       WARN_ON(nf_ct_is_confirmed(ct));
> >>>>>>
> >>>>>
> >>>>> Hi Xin Long,
> >>>>>
> >>>>> The ct can be committed before openvswitch handles packets with CT
> >>>>> actions. And we can trigger the warning by creating VF and running ping
> >>>>> over it with the following configurations:
> >>>>>
> >>>>> ovs-vsctl add-br br
> >>>>> ovs-vsctl add-port br eth2
> >>>>> ovs-vsctl add-port br eth4
> >>>>> ovs-ofctl add-flow br "table=0, in_port=eth4,ip,ct_state=-trk
> >>>>> actions=ct(table=1)"
> >>>>> ovs-ofctl add-flow br "table=1, in_port=eth4,ip,ct_state=+trk+new
> >>>>> actions=ct(exec(set_field:0xef7d->ct_label), commit), output:eth2"
> >>>>> ovs-ofctl add-flow br "table=1, in_port=eth4,ip,ct_label=0xef7d,
> >>>>> ct_state=+trk+est actions=output:eth2"
> >>>>>
> >>>>> The eth2 is PF, and eth4 is VF's representor.
> >>>>> Would you like to fix it?
> >>>> Hi, Jianbo,
> >>>>
> >>>> Sure, we have to attach a new ct to the skb in __ovs_ct_lookup() for
> >>>> this case, and even delete the one created before ovs_ct.
> >>>>
> >>>> Can you check if this works on your env?
> >>>
> >>> Yes, it works.
> >>> Could you please submit the formal patch? Thanks!
> >> Great, I will post after running some of my local tests.
> >>
> > Hi Jianbo,
> >
> > I recently ran some tests and observed that the current approach cannot
> > completely avoid the warning. If an skb enters __ovs_ct_lookup() without
> > an attached connection tracking (ct) entry, it may still acquire an
> > existing ct created outside of OVS (possibly by netfilter) through
> > nf_conntrack_in(). This will trigger the warning in ovs_ct_set_labels().
> >
> > Deleting a ct created outside OVS and creating a new one within
> > __ovs_ct_lookup() doesn't seem reasonable and would be difficult to
>
> Yes, I'm also skeptical of your temporary fix, and waiting for your
> formal one.
Cool.

>
> > implement. However, since OVS is not supposed to use ct entries created
> > externally, I believe ct zones can be used to prevent this issue.
> > In your case, the following flows should work:
> >
> > ovs-ofctl add-flow br "table=0, in_port=eth4,ip,ct_state=-trk
> > actions=ct(table=1,zone=1)"
> > ovs-ofctl add-flow br "table=1,
> > in_port=eth4,ip,ct_state=+trk+new,ct_zone=1
> > actions=ct(exec(set_field:0xef7d->ct_label),commit,zone=1),
> > output:eth2"
> > ovs-ofctl add-flow br "table=1,
> > in_port=eth4,ip,ct_label=0xef7d,ct_state=+trk+est,ct_zone=1
> > actions=output:eth2"
> >
> > Regarding the warning triggered by externally created ct entries, I plan
> > to remove the ovs_ct_get_conn_labels() call from ovs_ct_set_labels() and
> > I'll let nf_connlabels_replace() return an error in such cases, similar
> > to how tcf_ct_act_set_labels() handles this scenario in tc act_ct.
> >
>
> It's a good idea to be consistent with act_ct implementation. But, would
> you like to revert first if it takes long time to work on the fix?
Sorry, revert which one?
If you mean the fix in skb_nfct_cached(), it hasn't been posted and
will not be posted.

Thanks.
>
> Thanks!
>
> > Thanks.
> >>>
> >>>>
> >>>> diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c
> >>>> index 3bb4810234aa..c599ee013dfe 100644
> >>>> --- a/net/openvswitch/conntrack.c
> >>>> +++ b/net/openvswitch/conntrack.c
> >>>> @@ -595,6 +595,11 @@ static bool skb_nfct_cached(struct net *net,
> >>>>                rcu_dereference(timeout_ext->timeout))
> >>>>                return false;
> >>>>        }
> >>>> +    if (IS_ENABLED(CONFIG_NF_CONNTRACK_LABELS) && !nf_ct_labels_find(ct)) {
> >>>> +        if (nf_ct_is_confirmed(ct))
> >>>> +            nf_ct_delete(ct, 0, 0);
> >>>> +        return false;
> >>>> +    }
> >>>>
> >>>> Thanks.
> >>>>
> >>>>>
> >>>>> Thanks!
> >>>>> Jianbo
> >>>>>
> >>>>>> Signed-off-by: Xin Long <lucien.xin@gmail.com>
> >>>>>> ---
> >>>>>> v2: move ovs_net into #if in ovs_ct_exit() as Jakub noticed.
> >>>>>> ---
> >>>>>>     net/openvswitch/conntrack.c | 30 ++++++++++++------------------
> >>>>>>     net/openvswitch/datapath.h  |  3 ---
> >>>>>>     2 files changed, 12 insertions(+), 21 deletions(-)
> >>>>>>
> >>>>>> diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c
> >>>>>> index 8eb1d644b741..a3da5ee34f92 100644
> >>>>>> --- a/net/openvswitch/conntrack.c
> >>>>>> +++ b/net/openvswitch/conntrack.c
> >>>>>> @@ -1368,11 +1368,8 @@ bool ovs_ct_verify(struct net *net, enum ovs_key_attr attr)
> >>>>>>             attr == OVS_KEY_ATTR_CT_MARK)
> >>>>>>                 return true;
> >>>>>>         if (IS_ENABLED(CONFIG_NF_CONNTRACK_LABELS) &&
> >>>>>> -         attr == OVS_KEY_ATTR_CT_LABELS) {
> >>>>>> -             struct ovs_net *ovs_net = net_generic(net, ovs_net_id);
> >>>>>> -
> >>>>>> -             return ovs_net->xt_label;
> >>>>>> -     }
> >>>>>> +         attr == OVS_KEY_ATTR_CT_LABELS)
> >>>>>> +             return true;
> >>>>>>
> >>>>>>         return false;
> >>>>>>     }
> >>>>>> @@ -1381,6 +1378,7 @@ int ovs_ct_copy_action(struct net *net, const struct nlattr *attr,
> >>>>>>                        const struct sw_flow_key *key,
> >>>>>>                        struct sw_flow_actions **sfa,  bool log)
> >>>>>>     {
> >>>>>> +     unsigned int n_bits = sizeof(struct ovs_key_ct_labels) * BITS_PER_BYTE;
> >>>>>>         struct ovs_conntrack_info ct_info;
> >>>>>>         const char *helper = NULL;
> >>>>>>         u16 family;
> >>>>>> @@ -1409,6 +1407,12 @@ int ovs_ct_copy_action(struct net *net, const struct nlattr *attr,
> >>>>>>                 return -ENOMEM;
> >>>>>>         }
> >>>>>>
> >>>>>> +     if (nf_connlabels_get(net, n_bits - 1)) {
> >>>>>> +             nf_ct_tmpl_free(ct_info.ct);
> >>>>>> +             OVS_NLERR(log, "Failed to set connlabel length");
> >>>>>> +             return -EOPNOTSUPP;
> >>>>>> +     }
> >>>>>> +
> >>>>>>         if (ct_info.timeout[0]) {
> >>>>>>                 if (nf_ct_set_timeout(net, ct_info.ct, family, key->ip.proto,
> >>>>>>                                       ct_info.timeout))
> >>>>>> @@ -1577,6 +1581,7 @@ static void __ovs_ct_free_action(struct ovs_conntrack_info *ct_info)
> >>>>>>         if (ct_info->ct) {
> >>>>>>                 if (ct_info->timeout[0])
> >>>>>>                         nf_ct_destroy_timeout(ct_info->ct);
> >>>>>> +             nf_connlabels_put(nf_ct_net(ct_info->ct));
> >>>>>>                 nf_ct_tmpl_free(ct_info->ct);
> >>>>>>         }
> >>>>>>     }
> >>>>>> @@ -2002,17 +2007,9 @@ struct genl_family dp_ct_limit_genl_family __ro_after_init = {
> >>>>>>
> >>>>>>     int ovs_ct_init(struct net *net)
> >>>>>>     {
> >>>>>> -     unsigned int n_bits = sizeof(struct ovs_key_ct_labels) * BITS_PER_BYTE;
> >>>>>> +#if  IS_ENABLED(CONFIG_NETFILTER_CONNCOUNT)
> >>>>>>         struct ovs_net *ovs_net = net_generic(net, ovs_net_id);
> >>>>>>
> >>>>>> -     if (nf_connlabels_get(net, n_bits - 1)) {
> >>>>>> -             ovs_net->xt_label = false;
> >>>>>> -             OVS_NLERR(true, "Failed to set connlabel length");
> >>>>>> -     } else {
> >>>>>> -             ovs_net->xt_label = true;
> >>>>>> -     }
> >>>>>> -
> >>>>>> -#if  IS_ENABLED(CONFIG_NETFILTER_CONNCOUNT)
> >>>>>>         return ovs_ct_limit_init(net, ovs_net);
> >>>>>>     #else
> >>>>>>         return 0;
> >>>>>> @@ -2021,12 +2018,9 @@ int ovs_ct_init(struct net *net)
> >>>>>>
> >>>>>>     void ovs_ct_exit(struct net *net)
> >>>>>>     {
> >>>>>> +#if  IS_ENABLED(CONFIG_NETFILTER_CONNCOUNT)
> >>>>>>         struct ovs_net *ovs_net = net_generic(net, ovs_net_id);
> >>>>>>
> >>>>>> -#if  IS_ENABLED(CONFIG_NETFILTER_CONNCOUNT)
> >>>>>>         ovs_ct_limit_exit(net, ovs_net);
> >>>>>>     #endif
> >>>>>> -
> >>>>>> -     if (ovs_net->xt_label)
> >>>>>> -             nf_connlabels_put(net);
> >>>>>>     }
> >>>>>> diff --git a/net/openvswitch/datapath.h b/net/openvswitch/datapath.h
> >>>>>> index 9ca6231ea647..365b9bb7f546 100644
> >>>>>> --- a/net/openvswitch/datapath.h
> >>>>>> +++ b/net/openvswitch/datapath.h
> >>>>>> @@ -160,9 +160,6 @@ struct ovs_net {
> >>>>>>     #if IS_ENABLED(CONFIG_NETFILTER_CONNCOUNT)
> >>>>>>         struct ovs_ct_limit_info *ct_limit_info;
> >>>>>>     #endif
> >>>>>> -
> >>>>>> -     /* Module reference for configuring conntrack. */
> >>>>>> -     bool xt_label;
> >>>>>>     };
> >>>>>>
> >>>>>>     /**
> >>>>>
> >>>
>

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCHv2 net-next] openvswitch: switch to per-action label counting in conntrack
  2025-03-03 16:42             ` Xin Long
@ 2025-03-04  1:20               ` Jianbo Liu
  2025-03-04 17:10                 ` Xin Long
  0 siblings, 1 reply; 17+ messages in thread
From: Jianbo Liu @ 2025-03-04  1:20 UTC (permalink / raw)
  To: Xin Long
  Cc: network dev, dev, ovs-dev, davem, kuba, Eric Dumazet, Paolo Abeni,
	Pravin B Shelar, Ilya Maximets, Aaron Conole, Florian Westphal



On 3/4/2025 12:42 AM, Xin Long wrote:
> On Sun, Mar 2, 2025 at 9:14 PM Jianbo Liu <jianbol@nvidia.com> wrote:
>>
>>
>>
>> On 3/3/2025 2:22 AM, Xin Long wrote:
>>> On Tue, Feb 25, 2025 at 9:57 AM Xin Long <lucien.xin@gmail.com> wrote:
>>>>
>>>> On Mon, Feb 24, 2025 at 8:38 PM Jianbo Liu <jianbol@nvidia.com> wrote:
>>>>>
>>>>>
>>>>>
>>>>> On 2/25/2025 3:55 AM, Xin Long wrote:
>>>>>> On Mon, Feb 24, 2025 at 4:01 AM Jianbo Liu <jianbol@nvidia.com> wrote:
>>>>>>>
>>>>>>>
>>>>>>>
>>>>>>> On 8/13/2024 1:17 AM, Xin Long wrote:
>>>>>>>> Similar to commit 70f06c115bcc ("sched: act_ct: switch to per-action
>>>>>>>> label counting"), we should also switch to per-action label counting
>>>>>>>> in openvswitch conntrack, as Florian suggested.
>>>>>>>>
>>>>>>>> The difference is that nf_connlabels_get() is called unconditionally
>>>>>>>> when creating an ct action in ovs_ct_copy_action(). As with these
>>>>>>>> flows:
>>>>>>>>
>>>>>>>>       table=0,ip,actions=ct(commit,table=1)
>>>>>>>>       table=1,ip,actions=ct(commit,exec(set_field:0xac->ct_label),table=2)
>>>>>>>>
>>>>>>>> it needs to make sure the label ext is created in the 1st flow before
>>>>>>>> the ct is committed in ovs_ct_commit(). Otherwise, the warning in
>>>>>>>> nf_ct_ext_add() when creating the label ext in the 2nd flow will
>>>>>>>> be triggered:
>>>>>>>>
>>>>>>>>        WARN_ON(nf_ct_is_confirmed(ct));
>>>>>>>>
>>>>>>>
>>>>>>> Hi Xin Long,
>>>>>>>
>>>>>>> The ct can be committed before openvswitch handles packets with CT
>>>>>>> actions. And we can trigger the warning by creating VF and running ping
>>>>>>> over it with the following configurations:
>>>>>>>
>>>>>>> ovs-vsctl add-br br
>>>>>>> ovs-vsctl add-port br eth2
>>>>>>> ovs-vsctl add-port br eth4
>>>>>>> ovs-ofctl add-flow br "table=0, in_port=eth4,ip,ct_state=-trk
>>>>>>> actions=ct(table=1)"
>>>>>>> ovs-ofctl add-flow br "table=1, in_port=eth4,ip,ct_state=+trk+new
>>>>>>> actions=ct(exec(set_field:0xef7d->ct_label), commit), output:eth2"
>>>>>>> ovs-ofctl add-flow br "table=1, in_port=eth4,ip,ct_label=0xef7d,
>>>>>>> ct_state=+trk+est actions=output:eth2"
>>>>>>>
>>>>>>> The eth2 is PF, and eth4 is VF's representor.
>>>>>>> Would you like to fix it?
>>>>>> Hi, Jianbo,
>>>>>>
>>>>>> Sure, we have to attach a new ct to the skb in __ovs_ct_lookup() for
>>>>>> this case, and even delete the one created before ovs_ct.
>>>>>>
>>>>>> Can you check if this works on your env?
>>>>>
>>>>> Yes, it works.
>>>>> Could you please submit the formal patch? Thanks!
>>>> Great, I will post after running some of my local tests.
>>>>
>>> Hi Jianbo,
>>>
>>> I recently ran some tests and observed that the current approach cannot
>>> completely avoid the warning. If an skb enters __ovs_ct_lookup() without
>>> an attached connection tracking (ct) entry, it may still acquire an
>>> existing ct created outside of OVS (possibly by netfilter) through
>>> nf_conntrack_in(). This will trigger the warning in ovs_ct_set_labels().
>>>
>>> Deleting a ct created outside OVS and creating a new one within
>>> __ovs_ct_lookup() doesn't seem reasonable and would be difficult to
>>
>> Yes, I'm also skeptical of your temporary fix, and waiting for your
>> formal one.
> Cool.
> 
>>
>>> implement. However, since OVS is not supposed to use ct entries created
>>> externally, I believe ct zones can be used to prevent this issue.
>>> In your case, the following flows should work:
>>>
>>> ovs-ofctl add-flow br "table=0, in_port=eth4,ip,ct_state=-trk
>>> actions=ct(table=1,zone=1)"
>>> ovs-ofctl add-flow br "table=1,
>>> in_port=eth4,ip,ct_state=+trk+new,ct_zone=1
>>> actions=ct(exec(set_field:0xef7d->ct_label),commit,zone=1),
>>> output:eth2"
>>> ovs-ofctl add-flow br "table=1,
>>> in_port=eth4,ip,ct_label=0xef7d,ct_state=+trk+est,ct_zone=1
>>> actions=output:eth2"
>>>
>>> Regarding the warning triggered by externally created ct entries, I plan
>>> to remove the ovs_ct_get_conn_labels() call from ovs_ct_set_labels() and
>>> I'll let nf_connlabels_replace() return an error in such cases, similar
>>> to how tcf_ct_act_set_labels() handles this scenario in tc act_ct.
>>>
>>
>> It's a good idea to be consistent with act_ct implementation. But, would
>> you like to revert first if it takes long time to work on the fix?
> Sorry, revert which one?

Of course the one we are currently replying to - "openvswitch: switch to 
per-action label counting in conntrack".

> If you mean the fix in skb_nfct_cached(), it hasn't been posted and
> will not be posted.

No. I think we both know this is just a temporary fix, and it can help 
you to understand the issue.

> 
> Thanks.
>>
>> Thanks!
>>
>>> Thanks.
>>>>>
>>>>>>
>>>>>> diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c
>>>>>> index 3bb4810234aa..c599ee013dfe 100644
>>>>>> --- a/net/openvswitch/conntrack.c
>>>>>> +++ b/net/openvswitch/conntrack.c
>>>>>> @@ -595,6 +595,11 @@ static bool skb_nfct_cached(struct net *net,
>>>>>>                 rcu_dereference(timeout_ext->timeout))
>>>>>>                 return false;
>>>>>>         }
>>>>>> +    if (IS_ENABLED(CONFIG_NF_CONNTRACK_LABELS) && !nf_ct_labels_find(ct)) {
>>>>>> +        if (nf_ct_is_confirmed(ct))
>>>>>> +            nf_ct_delete(ct, 0, 0);
>>>>>> +        return false;
>>>>>> +    }
>>>>>>
>>>>>> Thanks.
>>>>>>
>>>>>>>
>>>>>>> Thanks!
>>>>>>> Jianbo
>>>>>>>
>>>>>>>> Signed-off-by: Xin Long <lucien.xin@gmail.com>
>>>>>>>> ---
>>>>>>>> v2: move ovs_net into #if in ovs_ct_exit() as Jakub noticed.
>>>>>>>> ---
>>>>>>>>      net/openvswitch/conntrack.c | 30 ++++++++++++------------------
>>>>>>>>      net/openvswitch/datapath.h  |  3 ---
>>>>>>>>      2 files changed, 12 insertions(+), 21 deletions(-)
>>>>>>>>
>>>>>>>> diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c
>>>>>>>> index 8eb1d644b741..a3da5ee34f92 100644
>>>>>>>> --- a/net/openvswitch/conntrack.c
>>>>>>>> +++ b/net/openvswitch/conntrack.c
>>>>>>>> @@ -1368,11 +1368,8 @@ bool ovs_ct_verify(struct net *net, enum ovs_key_attr attr)
>>>>>>>>              attr == OVS_KEY_ATTR_CT_MARK)
>>>>>>>>                  return true;
>>>>>>>>          if (IS_ENABLED(CONFIG_NF_CONNTRACK_LABELS) &&
>>>>>>>> -         attr == OVS_KEY_ATTR_CT_LABELS) {
>>>>>>>> -             struct ovs_net *ovs_net = net_generic(net, ovs_net_id);
>>>>>>>> -
>>>>>>>> -             return ovs_net->xt_label;
>>>>>>>> -     }
>>>>>>>> +         attr == OVS_KEY_ATTR_CT_LABELS)
>>>>>>>> +             return true;
>>>>>>>>
>>>>>>>>          return false;
>>>>>>>>      }
>>>>>>>> @@ -1381,6 +1378,7 @@ int ovs_ct_copy_action(struct net *net, const struct nlattr *attr,
>>>>>>>>                         const struct sw_flow_key *key,
>>>>>>>>                         struct sw_flow_actions **sfa,  bool log)
>>>>>>>>      {
>>>>>>>> +     unsigned int n_bits = sizeof(struct ovs_key_ct_labels) * BITS_PER_BYTE;
>>>>>>>>          struct ovs_conntrack_info ct_info;
>>>>>>>>          const char *helper = NULL;
>>>>>>>>          u16 family;
>>>>>>>> @@ -1409,6 +1407,12 @@ int ovs_ct_copy_action(struct net *net, const struct nlattr *attr,
>>>>>>>>                  return -ENOMEM;
>>>>>>>>          }
>>>>>>>>
>>>>>>>> +     if (nf_connlabels_get(net, n_bits - 1)) {
>>>>>>>> +             nf_ct_tmpl_free(ct_info.ct);
>>>>>>>> +             OVS_NLERR(log, "Failed to set connlabel length");
>>>>>>>> +             return -EOPNOTSUPP;
>>>>>>>> +     }
>>>>>>>> +
>>>>>>>>          if (ct_info.timeout[0]) {
>>>>>>>>                  if (nf_ct_set_timeout(net, ct_info.ct, family, key->ip.proto,
>>>>>>>>                                        ct_info.timeout))
>>>>>>>> @@ -1577,6 +1581,7 @@ static void __ovs_ct_free_action(struct ovs_conntrack_info *ct_info)
>>>>>>>>          if (ct_info->ct) {
>>>>>>>>                  if (ct_info->timeout[0])
>>>>>>>>                          nf_ct_destroy_timeout(ct_info->ct);
>>>>>>>> +             nf_connlabels_put(nf_ct_net(ct_info->ct));
>>>>>>>>                  nf_ct_tmpl_free(ct_info->ct);
>>>>>>>>          }
>>>>>>>>      }
>>>>>>>> @@ -2002,17 +2007,9 @@ struct genl_family dp_ct_limit_genl_family __ro_after_init = {
>>>>>>>>
>>>>>>>>      int ovs_ct_init(struct net *net)
>>>>>>>>      {
>>>>>>>> -     unsigned int n_bits = sizeof(struct ovs_key_ct_labels) * BITS_PER_BYTE;
>>>>>>>> +#if  IS_ENABLED(CONFIG_NETFILTER_CONNCOUNT)
>>>>>>>>          struct ovs_net *ovs_net = net_generic(net, ovs_net_id);
>>>>>>>>
>>>>>>>> -     if (nf_connlabels_get(net, n_bits - 1)) {
>>>>>>>> -             ovs_net->xt_label = false;
>>>>>>>> -             OVS_NLERR(true, "Failed to set connlabel length");
>>>>>>>> -     } else {
>>>>>>>> -             ovs_net->xt_label = true;
>>>>>>>> -     }
>>>>>>>> -
>>>>>>>> -#if  IS_ENABLED(CONFIG_NETFILTER_CONNCOUNT)
>>>>>>>>          return ovs_ct_limit_init(net, ovs_net);
>>>>>>>>      #else
>>>>>>>>          return 0;
>>>>>>>> @@ -2021,12 +2018,9 @@ int ovs_ct_init(struct net *net)
>>>>>>>>
>>>>>>>>      void ovs_ct_exit(struct net *net)
>>>>>>>>      {
>>>>>>>> +#if  IS_ENABLED(CONFIG_NETFILTER_CONNCOUNT)
>>>>>>>>          struct ovs_net *ovs_net = net_generic(net, ovs_net_id);
>>>>>>>>
>>>>>>>> -#if  IS_ENABLED(CONFIG_NETFILTER_CONNCOUNT)
>>>>>>>>          ovs_ct_limit_exit(net, ovs_net);
>>>>>>>>      #endif
>>>>>>>> -
>>>>>>>> -     if (ovs_net->xt_label)
>>>>>>>> -             nf_connlabels_put(net);
>>>>>>>>      }
>>>>>>>> diff --git a/net/openvswitch/datapath.h b/net/openvswitch/datapath.h
>>>>>>>> index 9ca6231ea647..365b9bb7f546 100644
>>>>>>>> --- a/net/openvswitch/datapath.h
>>>>>>>> +++ b/net/openvswitch/datapath.h
>>>>>>>> @@ -160,9 +160,6 @@ struct ovs_net {
>>>>>>>>      #if IS_ENABLED(CONFIG_NETFILTER_CONNCOUNT)
>>>>>>>>          struct ovs_ct_limit_info *ct_limit_info;
>>>>>>>>      #endif
>>>>>>>> -
>>>>>>>> -     /* Module reference for configuring conntrack. */
>>>>>>>> -     bool xt_label;
>>>>>>>>      };
>>>>>>>>
>>>>>>>>      /**
>>>>>>>
>>>>>
>>


^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCHv2 net-next] openvswitch: switch to per-action label counting in conntrack
  2025-03-03 16:38             ` Xin Long
@ 2025-03-04 13:23               ` Ilya Maximets
  2025-03-04 15:21                 ` Xin Long
  0 siblings, 1 reply; 17+ messages in thread
From: Ilya Maximets @ 2025-03-04 13:23 UTC (permalink / raw)
  To: Xin Long
  Cc: i.maximets, Jianbo Liu, network dev, dev, ovs-dev, davem, kuba,
	Eric Dumazet, Paolo Abeni, Pravin B Shelar, Aaron Conole,
	Florian Westphal

On 3/3/25 17:38, Xin Long wrote:
> On Mon, Mar 3, 2025 at 5:56 AM Ilya Maximets <i.maximets@ovn.org> wrote:
>>
>> On 3/2/25 19:22, Xin Long wrote:
>>> On Tue, Feb 25, 2025 at 9:57 AM Xin Long <lucien.xin@gmail.com> wrote:
>>>>
>>>> On Mon, Feb 24, 2025 at 8:38 PM Jianbo Liu <jianbol@nvidia.com> wrote:
>>>>>
>>>>>
>>>>>
>>>>> On 2/25/2025 3:55 AM, Xin Long wrote:
>>>>>> On Mon, Feb 24, 2025 at 4:01 AM Jianbo Liu <jianbol@nvidia.com> wrote:
>>>>>>>
>>>>>>>
>>>>>>>
>>>>>>> On 8/13/2024 1:17 AM, Xin Long wrote:
>>>>>>>> Similar to commit 70f06c115bcc ("sched: act_ct: switch to per-action
>>>>>>>> label counting"), we should also switch to per-action label counting
>>>>>>>> in openvswitch conntrack, as Florian suggested.
>>>>>>>>
>>>>>>>> The difference is that nf_connlabels_get() is called unconditionally
>>>>>>>> when creating an ct action in ovs_ct_copy_action(). As with these
>>>>>>>> flows:
>>>>>>>>
>>>>>>>>     table=0,ip,actions=ct(commit,table=1)
>>>>>>>>     table=1,ip,actions=ct(commit,exec(set_field:0xac->ct_label),table=2)
>>>>>>>>
>>>>>>>> it needs to make sure the label ext is created in the 1st flow before
>>>>>>>> the ct is committed in ovs_ct_commit(). Otherwise, the warning in
>>>>>>>> nf_ct_ext_add() when creating the label ext in the 2nd flow will
>>>>>>>> be triggered:
>>>>>>>>
>>>>>>>>      WARN_ON(nf_ct_is_confirmed(ct));
>>>>>>>>
>>>>>>>
>>>>>>> Hi Xin Long,
>>>>>>>
>>>>>>> The ct can be committed before openvswitch handles packets with CT
>>>>>>> actions. And we can trigger the warning by creating VF and running ping
>>>>>>> over it with the following configurations:
>>>>>>>
>>>>>>> ovs-vsctl add-br br
>>>>>>> ovs-vsctl add-port br eth2
>>>>>>> ovs-vsctl add-port br eth4
>>>>>>> ovs-ofctl add-flow br "table=0, in_port=eth4,ip,ct_state=-trk
>>>>>>> actions=ct(table=1)"
>>>>>>> ovs-ofctl add-flow br "table=1, in_port=eth4,ip,ct_state=+trk+new
>>>>>>> actions=ct(exec(set_field:0xef7d->ct_label), commit), output:eth2"
>>>>>>> ovs-ofctl add-flow br "table=1, in_port=eth4,ip,ct_label=0xef7d,
>>>>>>> ct_state=+trk+est actions=output:eth2"
>>>>>>>
>>>>>>> The eth2 is PF, and eth4 is VF's representor.
>>>>>>> Would you like to fix it?
>>>>>> Hi, Jianbo,
>>>>>>
>>>>>> Sure, we have to attach a new ct to the skb in __ovs_ct_lookup() for
>>>>>> this case, and even delete the one created before ovs_ct.
>>>>>>
>>>>>> Can you check if this works on your env?
>>>>>
>>>>> Yes, it works.
>>>>> Could you please submit the formal patch? Thanks!
>>>> Great, I will post after running some of my local tests.
>>>>
>>> Hi Jianbo,
>>>
>>> I recently ran some tests and observed that the current approach cannot
>>> completely avoid the warning. If an skb enters __ovs_ct_lookup() without
>>> an attached connection tracking (ct) entry, it may still acquire an
>>> existing ct created outside of OVS (possibly by netfilter) through
>>> nf_conntrack_in(). This will trigger the warning in ovs_ct_set_labels().
>>>
>>> Deleting a ct created outside OVS and creating a new one within
>>> __ovs_ct_lookup() doesn't seem reasonable and would be difficult to
>>> implement. However, since OVS is not supposed to use ct entries created
>>> externally,
>>
>> I'm not fully following this conversation, but this statement doesn't
>> seem right.  OVS should be able to use ct entries created externally.
>> i.e. if the packet already has some ct entry attached while entering
>> OVS datapth, OVS should be able to match on the content of that entry.
>>
> Hi Ilya,
> 
> Thank you for your comment.
> 
> If OVS should use conntrack (ct) entries created externally, features
> relying on NF_CT_EXT_XXX may not work on such entries. The NF_CT_EXT_LABELS
> extension discussed in this thread is one example. Other extensions like
> NF_CT_EXT_HELPER, NF_CT_EXT_ECACHE, and NF_CT_EXT_TIMEOUT may also be
> affected.
> 
> If the ct entry already exists before the OVS flow with ct actions is
> created, the entry might not have the required NF_CT_EXT extension as
> it was not created with the tmpl set by OVS, even though the OVS flow
> requests it.

You're right about extensions and it makes sense that they are not actually
available, since the data inside labels, for example, is specific for the
application.  However, there are use cases for matching on the externally
obtained ct state, which is available.  IIRC, we have a few tests in the
main OVS system test suite that cover such scenarios.

> 
> As far as I know, using a separate conntrack zone to avoid reusing the
> existing ct entry can work around this issue.
> 
> Let me know if I missed something, or do you have any better solutions?
> 
> Thanks.
> 
>>> I believe ct zones can be used to prevent this issue.
>>> In your case, the following flows should work:
>>>
>>> ovs-ofctl add-flow br "table=0, in_port=eth4,ip,ct_state=-trk
>>> actions=ct(table=1,zone=1)"
>>> ovs-ofctl add-flow br "table=1,
>>> in_port=eth4,ip,ct_state=+trk+new,ct_zone=1
>>> actions=ct(exec(set_field:0xef7d->ct_label),commit,zone=1),
>>> output:eth2"
>>> ovs-ofctl add-flow br "table=1,
>>> in_port=eth4,ip,ct_label=0xef7d,ct_state=+trk+est,ct_zone=1
>>> actions=output:eth2"
>>>
>>> Regarding the warning triggered by externally created ct entries, I plan
>>> to remove the ovs_ct_get_conn_labels() call from ovs_ct_set_labels() and
>>> I'll let nf_connlabels_replace() return an error in such cases, similar
>>> to how tcf_ct_act_set_labels() handles this scenario in tc act_ct.
>>>
>>> Thanks.
>>>>>
>>>>>>
>>>>>> diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c
>>>>>> index 3bb4810234aa..c599ee013dfe 100644
>>>>>> --- a/net/openvswitch/conntrack.c
>>>>>> +++ b/net/openvswitch/conntrack.c
>>>>>> @@ -595,6 +595,11 @@ static bool skb_nfct_cached(struct net *net,
>>>>>>               rcu_dereference(timeout_ext->timeout))
>>>>>>               return false;
>>>>>>       }
>>>>>> +    if (IS_ENABLED(CONFIG_NF_CONNTRACK_LABELS) && !nf_ct_labels_find(ct)) {
>>>>>> +        if (nf_ct_is_confirmed(ct))
>>>>>> +            nf_ct_delete(ct, 0, 0);
>>>>>> +        return false;
>>>>>> +    }
>>>>>>
>>>>>> Thanks.
>>>>>>
>>>>>>>
>>>>>>> Thanks!
>>>>>>> Jianbo
>>>>>>>
>>>>>>>> Signed-off-by: Xin Long <lucien.xin@gmail.com>
>>>>>>>> ---
>>>>>>>> v2: move ovs_net into #if in ovs_ct_exit() as Jakub noticed.
>>>>>>>> ---
>>>>>>>>    net/openvswitch/conntrack.c | 30 ++++++++++++------------------
>>>>>>>>    net/openvswitch/datapath.h  |  3 ---
>>>>>>>>    2 files changed, 12 insertions(+), 21 deletions(-)
>>>>>>>>
>>>>>>>> diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c
>>>>>>>> index 8eb1d644b741..a3da5ee34f92 100644
>>>>>>>> --- a/net/openvswitch/conntrack.c
>>>>>>>> +++ b/net/openvswitch/conntrack.c
>>>>>>>> @@ -1368,11 +1368,8 @@ bool ovs_ct_verify(struct net *net, enum ovs_key_attr attr)
>>>>>>>>            attr == OVS_KEY_ATTR_CT_MARK)
>>>>>>>>                return true;
>>>>>>>>        if (IS_ENABLED(CONFIG_NF_CONNTRACK_LABELS) &&
>>>>>>>> -         attr == OVS_KEY_ATTR_CT_LABELS) {
>>>>>>>> -             struct ovs_net *ovs_net = net_generic(net, ovs_net_id);
>>>>>>>> -
>>>>>>>> -             return ovs_net->xt_label;
>>>>>>>> -     }
>>>>>>>> +         attr == OVS_KEY_ATTR_CT_LABELS)
>>>>>>>> +             return true;
>>>>>>>>
>>>>>>>>        return false;
>>>>>>>>    }
>>>>>>>> @@ -1381,6 +1378,7 @@ int ovs_ct_copy_action(struct net *net, const struct nlattr *attr,
>>>>>>>>                       const struct sw_flow_key *key,
>>>>>>>>                       struct sw_flow_actions **sfa,  bool log)
>>>>>>>>    {
>>>>>>>> +     unsigned int n_bits = sizeof(struct ovs_key_ct_labels) * BITS_PER_BYTE;
>>>>>>>>        struct ovs_conntrack_info ct_info;
>>>>>>>>        const char *helper = NULL;
>>>>>>>>        u16 family;
>>>>>>>> @@ -1409,6 +1407,12 @@ int ovs_ct_copy_action(struct net *net, const struct nlattr *attr,
>>>>>>>>                return -ENOMEM;
>>>>>>>>        }
>>>>>>>>
>>>>>>>> +     if (nf_connlabels_get(net, n_bits - 1)) {
>>>>>>>> +             nf_ct_tmpl_free(ct_info.ct);
>>>>>>>> +             OVS_NLERR(log, "Failed to set connlabel length");
>>>>>>>> +             return -EOPNOTSUPP;
>>>>>>>> +     }
>>>>>>>> +
>>>>>>>>        if (ct_info.timeout[0]) {
>>>>>>>>                if (nf_ct_set_timeout(net, ct_info.ct, family, key->ip.proto,
>>>>>>>>                                      ct_info.timeout))
>>>>>>>> @@ -1577,6 +1581,7 @@ static void __ovs_ct_free_action(struct ovs_conntrack_info *ct_info)
>>>>>>>>        if (ct_info->ct) {
>>>>>>>>                if (ct_info->timeout[0])
>>>>>>>>                        nf_ct_destroy_timeout(ct_info->ct);
>>>>>>>> +             nf_connlabels_put(nf_ct_net(ct_info->ct));
>>>>>>>>                nf_ct_tmpl_free(ct_info->ct);
>>>>>>>>        }
>>>>>>>>    }
>>>>>>>> @@ -2002,17 +2007,9 @@ struct genl_family dp_ct_limit_genl_family __ro_after_init = {
>>>>>>>>
>>>>>>>>    int ovs_ct_init(struct net *net)
>>>>>>>>    {
>>>>>>>> -     unsigned int n_bits = sizeof(struct ovs_key_ct_labels) * BITS_PER_BYTE;
>>>>>>>> +#if  IS_ENABLED(CONFIG_NETFILTER_CONNCOUNT)
>>>>>>>>        struct ovs_net *ovs_net = net_generic(net, ovs_net_id);
>>>>>>>>
>>>>>>>> -     if (nf_connlabels_get(net, n_bits - 1)) {
>>>>>>>> -             ovs_net->xt_label = false;
>>>>>>>> -             OVS_NLERR(true, "Failed to set connlabel length");
>>>>>>>> -     } else {
>>>>>>>> -             ovs_net->xt_label = true;
>>>>>>>> -     }
>>>>>>>> -
>>>>>>>> -#if  IS_ENABLED(CONFIG_NETFILTER_CONNCOUNT)
>>>>>>>>        return ovs_ct_limit_init(net, ovs_net);
>>>>>>>>    #else
>>>>>>>>        return 0;
>>>>>>>> @@ -2021,12 +2018,9 @@ int ovs_ct_init(struct net *net)
>>>>>>>>
>>>>>>>>    void ovs_ct_exit(struct net *net)
>>>>>>>>    {
>>>>>>>> +#if  IS_ENABLED(CONFIG_NETFILTER_CONNCOUNT)
>>>>>>>>        struct ovs_net *ovs_net = net_generic(net, ovs_net_id);
>>>>>>>>
>>>>>>>> -#if  IS_ENABLED(CONFIG_NETFILTER_CONNCOUNT)
>>>>>>>>        ovs_ct_limit_exit(net, ovs_net);
>>>>>>>>    #endif
>>>>>>>> -
>>>>>>>> -     if (ovs_net->xt_label)
>>>>>>>> -             nf_connlabels_put(net);
>>>>>>>>    }
>>>>>>>> diff --git a/net/openvswitch/datapath.h b/net/openvswitch/datapath.h
>>>>>>>> index 9ca6231ea647..365b9bb7f546 100644
>>>>>>>> --- a/net/openvswitch/datapath.h
>>>>>>>> +++ b/net/openvswitch/datapath.h
>>>>>>>> @@ -160,9 +160,6 @@ struct ovs_net {
>>>>>>>>    #if IS_ENABLED(CONFIG_NETFILTER_CONNCOUNT)
>>>>>>>>        struct ovs_ct_limit_info *ct_limit_info;
>>>>>>>>    #endif
>>>>>>>> -
>>>>>>>> -     /* Module reference for configuring conntrack. */
>>>>>>>> -     bool xt_label;
>>>>>>>>    };
>>>>>>>>
>>>>>>>>    /**
>>>>>>>
>>>>>
>>


^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCHv2 net-next] openvswitch: switch to per-action label counting in conntrack
  2025-03-04 13:23               ` Ilya Maximets
@ 2025-03-04 15:21                 ` Xin Long
  0 siblings, 0 replies; 17+ messages in thread
From: Xin Long @ 2025-03-04 15:21 UTC (permalink / raw)
  To: Ilya Maximets
  Cc: Jianbo Liu, network dev, dev, ovs-dev, davem, kuba, Eric Dumazet,
	Paolo Abeni, Pravin B Shelar, Aaron Conole, Florian Westphal

On Tue, Mar 4, 2025 at 8:23 AM Ilya Maximets <i.maximets@ovn.org> wrote:
>
> On 3/3/25 17:38, Xin Long wrote:
> > On Mon, Mar 3, 2025 at 5:56 AM Ilya Maximets <i.maximets@ovn.org> wrote:
> >>
> >> On 3/2/25 19:22, Xin Long wrote:
> >>> On Tue, Feb 25, 2025 at 9:57 AM Xin Long <lucien.xin@gmail.com> wrote:
> >>>>
> >>>> On Mon, Feb 24, 2025 at 8:38 PM Jianbo Liu <jianbol@nvidia.com> wrote:
> >>>>>
> >>>>>
> >>>>>
> >>>>> On 2/25/2025 3:55 AM, Xin Long wrote:
> >>>>>> On Mon, Feb 24, 2025 at 4:01 AM Jianbo Liu <jianbol@nvidia.com> wrote:
> >>>>>>>
> >>>>>>>
> >>>>>>>
> >>>>>>> On 8/13/2024 1:17 AM, Xin Long wrote:
> >>>>>>>> Similar to commit 70f06c115bcc ("sched: act_ct: switch to per-action
> >>>>>>>> label counting"), we should also switch to per-action label counting
> >>>>>>>> in openvswitch conntrack, as Florian suggested.
> >>>>>>>>
> >>>>>>>> The difference is that nf_connlabels_get() is called unconditionally
> >>>>>>>> when creating an ct action in ovs_ct_copy_action(). As with these
> >>>>>>>> flows:
> >>>>>>>>
> >>>>>>>>     table=0,ip,actions=ct(commit,table=1)
> >>>>>>>>     table=1,ip,actions=ct(commit,exec(set_field:0xac->ct_label),table=2)
> >>>>>>>>
> >>>>>>>> it needs to make sure the label ext is created in the 1st flow before
> >>>>>>>> the ct is committed in ovs_ct_commit(). Otherwise, the warning in
> >>>>>>>> nf_ct_ext_add() when creating the label ext in the 2nd flow will
> >>>>>>>> be triggered:
> >>>>>>>>
> >>>>>>>>      WARN_ON(nf_ct_is_confirmed(ct));
> >>>>>>>>
> >>>>>>>
> >>>>>>> Hi Xin Long,
> >>>>>>>
> >>>>>>> The ct can be committed before openvswitch handles packets with CT
> >>>>>>> actions. And we can trigger the warning by creating VF and running ping
> >>>>>>> over it with the following configurations:
> >>>>>>>
> >>>>>>> ovs-vsctl add-br br
> >>>>>>> ovs-vsctl add-port br eth2
> >>>>>>> ovs-vsctl add-port br eth4
> >>>>>>> ovs-ofctl add-flow br "table=0, in_port=eth4,ip,ct_state=-trk
> >>>>>>> actions=ct(table=1)"
> >>>>>>> ovs-ofctl add-flow br "table=1, in_port=eth4,ip,ct_state=+trk+new
> >>>>>>> actions=ct(exec(set_field:0xef7d->ct_label), commit), output:eth2"
> >>>>>>> ovs-ofctl add-flow br "table=1, in_port=eth4,ip,ct_label=0xef7d,
> >>>>>>> ct_state=+trk+est actions=output:eth2"
> >>>>>>>
> >>>>>>> The eth2 is PF, and eth4 is VF's representor.
> >>>>>>> Would you like to fix it?
> >>>>>> Hi, Jianbo,
> >>>>>>
> >>>>>> Sure, we have to attach a new ct to the skb in __ovs_ct_lookup() for
> >>>>>> this case, and even delete the one created before ovs_ct.
> >>>>>>
> >>>>>> Can you check if this works on your env?
> >>>>>
> >>>>> Yes, it works.
> >>>>> Could you please submit the formal patch? Thanks!
> >>>> Great, I will post after running some of my local tests.
> >>>>
> >>> Hi Jianbo,
> >>>
> >>> I recently ran some tests and observed that the current approach cannot
> >>> completely avoid the warning. If an skb enters __ovs_ct_lookup() without
> >>> an attached connection tracking (ct) entry, it may still acquire an
> >>> existing ct created outside of OVS (possibly by netfilter) through
> >>> nf_conntrack_in(). This will trigger the warning in ovs_ct_set_labels().
> >>>
> >>> Deleting a ct created outside OVS and creating a new one within
> >>> __ovs_ct_lookup() doesn't seem reasonable and would be difficult to
> >>> implement. However, since OVS is not supposed to use ct entries created
> >>> externally,
> >>
> >> I'm not fully following this conversation, but this statement doesn't
> >> seem right.  OVS should be able to use ct entries created externally.
> >> i.e. if the packet already has some ct entry attached while entering
> >> OVS datapth, OVS should be able to match on the content of that entry.
> >>
> > Hi Ilya,
> >
> > Thank you for your comment.
> >
> > If OVS should use conntrack (ct) entries created externally, features
> > relying on NF_CT_EXT_XXX may not work on such entries. The NF_CT_EXT_LABELS
> > extension discussed in this thread is one example. Other extensions like
> > NF_CT_EXT_HELPER, NF_CT_EXT_ECACHE, and NF_CT_EXT_TIMEOUT may also be
> > affected.
> >
> > If the ct entry already exists before the OVS flow with ct actions is
> > created, the entry might not have the required NF_CT_EXT extension as
> > it was not created with the tmpl set by OVS, even though the OVS flow
> > requests it.
>
> You're right about extensions and it makes sense that they are not actually
> available, since the data inside labels, for example, is specific for the
> application.  However, there are use cases for matching on the externally
> obtained ct state, which is available.  IIRC, we have a few tests in the
> main OVS system test suite that cover such scenarios.
>
Got it, thank you for the explanation.

> >
> > As far as I know, using a separate conntrack zone to avoid reusing the
> > existing ct entry can work around this issue.
> >
> > Let me know if I missed something, or do you have any better solutions?
> >
> > Thanks.
> >
> >>> I believe ct zones can be used to prevent this issue.
> >>> In your case, the following flows should work:
> >>>
> >>> ovs-ofctl add-flow br "table=0, in_port=eth4,ip,ct_state=-trk
> >>> actions=ct(table=1,zone=1)"
> >>> ovs-ofctl add-flow br "table=1,
> >>> in_port=eth4,ip,ct_state=+trk+new,ct_zone=1
> >>> actions=ct(exec(set_field:0xef7d->ct_label),commit,zone=1),
> >>> output:eth2"
> >>> ovs-ofctl add-flow br "table=1,
> >>> in_port=eth4,ip,ct_label=0xef7d,ct_state=+trk+est,ct_zone=1
> >>> actions=output:eth2"
> >>>
> >>> Regarding the warning triggered by externally created ct entries, I plan
> >>> to remove the ovs_ct_get_conn_labels() call from ovs_ct_set_labels() and
> >>> I'll let nf_connlabels_replace() return an error in such cases, similar
> >>> to how tcf_ct_act_set_labels() handles this scenario in tc act_ct.
> >>>
> >>> Thanks.
> >>>>>
> >>>>>>
> >>>>>> diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c
> >>>>>> index 3bb4810234aa..c599ee013dfe 100644
> >>>>>> --- a/net/openvswitch/conntrack.c
> >>>>>> +++ b/net/openvswitch/conntrack.c
> >>>>>> @@ -595,6 +595,11 @@ static bool skb_nfct_cached(struct net *net,
> >>>>>>               rcu_dereference(timeout_ext->timeout))
> >>>>>>               return false;
> >>>>>>       }
> >>>>>> +    if (IS_ENABLED(CONFIG_NF_CONNTRACK_LABELS) && !nf_ct_labels_find(ct)) {
> >>>>>> +        if (nf_ct_is_confirmed(ct))
> >>>>>> +            nf_ct_delete(ct, 0, 0);
> >>>>>> +        return false;
> >>>>>> +    }
> >>>>>>
> >>>>>> Thanks.
> >>>>>>
> >>>>>>>
> >>>>>>> Thanks!
> >>>>>>> Jianbo
> >>>>>>>
> >>>>>>>> Signed-off-by: Xin Long <lucien.xin@gmail.com>
> >>>>>>>> ---
> >>>>>>>> v2: move ovs_net into #if in ovs_ct_exit() as Jakub noticed.
> >>>>>>>> ---
> >>>>>>>>    net/openvswitch/conntrack.c | 30 ++++++++++++------------------
> >>>>>>>>    net/openvswitch/datapath.h  |  3 ---
> >>>>>>>>    2 files changed, 12 insertions(+), 21 deletions(-)
> >>>>>>>>
> >>>>>>>> diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c
> >>>>>>>> index 8eb1d644b741..a3da5ee34f92 100644
> >>>>>>>> --- a/net/openvswitch/conntrack.c
> >>>>>>>> +++ b/net/openvswitch/conntrack.c
> >>>>>>>> @@ -1368,11 +1368,8 @@ bool ovs_ct_verify(struct net *net, enum ovs_key_attr attr)
> >>>>>>>>            attr == OVS_KEY_ATTR_CT_MARK)
> >>>>>>>>                return true;
> >>>>>>>>        if (IS_ENABLED(CONFIG_NF_CONNTRACK_LABELS) &&
> >>>>>>>> -         attr == OVS_KEY_ATTR_CT_LABELS) {
> >>>>>>>> -             struct ovs_net *ovs_net = net_generic(net, ovs_net_id);
> >>>>>>>> -
> >>>>>>>> -             return ovs_net->xt_label;
> >>>>>>>> -     }
> >>>>>>>> +         attr == OVS_KEY_ATTR_CT_LABELS)
> >>>>>>>> +             return true;
> >>>>>>>>
> >>>>>>>>        return false;
> >>>>>>>>    }
> >>>>>>>> @@ -1381,6 +1378,7 @@ int ovs_ct_copy_action(struct net *net, const struct nlattr *attr,
> >>>>>>>>                       const struct sw_flow_key *key,
> >>>>>>>>                       struct sw_flow_actions **sfa,  bool log)
> >>>>>>>>    {
> >>>>>>>> +     unsigned int n_bits = sizeof(struct ovs_key_ct_labels) * BITS_PER_BYTE;
> >>>>>>>>        struct ovs_conntrack_info ct_info;
> >>>>>>>>        const char *helper = NULL;
> >>>>>>>>        u16 family;
> >>>>>>>> @@ -1409,6 +1407,12 @@ int ovs_ct_copy_action(struct net *net, const struct nlattr *attr,
> >>>>>>>>                return -ENOMEM;
> >>>>>>>>        }
> >>>>>>>>
> >>>>>>>> +     if (nf_connlabels_get(net, n_bits - 1)) {
> >>>>>>>> +             nf_ct_tmpl_free(ct_info.ct);
> >>>>>>>> +             OVS_NLERR(log, "Failed to set connlabel length");
> >>>>>>>> +             return -EOPNOTSUPP;
> >>>>>>>> +     }
> >>>>>>>> +
> >>>>>>>>        if (ct_info.timeout[0]) {
> >>>>>>>>                if (nf_ct_set_timeout(net, ct_info.ct, family, key->ip.proto,
> >>>>>>>>                                      ct_info.timeout))
> >>>>>>>> @@ -1577,6 +1581,7 @@ static void __ovs_ct_free_action(struct ovs_conntrack_info *ct_info)
> >>>>>>>>        if (ct_info->ct) {
> >>>>>>>>                if (ct_info->timeout[0])
> >>>>>>>>                        nf_ct_destroy_timeout(ct_info->ct);
> >>>>>>>> +             nf_connlabels_put(nf_ct_net(ct_info->ct));
> >>>>>>>>                nf_ct_tmpl_free(ct_info->ct);
> >>>>>>>>        }
> >>>>>>>>    }
> >>>>>>>> @@ -2002,17 +2007,9 @@ struct genl_family dp_ct_limit_genl_family __ro_after_init = {
> >>>>>>>>
> >>>>>>>>    int ovs_ct_init(struct net *net)
> >>>>>>>>    {
> >>>>>>>> -     unsigned int n_bits = sizeof(struct ovs_key_ct_labels) * BITS_PER_BYTE;
> >>>>>>>> +#if  IS_ENABLED(CONFIG_NETFILTER_CONNCOUNT)
> >>>>>>>>        struct ovs_net *ovs_net = net_generic(net, ovs_net_id);
> >>>>>>>>
> >>>>>>>> -     if (nf_connlabels_get(net, n_bits - 1)) {
> >>>>>>>> -             ovs_net->xt_label = false;
> >>>>>>>> -             OVS_NLERR(true, "Failed to set connlabel length");
> >>>>>>>> -     } else {
> >>>>>>>> -             ovs_net->xt_label = true;
> >>>>>>>> -     }
> >>>>>>>> -
> >>>>>>>> -#if  IS_ENABLED(CONFIG_NETFILTER_CONNCOUNT)
> >>>>>>>>        return ovs_ct_limit_init(net, ovs_net);
> >>>>>>>>    #else
> >>>>>>>>        return 0;
> >>>>>>>> @@ -2021,12 +2018,9 @@ int ovs_ct_init(struct net *net)
> >>>>>>>>
> >>>>>>>>    void ovs_ct_exit(struct net *net)
> >>>>>>>>    {
> >>>>>>>> +#if  IS_ENABLED(CONFIG_NETFILTER_CONNCOUNT)
> >>>>>>>>        struct ovs_net *ovs_net = net_generic(net, ovs_net_id);
> >>>>>>>>
> >>>>>>>> -#if  IS_ENABLED(CONFIG_NETFILTER_CONNCOUNT)
> >>>>>>>>        ovs_ct_limit_exit(net, ovs_net);
> >>>>>>>>    #endif
> >>>>>>>> -
> >>>>>>>> -     if (ovs_net->xt_label)
> >>>>>>>> -             nf_connlabels_put(net);
> >>>>>>>>    }
> >>>>>>>> diff --git a/net/openvswitch/datapath.h b/net/openvswitch/datapath.h
> >>>>>>>> index 9ca6231ea647..365b9bb7f546 100644
> >>>>>>>> --- a/net/openvswitch/datapath.h
> >>>>>>>> +++ b/net/openvswitch/datapath.h
> >>>>>>>> @@ -160,9 +160,6 @@ struct ovs_net {
> >>>>>>>>    #if IS_ENABLED(CONFIG_NETFILTER_CONNCOUNT)
> >>>>>>>>        struct ovs_ct_limit_info *ct_limit_info;
> >>>>>>>>    #endif
> >>>>>>>> -
> >>>>>>>> -     /* Module reference for configuring conntrack. */
> >>>>>>>> -     bool xt_label;
> >>>>>>>>    };
> >>>>>>>>
> >>>>>>>>    /**
> >>>>>>>
> >>>>>
> >>
>

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCHv2 net-next] openvswitch: switch to per-action label counting in conntrack
  2025-03-04  1:20               ` Jianbo Liu
@ 2025-03-04 17:10                 ` Xin Long
  0 siblings, 0 replies; 17+ messages in thread
From: Xin Long @ 2025-03-04 17:10 UTC (permalink / raw)
  To: Jianbo Liu
  Cc: network dev, dev, ovs-dev, davem, kuba, Eric Dumazet, Paolo Abeni,
	Pravin B Shelar, Ilya Maximets, Aaron Conole, Florian Westphal

On Mon, Mar 3, 2025 at 8:20 PM Jianbo Liu <jianbol@nvidia.com> wrote:
>
>
>
> On 3/4/2025 12:42 AM, Xin Long wrote:
> > On Sun, Mar 2, 2025 at 9:14 PM Jianbo Liu <jianbol@nvidia.com> wrote:
> >>
> >>
> >>
> >> On 3/3/2025 2:22 AM, Xin Long wrote:
> >>> On Tue, Feb 25, 2025 at 9:57 AM Xin Long <lucien.xin@gmail.com> wrote:
> >>>>
> >>>> On Mon, Feb 24, 2025 at 8:38 PM Jianbo Liu <jianbol@nvidia.com> wrote:
> >>>>>
> >>>>>
> >>>>>
> >>>>> On 2/25/2025 3:55 AM, Xin Long wrote:
> >>>>>> On Mon, Feb 24, 2025 at 4:01 AM Jianbo Liu <jianbol@nvidia.com> wrote:
> >>>>>>>
> >>>>>>>
> >>>>>>>
> >>>>>>> On 8/13/2024 1:17 AM, Xin Long wrote:
> >>>>>>>> Similar to commit 70f06c115bcc ("sched: act_ct: switch to per-action
> >>>>>>>> label counting"), we should also switch to per-action label counting
> >>>>>>>> in openvswitch conntrack, as Florian suggested.
> >>>>>>>>
> >>>>>>>> The difference is that nf_connlabels_get() is called unconditionally
> >>>>>>>> when creating an ct action in ovs_ct_copy_action(). As with these
> >>>>>>>> flows:
> >>>>>>>>
> >>>>>>>>       table=0,ip,actions=ct(commit,table=1)
> >>>>>>>>       table=1,ip,actions=ct(commit,exec(set_field:0xac->ct_label),table=2)
> >>>>>>>>
> >>>>>>>> it needs to make sure the label ext is created in the 1st flow before
> >>>>>>>> the ct is committed in ovs_ct_commit(). Otherwise, the warning in
> >>>>>>>> nf_ct_ext_add() when creating the label ext in the 2nd flow will
> >>>>>>>> be triggered:
> >>>>>>>>
> >>>>>>>>        WARN_ON(nf_ct_is_confirmed(ct));
> >>>>>>>>
> >>>>>>>
> >>>>>>> Hi Xin Long,
> >>>>>>>
> >>>>>>> The ct can be committed before openvswitch handles packets with CT
> >>>>>>> actions. And we can trigger the warning by creating VF and running ping
> >>>>>>> over it with the following configurations:
> >>>>>>>
> >>>>>>> ovs-vsctl add-br br
> >>>>>>> ovs-vsctl add-port br eth2
> >>>>>>> ovs-vsctl add-port br eth4
> >>>>>>> ovs-ofctl add-flow br "table=0, in_port=eth4,ip,ct_state=-trk
> >>>>>>> actions=ct(table=1)"
> >>>>>>> ovs-ofctl add-flow br "table=1, in_port=eth4,ip,ct_state=+trk+new
> >>>>>>> actions=ct(exec(set_field:0xef7d->ct_label), commit), output:eth2"
> >>>>>>> ovs-ofctl add-flow br "table=1, in_port=eth4,ip,ct_label=0xef7d,
> >>>>>>> ct_state=+trk+est actions=output:eth2"
> >>>>>>>
> >>>>>>> The eth2 is PF, and eth4 is VF's representor.
> >>>>>>> Would you like to fix it?
> >>>>>> Hi, Jianbo,
> >>>>>>
> >>>>>> Sure, we have to attach a new ct to the skb in __ovs_ct_lookup() for
> >>>>>> this case, and even delete the one created before ovs_ct.
> >>>>>>
> >>>>>> Can you check if this works on your env?
> >>>>>
> >>>>> Yes, it works.
> >>>>> Could you please submit the formal patch? Thanks!
> >>>> Great, I will post after running some of my local tests.
> >>>>
> >>> Hi Jianbo,
> >>>
> >>> I recently ran some tests and observed that the current approach cannot
> >>> completely avoid the warning. If an skb enters __ovs_ct_lookup() without
> >>> an attached connection tracking (ct) entry, it may still acquire an
> >>> existing ct created outside of OVS (possibly by netfilter) through
> >>> nf_conntrack_in(). This will trigger the warning in ovs_ct_set_labels().
> >>>
> >>> Deleting a ct created outside OVS and creating a new one within
> >>> __ovs_ct_lookup() doesn't seem reasonable and would be difficult to
> >>
> >> Yes, I'm also skeptical of your temporary fix, and waiting for your
> >> formal one.
> > Cool.
> >
> >>
> >>> implement. However, since OVS is not supposed to use ct entries created
> >>> externally, I believe ct zones can be used to prevent this issue.
> >>> In your case, the following flows should work:
> >>>
> >>> ovs-ofctl add-flow br "table=0, in_port=eth4,ip,ct_state=-trk
> >>> actions=ct(table=1,zone=1)"
> >>> ovs-ofctl add-flow br "table=1,
> >>> in_port=eth4,ip,ct_state=+trk+new,ct_zone=1
> >>> actions=ct(exec(set_field:0xef7d->ct_label),commit,zone=1),
> >>> output:eth2"
> >>> ovs-ofctl add-flow br "table=1,
> >>> in_port=eth4,ip,ct_label=0xef7d,ct_state=+trk+est,ct_zone=1
> >>> actions=output:eth2"
> >>>
> >>> Regarding the warning triggered by externally created ct entries, I plan
> >>> to remove the ovs_ct_get_conn_labels() call from ovs_ct_set_labels() and
> >>> I'll let nf_connlabels_replace() return an error in such cases, similar
> >>> to how tcf_ct_act_set_labels() handles this scenario in tc act_ct.
> >>>
> >>
> >> It's a good idea to be consistent with act_ct implementation. But, would
> >> you like to revert first if it takes long time to work on the fix?
> > Sorry, revert which one?
>
> Of course the one we are currently replying to - "openvswitch: switch to
> per-action label counting in conntrack".
>
It's true the commit "openvswitch: switch to per-action label counting
in conntrack"
makes the issue more likely to occur.

However, I've reproduced the warning in local tests even without the
commit, so the root cause appears to be independent of this change.

I will post the fix later today.

Thank you for your report!

> > If you mean the fix in skb_nfct_cached(), it hasn't been posted and
> > will not be posted.
>
> No. I think we both know this is just a temporary fix, and it can help
> you to understand the issue.
>
> >
> > Thanks.
> >>
> >> Thanks!
> >>
> >>> Thanks.
> >>>>>
> >>>>>>
> >>>>>> diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c
> >>>>>> index 3bb4810234aa..c599ee013dfe 100644
> >>>>>> --- a/net/openvswitch/conntrack.c
> >>>>>> +++ b/net/openvswitch/conntrack.c
> >>>>>> @@ -595,6 +595,11 @@ static bool skb_nfct_cached(struct net *net,
> >>>>>>                 rcu_dereference(timeout_ext->timeout))
> >>>>>>                 return false;
> >>>>>>         }
> >>>>>> +    if (IS_ENABLED(CONFIG_NF_CONNTRACK_LABELS) && !nf_ct_labels_find(ct)) {
> >>>>>> +        if (nf_ct_is_confirmed(ct))
> >>>>>> +            nf_ct_delete(ct, 0, 0);
> >>>>>> +        return false;
> >>>>>> +    }
> >>>>>>
> >>>>>> Thanks.
> >>>>>>
> >>>>>>>
> >>>>>>> Thanks!
> >>>>>>> Jianbo
> >>>>>>>
> >>>>>>>> Signed-off-by: Xin Long <lucien.xin@gmail.com>
> >>>>>>>> ---
> >>>>>>>> v2: move ovs_net into #if in ovs_ct_exit() as Jakub noticed.
> >>>>>>>> ---
> >>>>>>>>      net/openvswitch/conntrack.c | 30 ++++++++++++------------------
> >>>>>>>>      net/openvswitch/datapath.h  |  3 ---
> >>>>>>>>      2 files changed, 12 insertions(+), 21 deletions(-)
> >>>>>>>>
> >>>>>>>> diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c
> >>>>>>>> index 8eb1d644b741..a3da5ee34f92 100644
> >>>>>>>> --- a/net/openvswitch/conntrack.c
> >>>>>>>> +++ b/net/openvswitch/conntrack.c
> >>>>>>>> @@ -1368,11 +1368,8 @@ bool ovs_ct_verify(struct net *net, enum ovs_key_attr attr)
> >>>>>>>>              attr == OVS_KEY_ATTR_CT_MARK)
> >>>>>>>>                  return true;
> >>>>>>>>          if (IS_ENABLED(CONFIG_NF_CONNTRACK_LABELS) &&
> >>>>>>>> -         attr == OVS_KEY_ATTR_CT_LABELS) {
> >>>>>>>> -             struct ovs_net *ovs_net = net_generic(net, ovs_net_id);
> >>>>>>>> -
> >>>>>>>> -             return ovs_net->xt_label;
> >>>>>>>> -     }
> >>>>>>>> +         attr == OVS_KEY_ATTR_CT_LABELS)
> >>>>>>>> +             return true;
> >>>>>>>>
> >>>>>>>>          return false;
> >>>>>>>>      }
> >>>>>>>> @@ -1381,6 +1378,7 @@ int ovs_ct_copy_action(struct net *net, const struct nlattr *attr,
> >>>>>>>>                         const struct sw_flow_key *key,
> >>>>>>>>                         struct sw_flow_actions **sfa,  bool log)
> >>>>>>>>      {
> >>>>>>>> +     unsigned int n_bits = sizeof(struct ovs_key_ct_labels) * BITS_PER_BYTE;
> >>>>>>>>          struct ovs_conntrack_info ct_info;
> >>>>>>>>          const char *helper = NULL;
> >>>>>>>>          u16 family;
> >>>>>>>> @@ -1409,6 +1407,12 @@ int ovs_ct_copy_action(struct net *net, const struct nlattr *attr,
> >>>>>>>>                  return -ENOMEM;
> >>>>>>>>          }
> >>>>>>>>
> >>>>>>>> +     if (nf_connlabels_get(net, n_bits - 1)) {
> >>>>>>>> +             nf_ct_tmpl_free(ct_info.ct);
> >>>>>>>> +             OVS_NLERR(log, "Failed to set connlabel length");
> >>>>>>>> +             return -EOPNOTSUPP;
> >>>>>>>> +     }
> >>>>>>>> +
> >>>>>>>>          if (ct_info.timeout[0]) {
> >>>>>>>>                  if (nf_ct_set_timeout(net, ct_info.ct, family, key->ip.proto,
> >>>>>>>>                                        ct_info.timeout))
> >>>>>>>> @@ -1577,6 +1581,7 @@ static void __ovs_ct_free_action(struct ovs_conntrack_info *ct_info)
> >>>>>>>>          if (ct_info->ct) {
> >>>>>>>>                  if (ct_info->timeout[0])
> >>>>>>>>                          nf_ct_destroy_timeout(ct_info->ct);
> >>>>>>>> +             nf_connlabels_put(nf_ct_net(ct_info->ct));
> >>>>>>>>                  nf_ct_tmpl_free(ct_info->ct);
> >>>>>>>>          }
> >>>>>>>>      }
> >>>>>>>> @@ -2002,17 +2007,9 @@ struct genl_family dp_ct_limit_genl_family __ro_after_init = {
> >>>>>>>>
> >>>>>>>>      int ovs_ct_init(struct net *net)
> >>>>>>>>      {
> >>>>>>>> -     unsigned int n_bits = sizeof(struct ovs_key_ct_labels) * BITS_PER_BYTE;
> >>>>>>>> +#if  IS_ENABLED(CONFIG_NETFILTER_CONNCOUNT)
> >>>>>>>>          struct ovs_net *ovs_net = net_generic(net, ovs_net_id);
> >>>>>>>>
> >>>>>>>> -     if (nf_connlabels_get(net, n_bits - 1)) {
> >>>>>>>> -             ovs_net->xt_label = false;
> >>>>>>>> -             OVS_NLERR(true, "Failed to set connlabel length");
> >>>>>>>> -     } else {
> >>>>>>>> -             ovs_net->xt_label = true;
> >>>>>>>> -     }
> >>>>>>>> -
> >>>>>>>> -#if  IS_ENABLED(CONFIG_NETFILTER_CONNCOUNT)
> >>>>>>>>          return ovs_ct_limit_init(net, ovs_net);
> >>>>>>>>      #else
> >>>>>>>>          return 0;
> >>>>>>>> @@ -2021,12 +2018,9 @@ int ovs_ct_init(struct net *net)
> >>>>>>>>
> >>>>>>>>      void ovs_ct_exit(struct net *net)
> >>>>>>>>      {
> >>>>>>>> +#if  IS_ENABLED(CONFIG_NETFILTER_CONNCOUNT)
> >>>>>>>>          struct ovs_net *ovs_net = net_generic(net, ovs_net_id);
> >>>>>>>>
> >>>>>>>> -#if  IS_ENABLED(CONFIG_NETFILTER_CONNCOUNT)
> >>>>>>>>          ovs_ct_limit_exit(net, ovs_net);
> >>>>>>>>      #endif
> >>>>>>>> -
> >>>>>>>> -     if (ovs_net->xt_label)
> >>>>>>>> -             nf_connlabels_put(net);
> >>>>>>>>      }
> >>>>>>>> diff --git a/net/openvswitch/datapath.h b/net/openvswitch/datapath.h
> >>>>>>>> index 9ca6231ea647..365b9bb7f546 100644
> >>>>>>>> --- a/net/openvswitch/datapath.h
> >>>>>>>> +++ b/net/openvswitch/datapath.h
> >>>>>>>> @@ -160,9 +160,6 @@ struct ovs_net {
> >>>>>>>>      #if IS_ENABLED(CONFIG_NETFILTER_CONNCOUNT)
> >>>>>>>>          struct ovs_ct_limit_info *ct_limit_info;
> >>>>>>>>      #endif
> >>>>>>>> -
> >>>>>>>> -     /* Module reference for configuring conntrack. */
> >>>>>>>> -     bool xt_label;
> >>>>>>>>      };
> >>>>>>>>
> >>>>>>>>      /**
> >>>>>>>
> >>>>>
> >>
>

^ permalink raw reply	[flat|nested] 17+ messages in thread

end of thread, other threads:[~2025-03-04 17:10 UTC | newest]

Thread overview: 17+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2024-08-12 17:17 [PATCHv2 net-next] openvswitch: switch to per-action label counting in conntrack Xin Long
2024-08-13 17:57 ` Aaron Conole
2024-08-13 18:58 ` Florian Westphal
2024-08-16  2:20 ` patchwork-bot+netdevbpf
2025-02-24  9:01 ` Jianbo Liu
2025-02-24 19:55   ` Xin Long
2025-02-25  1:38     ` Jianbo Liu
2025-02-25 14:57       ` Xin Long
2025-03-02 18:22         ` Xin Long
2025-03-03  2:14           ` Jianbo Liu
2025-03-03 16:42             ` Xin Long
2025-03-04  1:20               ` Jianbo Liu
2025-03-04 17:10                 ` Xin Long
2025-03-03 10:56           ` Ilya Maximets
2025-03-03 16:38             ` Xin Long
2025-03-04 13:23               ` Ilya Maximets
2025-03-04 15:21                 ` Xin Long

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).