All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] netfilter: conntrack: fix wrong ct->timeout value
@ 2023-04-10  6:09 Tzung-Bi Shih
  2023-04-10  8:33 ` Pablo Neira Ayuso
  0 siblings, 1 reply; 12+ messages in thread
From: Tzung-Bi Shih @ 2023-04-10  6:09 UTC (permalink / raw)
  To: pablo, kadlec, fw
  Cc: netfilter-devel, coreteam, tzungbi, jiejiang, jasongustaman,
	garrick

(struct nf_conn)->timeout is an interval before the conntrack
confirmed.  After confirmed, it becomes a timestamp[1].

It is observed that timeout of an unconfirmed conntrack have been
altered by calling ctnetlink_change_timeout().  As a result,
`nfct_time_stamp` was wrongly added to `ct->timeout` twice[2].

Differentiate the 2 cases in all `ct->timeout` accesses.

[1]: https://elixir.bootlin.com/linux/v6.3-rc5/source/net/netfilter/nf_conntrack_core.c#L1257
[2]: https://elixir.bootlin.com/linux/v6.3-rc5/source/include/net/netfilter/nf_conntrack_core.h#L92

Signed-off-by: Tzung-Bi Shih <tzungbi@kernel.org>
---
 include/net/netfilter/nf_conntrack.h      | 27 +++++++++++++++++++----
 include/net/netfilter/nf_conntrack_core.h |  2 +-
 net/netfilter/nf_conntrack_core.c         | 11 ++++-----
 net/netfilter/nf_conntrack_proto_tcp.c    |  4 ++--
 net/netfilter/nf_flow_table_core.c        |  4 ++--
 5 files changed, 32 insertions(+), 16 deletions(-)

diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h
index a72028dbef0c..48e020db3fb3 100644
--- a/include/net/netfilter/nf_conntrack.h
+++ b/include/net/netfilter/nf_conntrack.h
@@ -290,17 +290,36 @@ static inline bool nf_is_loopback_packet(const struct sk_buff *skb)
 
 #define nfct_time_stamp ((u32)(jiffies))
 
+static inline s32 nf_ct_read_timeout(const struct nf_conn *ct)
+{
+	s32 timeout;
+
+	if (nf_ct_is_confirmed(ct))
+		timeout = READ_ONCE(ct->timeout) - nfct_time_stamp;
+	else
+		timeout = READ_ONCE(ct->timeout);
+
+	return timeout;
+}
+
+static inline void nf_ct_write_timeout(struct nf_conn *ct, s32 timeout)
+{
+	if (nf_ct_is_confirmed(ct))
+		WRITE_ONCE(ct->timeout, timeout + nfct_time_stamp);
+	else
+		WRITE_ONCE(ct->timeout, timeout);
+}
+
 /* jiffies until ct expires, 0 if already expired */
 static inline unsigned long nf_ct_expires(const struct nf_conn *ct)
 {
-	s32 timeout = READ_ONCE(ct->timeout) - nfct_time_stamp;
-
+	s32 timeout = nf_ct_read_timeout(ct);
 	return max(timeout, 0);
 }
 
 static inline bool nf_ct_is_expired(const struct nf_conn *ct)
 {
-	return (__s32)(READ_ONCE(ct->timeout) - nfct_time_stamp) <= 0;
+	return nf_ct_expires(ct) == 0;
 }
 
 /* use after obtaining a reference count */
@@ -319,7 +338,7 @@ static inline bool nf_ct_should_gc(const struct nf_conn *ct)
 static inline void nf_ct_offload_timeout(struct nf_conn *ct)
 {
 	if (nf_ct_expires(ct) < NF_CT_DAY / 2)
-		WRITE_ONCE(ct->timeout, nfct_time_stamp + NF_CT_DAY);
+		nf_ct_write_timeout(ct, NF_CT_DAY);
 }
 
 struct kernel_param;
diff --git a/include/net/netfilter/nf_conntrack_core.h b/include/net/netfilter/nf_conntrack_core.h
index 71d1269fe4d4..7a6e49a66d20 100644
--- a/include/net/netfilter/nf_conntrack_core.h
+++ b/include/net/netfilter/nf_conntrack_core.h
@@ -89,7 +89,7 @@ static inline void __nf_ct_set_timeout(struct nf_conn *ct, u64 timeout)
 {
 	if (timeout > INT_MAX)
 		timeout = INT_MAX;
-	WRITE_ONCE(ct->timeout, nfct_time_stamp + (u32)timeout);
+	nf_ct_write_timeout(ct, (u32)timeout);
 }
 
 int __nf_ct_change_timeout(struct nf_conn *ct, u64 cta_timeout);
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index db1ea361f2da..47166576c195 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -657,7 +657,7 @@ bool nf_ct_delete(struct nf_conn *ct, u32 portid, int report)
 
 	tstamp = nf_conn_tstamp_find(ct);
 	if (tstamp) {
-		s32 timeout = READ_ONCE(ct->timeout) - nfct_time_stamp;
+		s32 timeout = nf_ct_read_timeout(ct);
 
 		tstamp->stop = ktime_get_real_ns();
 		if (timeout < 0)
@@ -1063,7 +1063,7 @@ static int nf_ct_resolve_clash_harder(struct sk_buff *skb, u32 repl_idx)
 	}
 
 	/* We want the clashing entry to go away real soon: 1 second timeout. */
-	WRITE_ONCE(loser_ct->timeout, nfct_time_stamp + HZ);
+	nf_ct_write_timeout(loser_ct, HZ);
 
 	/* IPS_NAT_CLASH removes the entry automatically on the first
 	 * reply.  Also prevents UDP tracker from moving the entry to
@@ -2079,11 +2079,8 @@ void __nf_ct_refresh_acct(struct nf_conn *ct,
 		goto acct;
 
 	/* If not in hash table, timer will not be active yet */
-	if (nf_ct_is_confirmed(ct))
-		extra_jiffies += nfct_time_stamp;
-
-	if (READ_ONCE(ct->timeout) != extra_jiffies)
-		WRITE_ONCE(ct->timeout, extra_jiffies);
+	if (nf_ct_read_timeout(ct) != extra_jiffies)
+		nf_ct_write_timeout(ct, extra_jiffies);
 acct:
 	if (do_acct)
 		nf_ct_acct_update(ct, CTINFO2DIR(ctinfo), skb->len);
diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c
index 4018acb1d674..a2797d026943 100644
--- a/net/netfilter/nf_conntrack_proto_tcp.c
+++ b/net/netfilter/nf_conntrack_proto_tcp.c
@@ -766,7 +766,7 @@ static void __cold nf_tcp_handle_invalid(struct nf_conn *ct,
 					  "packet (index %d, dir %d) response for index %d lower timeout to %u",
 					  index, dir, ct->proto.tcp.last_index, timeout);
 
-			WRITE_ONCE(ct->timeout, timeout + nfct_time_stamp);
+			nf_ct_write_timeout(ct, timeout);
 		}
 	} else {
 		ct->proto.tcp.last_index = index;
@@ -939,7 +939,7 @@ void nf_conntrack_tcp_set_closing(struct nf_conn *ct)
 	}
 
 	timeout = timeouts[TCP_CONNTRACK_CLOSE];
-	WRITE_ONCE(ct->timeout, timeout + nfct_time_stamp);
+	nf_ct_write_timeout(ct, timeout);
 
 	spin_unlock_bh(&ct->lock);
 
diff --git a/net/netfilter/nf_flow_table_core.c b/net/netfilter/nf_flow_table_core.c
index 04bd0ed4d2ae..113bd361e537 100644
--- a/net/netfilter/nf_flow_table_core.c
+++ b/net/netfilter/nf_flow_table_core.c
@@ -206,8 +206,8 @@ static void flow_offload_fixup_ct(struct nf_conn *ct)
 	if (timeout < 0)
 		timeout = 0;
 
-	if (nf_flow_timeout_delta(READ_ONCE(ct->timeout)) > (__s32)timeout)
-		WRITE_ONCE(ct->timeout, nfct_time_stamp + timeout);
+	if (nf_flow_timeout_delta(nf_ct_read_timeout(ct)) > (__s32)timeout)
+		nf_ct_write_timeout(ct, timeout);
 }
 
 static void flow_offload_route_release(struct flow_offload *flow)
-- 
2.40.0.577.gac1e443424-goog


^ permalink raw reply related	[flat|nested] 12+ messages in thread

* Re: [PATCH] netfilter: conntrack: fix wrong ct->timeout value
  2023-04-10  6:09 [PATCH] netfilter: conntrack: fix wrong ct->timeout value Tzung-Bi Shih
@ 2023-04-10  8:33 ` Pablo Neira Ayuso
  2023-04-10  9:31   ` Pablo Neira Ayuso
  0 siblings, 1 reply; 12+ messages in thread
From: Pablo Neira Ayuso @ 2023-04-10  8:33 UTC (permalink / raw)
  To: Tzung-Bi Shih
  Cc: kadlec, fw, netfilter-devel, coreteam, jiejiang, jasongustaman,
	garrick

Hi,

On Mon, Apr 10, 2023 at 02:09:35PM +0800, Tzung-Bi Shih wrote:
> (struct nf_conn)->timeout is an interval before the conntrack
> confirmed.  After confirmed, it becomes a timestamp[1].
> 
> It is observed that timeout of an unconfirmed conntrack have been
> altered by calling ctnetlink_change_timeout().  As a result,
> `nfct_time_stamp` was wrongly added to `ct->timeout` twice[2].
> 
> Differentiate the 2 cases in all `ct->timeout` accesses.

You can just skip refreshing the timeout for unconfirmed conntrack
entries in ctnetlink_change_timeout().

> [1]: https://elixir.bootlin.com/linux/v6.3-rc5/source/net/netfilter/nf_conntrack_core.c#L1257
> [2]: https://elixir.bootlin.com/linux/v6.3-rc5/source/include/net/netfilter/nf_conntrack_core.h#L92
> 
> Signed-off-by: Tzung-Bi Shih <tzungbi@kernel.org>
> ---
>  include/net/netfilter/nf_conntrack.h      | 27 +++++++++++++++++++----
>  include/net/netfilter/nf_conntrack_core.h |  2 +-
>  net/netfilter/nf_conntrack_core.c         | 11 ++++-----
>  net/netfilter/nf_conntrack_proto_tcp.c    |  4 ++--
>  net/netfilter/nf_flow_table_core.c        |  4 ++--
>  5 files changed, 32 insertions(+), 16 deletions(-)
> 
> diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h
> index a72028dbef0c..48e020db3fb3 100644
> --- a/include/net/netfilter/nf_conntrack.h
> +++ b/include/net/netfilter/nf_conntrack.h
> @@ -290,17 +290,36 @@ static inline bool nf_is_loopback_packet(const struct sk_buff *skb)
>  
>  #define nfct_time_stamp ((u32)(jiffies))
>  
> +static inline s32 nf_ct_read_timeout(const struct nf_conn *ct)
> +{
> +	s32 timeout;
> +
> +	if (nf_ct_is_confirmed(ct))
> +		timeout = READ_ONCE(ct->timeout) - nfct_time_stamp;
> +	else
> +		timeout = READ_ONCE(ct->timeout);
> +
> +	return timeout;
> +}
> +
> +static inline void nf_ct_write_timeout(struct nf_conn *ct, s32 timeout)
> +{
> +	if (nf_ct_is_confirmed(ct))
> +		WRITE_ONCE(ct->timeout, timeout + nfct_time_stamp);
> +	else
> +		WRITE_ONCE(ct->timeout, timeout);
> +}
> +
>  /* jiffies until ct expires, 0 if already expired */
>  static inline unsigned long nf_ct_expires(const struct nf_conn *ct)
>  {
> -	s32 timeout = READ_ONCE(ct->timeout) - nfct_time_stamp;
> -
> +	s32 timeout = nf_ct_read_timeout(ct);
>  	return max(timeout, 0);
>  }
>  
>  static inline bool nf_ct_is_expired(const struct nf_conn *ct)
>  {
> -	return (__s32)(READ_ONCE(ct->timeout) - nfct_time_stamp) <= 0;
> +	return nf_ct_expires(ct) == 0;
>  }
>  
>  /* use after obtaining a reference count */
> @@ -319,7 +338,7 @@ static inline bool nf_ct_should_gc(const struct nf_conn *ct)
>  static inline void nf_ct_offload_timeout(struct nf_conn *ct)
>  {
>  	if (nf_ct_expires(ct) < NF_CT_DAY / 2)
> -		WRITE_ONCE(ct->timeout, nfct_time_stamp + NF_CT_DAY);
> +		nf_ct_write_timeout(ct, NF_CT_DAY);
>  }
>  
>  struct kernel_param;
> diff --git a/include/net/netfilter/nf_conntrack_core.h b/include/net/netfilter/nf_conntrack_core.h
> index 71d1269fe4d4..7a6e49a66d20 100644
> --- a/include/net/netfilter/nf_conntrack_core.h
> +++ b/include/net/netfilter/nf_conntrack_core.h
> @@ -89,7 +89,7 @@ static inline void __nf_ct_set_timeout(struct nf_conn *ct, u64 timeout)
>  {
>  	if (timeout > INT_MAX)
>  		timeout = INT_MAX;
> -	WRITE_ONCE(ct->timeout, nfct_time_stamp + (u32)timeout);
> +	nf_ct_write_timeout(ct, (u32)timeout);
>  }
>  
>  int __nf_ct_change_timeout(struct nf_conn *ct, u64 cta_timeout);
> diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
> index db1ea361f2da..47166576c195 100644
> --- a/net/netfilter/nf_conntrack_core.c
> +++ b/net/netfilter/nf_conntrack_core.c
> @@ -657,7 +657,7 @@ bool nf_ct_delete(struct nf_conn *ct, u32 portid, int report)
>  
>  	tstamp = nf_conn_tstamp_find(ct);
>  	if (tstamp) {
> -		s32 timeout = READ_ONCE(ct->timeout) - nfct_time_stamp;
> +		s32 timeout = nf_ct_read_timeout(ct);
>  
>  		tstamp->stop = ktime_get_real_ns();
>  		if (timeout < 0)
> @@ -1063,7 +1063,7 @@ static int nf_ct_resolve_clash_harder(struct sk_buff *skb, u32 repl_idx)
>  	}
>  
>  	/* We want the clashing entry to go away real soon: 1 second timeout. */
> -	WRITE_ONCE(loser_ct->timeout, nfct_time_stamp + HZ);
> +	nf_ct_write_timeout(loser_ct, HZ);
>  
>  	/* IPS_NAT_CLASH removes the entry automatically on the first
>  	 * reply.  Also prevents UDP tracker from moving the entry to
> @@ -2079,11 +2079,8 @@ void __nf_ct_refresh_acct(struct nf_conn *ct,
>  		goto acct;
>  
>  	/* If not in hash table, timer will not be active yet */
> -	if (nf_ct_is_confirmed(ct))
> -		extra_jiffies += nfct_time_stamp;
> -
> -	if (READ_ONCE(ct->timeout) != extra_jiffies)
> -		WRITE_ONCE(ct->timeout, extra_jiffies);
> +	if (nf_ct_read_timeout(ct) != extra_jiffies)
> +		nf_ct_write_timeout(ct, extra_jiffies);
>  acct:
>  	if (do_acct)
>  		nf_ct_acct_update(ct, CTINFO2DIR(ctinfo), skb->len);
> diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c
> index 4018acb1d674..a2797d026943 100644
> --- a/net/netfilter/nf_conntrack_proto_tcp.c
> +++ b/net/netfilter/nf_conntrack_proto_tcp.c
> @@ -766,7 +766,7 @@ static void __cold nf_tcp_handle_invalid(struct nf_conn *ct,
>  					  "packet (index %d, dir %d) response for index %d lower timeout to %u",
>  					  index, dir, ct->proto.tcp.last_index, timeout);
>  
> -			WRITE_ONCE(ct->timeout, timeout + nfct_time_stamp);
> +			nf_ct_write_timeout(ct, timeout);
>  		}
>  	} else {
>  		ct->proto.tcp.last_index = index;
> @@ -939,7 +939,7 @@ void nf_conntrack_tcp_set_closing(struct nf_conn *ct)
>  	}
>  
>  	timeout = timeouts[TCP_CONNTRACK_CLOSE];
> -	WRITE_ONCE(ct->timeout, timeout + nfct_time_stamp);
> +	nf_ct_write_timeout(ct, timeout);
>  
>  	spin_unlock_bh(&ct->lock);
>  
> diff --git a/net/netfilter/nf_flow_table_core.c b/net/netfilter/nf_flow_table_core.c
> index 04bd0ed4d2ae..113bd361e537 100644
> --- a/net/netfilter/nf_flow_table_core.c
> +++ b/net/netfilter/nf_flow_table_core.c
> @@ -206,8 +206,8 @@ static void flow_offload_fixup_ct(struct nf_conn *ct)
>  	if (timeout < 0)
>  		timeout = 0;
>  
> -	if (nf_flow_timeout_delta(READ_ONCE(ct->timeout)) > (__s32)timeout)
> -		WRITE_ONCE(ct->timeout, nfct_time_stamp + timeout);
> +	if (nf_flow_timeout_delta(nf_ct_read_timeout(ct)) > (__s32)timeout)
> +		nf_ct_write_timeout(ct, timeout);
>  }
>  
>  static void flow_offload_route_release(struct flow_offload *flow)
> -- 
> 2.40.0.577.gac1e443424-goog
> 

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH] netfilter: conntrack: fix wrong ct->timeout value
  2023-04-10  8:33 ` Pablo Neira Ayuso
@ 2023-04-10  9:31   ` Pablo Neira Ayuso
  2023-04-10  9:59     ` Tzung-Bi Shih
  0 siblings, 1 reply; 12+ messages in thread
From: Pablo Neira Ayuso @ 2023-04-10  9:31 UTC (permalink / raw)
  To: Tzung-Bi Shih
  Cc: kadlec, fw, netfilter-devel, coreteam, jiejiang, jasongustaman,
	garrick

[-- Attachment #1: Type: text/plain, Size: 690 bytes --]

On Mon, Apr 10, 2023 at 10:33:32AM +0200, Pablo Neira Ayuso wrote:
> Hi,
> 
> On Mon, Apr 10, 2023 at 02:09:35PM +0800, Tzung-Bi Shih wrote:
> > (struct nf_conn)->timeout is an interval before the conntrack
> > confirmed.  After confirmed, it becomes a timestamp[1].
> > 
> > It is observed that timeout of an unconfirmed conntrack have been
> > altered by calling ctnetlink_change_timeout().  As a result,
> > `nfct_time_stamp` was wrongly added to `ct->timeout` twice[2].
> > 
> > Differentiate the 2 cases in all `ct->timeout` accesses.
> 
> You can just skip refreshing the timeout for unconfirmed conntrack
> entries in ctnetlink_change_timeout().

Something like this patch probably?

[-- Attachment #2: x.patch --]
[-- Type: text/x-diff, Size: 596 bytes --]

diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index bfc3aaa2c872..6556f5f30844 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -2466,7 +2466,8 @@ static int ctnetlink_new_conntrack(struct sk_buff *skb,
 
 	err = -EEXIST;
 	ct = nf_ct_tuplehash_to_ctrack(h);
-	if (!(info->nlh->nlmsg_flags & NLM_F_EXCL)) {
+	if (!(info->nlh->nlmsg_flags & NLM_F_EXCL) &&
+	    nf_ct_is_confirmed(ct)) {
 		err = ctnetlink_change_conntrack(ct, cda);
 		if (err == 0) {
 			nf_conntrack_eventmask_report((1 << IPCT_REPLY) |

^ permalink raw reply related	[flat|nested] 12+ messages in thread

* Re: [PATCH] netfilter: conntrack: fix wrong ct->timeout value
  2023-04-10  9:31   ` Pablo Neira Ayuso
@ 2023-04-10  9:59     ` Tzung-Bi Shih
  2023-04-12 22:56       ` Pablo Neira Ayuso
  0 siblings, 1 reply; 12+ messages in thread
From: Tzung-Bi Shih @ 2023-04-10  9:59 UTC (permalink / raw)
  To: Pablo Neira Ayuso
  Cc: kadlec, fw, netfilter-devel, coreteam, jiejiang, jasongustaman,
	garrick

On Mon, Apr 10, 2023 at 11:31:21AM +0200, Pablo Neira Ayuso wrote:
> On Mon, Apr 10, 2023 at 10:33:32AM +0200, Pablo Neira Ayuso wrote:
> > On Mon, Apr 10, 2023 at 02:09:35PM +0800, Tzung-Bi Shih wrote:
> > > (struct nf_conn)->timeout is an interval before the conntrack
> > > confirmed.  After confirmed, it becomes a timestamp[1].
> > > 
> > > It is observed that timeout of an unconfirmed conntrack have been
> > > altered by calling ctnetlink_change_timeout().  As a result,
> > > `nfct_time_stamp` was wrongly added to `ct->timeout` twice[2].
> > > 
> > > Differentiate the 2 cases in all `ct->timeout` accesses.
> > 
> > You can just skip refreshing the timeout for unconfirmed conntrack
> > entries in ctnetlink_change_timeout().
> 
> Something like this patch probably?

Pardon me, I sent a v2[3] before seeing the message.

[3]: https://lore.kernel.org/netfilter-devel/20230410093454.853575-1-tzungbi@kernel.org/T/#u

> diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
> index bfc3aaa2c872..6556f5f30844 100644
> --- a/net/netfilter/nf_conntrack_netlink.c
> +++ b/net/netfilter/nf_conntrack_netlink.c
> @@ -2466,7 +2466,8 @@ static int ctnetlink_new_conntrack(struct sk_buff *skb,
>  
>  	err = -EEXIST;
>  	ct = nf_ct_tuplehash_to_ctrack(h);
> -	if (!(info->nlh->nlmsg_flags & NLM_F_EXCL)) {
> +	if (!(info->nlh->nlmsg_flags & NLM_F_EXCL) &&
> +	    nf_ct_is_confirmed(ct)) {
>  		err = ctnetlink_change_conntrack(ct, cda);
>  		if (err == 0) {
>  			nf_conntrack_eventmask_report((1 << IPCT_REPLY) |

The patch can't fix the issue we observed.

Here is the calling stack:
  ctnetlink_glue_parse
  [...]
  __sys_sendto
  __x64_sys_sendto
  [...]

It was on another path:
ctnetlink_glue_parse_ct() -> ctnetlink_change_timeout().

I guess we should skip it in ctnetlink_change_timeout().  Something like v2.

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH] netfilter: conntrack: fix wrong ct->timeout value
  2023-04-10  9:59     ` Tzung-Bi Shih
@ 2023-04-12 22:56       ` Pablo Neira Ayuso
  2023-04-13  3:23         ` Tzung-Bi Shih
  0 siblings, 1 reply; 12+ messages in thread
From: Pablo Neira Ayuso @ 2023-04-12 22:56 UTC (permalink / raw)
  To: Tzung-Bi Shih
  Cc: kadlec, fw, netfilter-devel, coreteam, jiejiang, jasongustaman,
	garrick

On Mon, Apr 10, 2023 at 05:59:54PM +0800, Tzung-Bi Shih wrote:
> On Mon, Apr 10, 2023 at 11:31:21AM +0200, Pablo Neira Ayuso wrote:
> > On Mon, Apr 10, 2023 at 10:33:32AM +0200, Pablo Neira Ayuso wrote:
> > > On Mon, Apr 10, 2023 at 02:09:35PM +0800, Tzung-Bi Shih wrote:
> > > > (struct nf_conn)->timeout is an interval before the conntrack
> > > > confirmed.  After confirmed, it becomes a timestamp[1].
> > > > 
> > > > It is observed that timeout of an unconfirmed conntrack have been
> > > > altered by calling ctnetlink_change_timeout().  As a result,
> > > > `nfct_time_stamp` was wrongly added to `ct->timeout` twice[2].
> > > > 
> > > > Differentiate the 2 cases in all `ct->timeout` accesses.
> > > 
> > > You can just skip refreshing the timeout for unconfirmed conntrack
> > > entries in ctnetlink_change_timeout().
> > 
> > Something like this patch probably?
> 
> Pardon me, I sent a v2[3] before seeing the message.
> 
> [3]: https://lore.kernel.org/netfilter-devel/20230410093454.853575-1-tzungbi@kernel.org/T/#u
> 
> > diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
> > index bfc3aaa2c872..6556f5f30844 100644
> > --- a/net/netfilter/nf_conntrack_netlink.c
> > +++ b/net/netfilter/nf_conntrack_netlink.c
> > @@ -2466,7 +2466,8 @@ static int ctnetlink_new_conntrack(struct sk_buff *skb,
> >  
> >  	err = -EEXIST;
> >  	ct = nf_ct_tuplehash_to_ctrack(h);
> > -	if (!(info->nlh->nlmsg_flags & NLM_F_EXCL)) {
> > +	if (!(info->nlh->nlmsg_flags & NLM_F_EXCL) &&
> > +	    nf_ct_is_confirmed(ct)) {
> >  		err = ctnetlink_change_conntrack(ct, cda);
> >  		if (err == 0) {
> >  			nf_conntrack_eventmask_report((1 << IPCT_REPLY) |
> 
> The patch can't fix the issue we observed.
> 
> Here is the calling stack:
>   ctnetlink_glue_parse
>   [...]
>   __sys_sendto
>   __x64_sys_sendto
>   [...]

I see. So this is from nfqueue path, now I understand better, thanks.

Maybe just do this special handling:

+       if (nf_ct_is_confirmed(ct))
+               WRITE_ONCE(ct->timeout, timeout + nfct_time_stamp);
+       else
+               WRITE_ONCE(ct->timeout, timeout);

for ctnetlink_change_timeout().

Just replace __nf_ct_set_timeout(), by this code above in
nf_conntrack_netlink.c? I think the __nf_ct_set_timeout() helper is
not very useful.

Better not to cripple features, even if this was broken :-).

Thanks.

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH] netfilter: conntrack: fix wrong ct->timeout value
  2023-04-12 22:56       ` Pablo Neira Ayuso
@ 2023-04-13  3:23         ` Tzung-Bi Shih
  2023-04-13  9:04           ` Pablo Neira Ayuso
  0 siblings, 1 reply; 12+ messages in thread
From: Tzung-Bi Shih @ 2023-04-13  3:23 UTC (permalink / raw)
  To: Pablo Neira Ayuso
  Cc: kadlec, fw, netfilter-devel, coreteam, jiejiang, jasongustaman,
	garrick

On Thu, Apr 13, 2023 at 12:56:01AM +0200, Pablo Neira Ayuso wrote:
> Maybe just do this special handling:
> 
> +       if (nf_ct_is_confirmed(ct))
> +               WRITE_ONCE(ct->timeout, timeout + nfct_time_stamp);
> +       else
> +               WRITE_ONCE(ct->timeout, timeout);
> 
> for ctnetlink_change_timeout().
> 
> Just replace __nf_ct_set_timeout(), by this code above in
> nf_conntrack_netlink.c? I think the __nf_ct_set_timeout() helper is
> not very useful.

I don't quite understand the message above.

Calling path in v6.3-rc6:
ctnetlink_change_timeout() in net/netfilter/nf_conntrack_netlink.c
-> __nf_ct_change_timeout() in net/netfilter/nf_conntrack_core.c
-> __nf_ct_set_timeout() in include/net/netfilter/nf_conntrack_core.h

To clarify, which one did you mean:

Option 1: replace the __nf_ct_change_timeout() invocation to the special
          handling in net/netfilter/nf_conntrack_netlink.c
Option 2: replace the __nf_ct_set_timeout() invocation to the special
          handling in net/netfilter/nf_conntrack_core.c
Option 3: put the special handling in __nf_ct_set_timeout() in
          include/net/netfilter/nf_conntrack_core.h

In either case, the fix would be a subset of v1.

I'm not sure other use cases.  In our environment, we observed an
inconsistent state by a partial fix of v1.  nf_ct_expires() got called
by userspace program.  And the return value (which means the remaining
timeout) will be the parameter for the next ctnetlink_change_timeout().
As you can see in [4], if this happens on an unconfirmed conntrack, the
`nfct_time_stamp` would be wrongly invoved in the calculation again.
That's why we take care of all `ct->timeout` accesses in v1.

[4]: https://elixir.bootlin.com/linux/v6.3-rc6/source/include/net/netfilter/nf_conntrack.h#L296

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH] netfilter: conntrack: fix wrong ct->timeout value
  2023-04-13  3:23         ` Tzung-Bi Shih
@ 2023-04-13  9:04           ` Pablo Neira Ayuso
  2023-04-14  3:52             ` Tzung-Bi Shih
  0 siblings, 1 reply; 12+ messages in thread
From: Pablo Neira Ayuso @ 2023-04-13  9:04 UTC (permalink / raw)
  To: Tzung-Bi Shih
  Cc: kadlec, fw, netfilter-devel, coreteam, jiejiang, jasongustaman,
	garrick

On Thu, Apr 13, 2023 at 11:23:11AM +0800, Tzung-Bi Shih wrote:
> On Thu, Apr 13, 2023 at 12:56:01AM +0200, Pablo Neira Ayuso wrote:
> > Maybe just do this special handling:
> > 
> > +       if (nf_ct_is_confirmed(ct))
> > +               WRITE_ONCE(ct->timeout, timeout + nfct_time_stamp);
> > +       else
> > +               WRITE_ONCE(ct->timeout, timeout);
> > 
> > for ctnetlink_change_timeout().
> > 
> > Just replace __nf_ct_set_timeout(), by this code above in
> > nf_conntrack_netlink.c? I think the __nf_ct_set_timeout() helper is
> > not very useful.
> 
> I don't quite understand the message above.
> 
> Calling path in v6.3-rc6:
> ctnetlink_change_timeout() in net/netfilter/nf_conntrack_netlink.c
> -> __nf_ct_change_timeout() in net/netfilter/nf_conntrack_core.c
> -> __nf_ct_set_timeout() in include/net/netfilter/nf_conntrack_core.h
> 
> To clarify, which one did you mean:
> 
> Option 1: replace the __nf_ct_change_timeout() invocation to the special
>           handling in net/netfilter/nf_conntrack_netlink.c
> Option 2: replace the __nf_ct_set_timeout() invocation to the special
>           handling in net/netfilter/nf_conntrack_core.c
> Option 3: put the special handling in __nf_ct_set_timeout() in
>           include/net/netfilter/nf_conntrack_core.h
> 
> In either case, the fix would be a subset of v1.

Yes, I think this is Option 3:

diff --git a/include/net/netfilter/nf_conntrack_core.h b/include/net/netfilter/nf_conntrack_core.h
index 71d1269fe4d4..9c2cd69bbdc6 100644
--- a/include/net/netfilter/nf_conntrack_core.h
+++ b/include/net/netfilter/nf_conntrack_core.h
@@ -89,7 +89,11 @@ static inline void __nf_ct_set_timeout(struct nf_conn *ct, u64 timeout)
 {
        if (timeout > INT_MAX)
                timeout = INT_MAX;
-       WRITE_ONCE(ct->timeout, nfct_time_stamp + (u32)timeout);
+
+       if (nf_ct_is_confirmed(ct))
+               WRITE_ONCE(ct->timeout, nfct_time_stamp + (u32)timeout;
+       else
+               ct->timeout = (u32)timeout;
 }
 
 int __nf_ct_change_timeout(struct nf_conn *ct, u64 cta_timeout);

Note:

                WRITE_ONCE(ct->timeout, (u32)timeout);

is not required, because unconfirmed conntrack object is owned by the
packet (not yet in the hashes).


BTW, not related to this patch, but I would like to understand why
this __nf_ct_set_timeout() function is inline, but that is a different
issue.

> I'm not sure other use cases.  In our environment, we observed an
> inconsistent state by a partial fix of v1. 

Thanks for explaining, extending patch description would be good.

> nf_ct_expires() got called by userspace program.  And the return
> value (which means the remaining timeout) will be the parameter for
> the next ctnetlink_change_timeout().

Unconfirmed conntrack is owned by the packet that refers to it, it is
not yet in the hashes. I don't see how concurrent access to the
timeout might occur.

Or are you referring to a different scenario that triggers the partial
state?

> As you can see in [4], if this happens on an unconfirmed conntrack, the
> `nfct_time_stamp` would be wrongly invoved in the calculation again.
> That's why we take care of all `ct->timeout` accesses in v1.

If you are observing a partial state, that is a different issue and I
think it deserves a separated patch with a description? Probably
including KCSAN splat if this is what you used to catch the partial
state.

Thanks!

> [4]: https://elixir.bootlin.com/linux/v6.3-rc6/source/include/net/netfilter/nf_conntrack.h#L296

^ permalink raw reply related	[flat|nested] 12+ messages in thread

* Re: [PATCH] netfilter: conntrack: fix wrong ct->timeout value
  2023-04-13  9:04           ` Pablo Neira Ayuso
@ 2023-04-14  3:52             ` Tzung-Bi Shih
  2023-04-14  8:12               ` Pablo Neira Ayuso
  0 siblings, 1 reply; 12+ messages in thread
From: Tzung-Bi Shih @ 2023-04-14  3:52 UTC (permalink / raw)
  To: Pablo Neira Ayuso
  Cc: kadlec, fw, netfilter-devel, coreteam, jiejiang, jasongustaman,
	garrick

On Thu, Apr 13, 2023 at 11:04:24AM +0200, Pablo Neira Ayuso wrote:
> On Thu, Apr 13, 2023 at 11:23:11AM +0800, Tzung-Bi Shih wrote:
> > nf_ct_expires() got called by userspace program.  And the return
> > value (which means the remaining timeout) will be the parameter for
> > the next ctnetlink_change_timeout().
> 
> Unconfirmed conntrack is owned by the packet that refers to it, it is
> not yet in the hashes. I don't see how concurrent access to the
> timeout might occur.
> 
> Or are you referring to a different scenario that triggers the partial
> state?

I think it isn't a concurrent access.  We observed that userspace program
reads the remaining timeout and sets it back for unconfirmed conntrack.

Conceptually, it looks like:
timeout = nf_ct_expires(...);  (1)
ctnetlink_change_timeout(...timeout);

At (1), `nfct_time_stamp` is wrongly involved in the calculation[4] because
the conntrack is unconfirmed.  The `ct->timeout` is an internal but not a
timestamp.

As a result, ctnetlink_change_timeout() sets the wrong value to `ct->timeout`.

[4]: https://elixir.bootlin.com/linux/v6.3-rc6/source/include/net/netfilter/nf_conntrack.h#L296

> > As you can see in [4], if this happens on an unconfirmed conntrack, the
> > `nfct_time_stamp` would be wrongly invoved in the calculation again.
> > That's why we take care of all `ct->timeout` accesses in v1.

Again, that's why v1 separates all `ct->timeout` accesses.

If the conntrack is confirmed:
- `ct->timeout` is a timestamp.
- `nfct_time_stamp` should be involved when getting/setting `ct->timeout`.

If the conntrack is unconfirmed:
- `ct->timeout` is an interval.
- `nfct_time_stamp` shouldn't be involved.

Only separate `ct->timeout` access in __nf_ct_set_timeout() is obviously
insufficient.  I would suggest either take care of all `ct->timeout`
accesses as v1 or at least change both __nf_ct_set_timeout() and
nf_ct_expires().

In the latter case, it looks fine in our environment.  However, I'm not
sure if any hidden cases we haven't seen.

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH] netfilter: conntrack: fix wrong ct->timeout value
  2023-04-14  3:52             ` Tzung-Bi Shih
@ 2023-04-14  8:12               ` Pablo Neira Ayuso
  2023-04-17  3:41                 ` Tzung-Bi Shih
  0 siblings, 1 reply; 12+ messages in thread
From: Pablo Neira Ayuso @ 2023-04-14  8:12 UTC (permalink / raw)
  To: Tzung-Bi Shih
  Cc: kadlec, fw, netfilter-devel, coreteam, jiejiang, jasongustaman,
	garrick

On Fri, Apr 14, 2023 at 11:52:02AM +0800, Tzung-Bi Shih wrote:
> On Thu, Apr 13, 2023 at 11:04:24AM +0200, Pablo Neira Ayuso wrote:
> > On Thu, Apr 13, 2023 at 11:23:11AM +0800, Tzung-Bi Shih wrote:
> > > nf_ct_expires() got called by userspace program.  And the return
> > > value (which means the remaining timeout) will be the parameter for
> > > the next ctnetlink_change_timeout().
> > 
> > Unconfirmed conntrack is owned by the packet that refers to it, it is
> > not yet in the hashes. I don't see how concurrent access to the
> > timeout might occur.
> > 
> > Or are you referring to a different scenario that triggers the partial
> > state?
> 
> I think it isn't a concurrent access.  We observed that userspace program
> reads the remaining timeout and sets it back for unconfirmed conntrack.
> 
> Conceptually, it looks like:
> timeout = nf_ct_expires(...);  (1)
> ctnetlink_change_timeout(...timeout);

How could this possibly happen?

nf_ct_expires() is called from:

- ctnetlink_dump_timeout(), this is netlink dump path, since:

commit 1397af5bfd7d32b0cf2adb70a78c9a9e8f11d912
Author: Florian Westphal <fw@strlen.de>
Date:   Mon Apr 11 13:01:18 2022 +0200

    netfilter: conntrack: remove the percpu dying list

it is not possible to do any listing of unconfirmed, and that is fine.

As I said, nfnetlink_queue operates with an unconfirmed conntrack with
owns exclusively, which is not in the hashes.

- nf_ct_expires() calls from xt and nft matches are irrelevant:

net/netfilter/nft_ct.c:         *dest = jiffies_to_msecs(nf_ct_expires(ct));
net/netfilter/xt_conntrack.c:           unsigned long expires = nf_ct_expires(ct) / HZ;

- there is the garbage collector, but that only works with conntrack
  entries in the hashes:

net/netfilter/nf_conntrack_core.c:                      expires = clamp(nf_ct_expires(tmp), GC_SCAN_INTERVAL_MIN, GC_SCAN_INTERVAL_CLAMP);

- there is also /proc interface, but that only works with conntrack
  entries in the hashes:

net/netfilter/nf_conntrack_standalone.c:                seq_printf(s, "%ld ", nf_ct_expires(ct)  / HZ);

> At (1), `nfct_time_stamp` is wrongly involved in the calculation[4] because
> the conntrack is unconfirmed.  The `ct->timeout` is an internal but not a
> timestamp.

I can see there are two possible states for ct->timeout, thanks for
explaining this again.

> As a result, ctnetlink_change_timeout() sets the wrong value to `ct->timeout`.
> 
> [4]: https://elixir.bootlin.com/linux/v6.3-rc6/source/include/net/netfilter/nf_conntrack.h#L296
> 
> > > As you can see in [4], if this happens on an unconfirmed conntrack, the
> > > `nfct_time_stamp` would be wrongly invoved in the calculation again.
> > > That's why we take care of all `ct->timeout` accesses in v1.
> 
> Again, that's why v1 separates all `ct->timeout` accesses.
> 
> If the conntrack is confirmed:
> - `ct->timeout` is a timestamp.
> - `nfct_time_stamp` should be involved when getting/setting `ct->timeout`.
> 
> If the conntrack is unconfirmed:
> - `ct->timeout` is an interval.
> - `nfct_time_stamp` shouldn't be involved.
> 
> Only separate `ct->timeout` access in __nf_ct_set_timeout() is obviously
> insufficient.  I would suggest either take care of all `ct->timeout`
> accesses as v1 or at least change both __nf_ct_set_timeout() and
> nf_ct_expires().

it does not even make sense to use WRITE_ONCE from
__nf_conntrack_confirm(), see:

https://elixir.bootlin.com/linux/latest/source/net/netfilter/nf_conntrack_core.c#L1260

because the unconfirmed conntrack object is owned exclusively by the packet.

> In the latter case, it looks fine in our environment.  However, I'm not
> sure if any hidden cases we haven't seen.

Maybe you have an old kernel?

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH] netfilter: conntrack: fix wrong ct->timeout value
  2023-04-14  8:12               ` Pablo Neira Ayuso
@ 2023-04-17  3:41                 ` Tzung-Bi Shih
  2023-04-18  8:17                   ` Pablo Neira Ayuso
  0 siblings, 1 reply; 12+ messages in thread
From: Tzung-Bi Shih @ 2023-04-17  3:41 UTC (permalink / raw)
  To: Pablo Neira Ayuso
  Cc: kadlec, fw, netfilter-devel, coreteam, jiejiang, jasongustaman,
	garrick

On Fri, Apr 14, 2023 at 10:12:14AM +0200, Pablo Neira Ayuso wrote:
> On Fri, Apr 14, 2023 at 11:52:02AM +0800, Tzung-Bi Shih wrote:
> > On Thu, Apr 13, 2023 at 11:04:24AM +0200, Pablo Neira Ayuso wrote:
> > > On Thu, Apr 13, 2023 at 11:23:11AM +0800, Tzung-Bi Shih wrote:
> > > > nf_ct_expires() got called by userspace program.  And the return
> > > > value (which means the remaining timeout) will be the parameter for
> > > > the next ctnetlink_change_timeout().
> > > 
> > > Unconfirmed conntrack is owned by the packet that refers to it, it is
> > > not yet in the hashes. I don't see how concurrent access to the
> > > timeout might occur.
> > > 
> > > Or are you referring to a different scenario that triggers the partial
> > > state?
> > 
> > I think it isn't a concurrent access.  We observed that userspace program
> > reads the remaining timeout and sets it back for unconfirmed conntrack.
> > 
> > Conceptually, it looks like:
> > timeout = nf_ct_expires(...);  (1)
> > ctnetlink_change_timeout(...timeout);
> 
> How could this possibly happen?
> 
> nf_ct_expires() is called from:
> 
> - ctnetlink_dump_timeout(), this is netlink dump path, since:
> 
> commit 1397af5bfd7d32b0cf2adb70a78c9a9e8f11d912
> Author: Florian Westphal <fw@strlen.de>
> Date:   Mon Apr 11 13:01:18 2022 +0200
> 
>     netfilter: conntrack: remove the percpu dying list
> 
> it is not possible to do any listing of unconfirmed, and that is fine.
> 
> As I said, nfnetlink_queue operates with an unconfirmed conntrack with
> owns exclusively, which is not in the hashes.

I applied the following patches on top of v6.3-rc5[5].

As you suggested:

diff --git a/include/net/netfilter/nf_conntrack_core.h b/include/net/netfilter/nf_conntrack_core.h
index 71d1269fe4d4..3384859a8921 100644
--- a/include/net/netfilter/nf_conntrack_core.h
+++ b/include/net/netfilter/nf_conntrack_core.h
@@ -89,7 +89,11 @@ static inline void __nf_ct_set_timeout(struct nf_conn *ct, u64 timeout)
 {
        if (timeout > INT_MAX)
                timeout = INT_MAX;
-       WRITE_ONCE(ct->timeout, nfct_time_stamp + (u32)timeout);
+
+       if (nf_ct_is_confirmed(ct))
+               WRITE_ONCE(ct->timeout, nfct_time_stamp + (u32)timeout);
+       else
+               ct->timeout = (u32)timeout;
 }
 
And this patch for dumping the debug information:

diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index bfc3aaa2c872..b4c016ff07e9 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -178,6 +178,11 @@ static int ctnetlink_dump_timeout(struct sk_buff *skb, const struct nf_conn *ct,
 {
        long timeout = nf_ct_expires(ct) / HZ;
 
+       if (!nf_ct_is_confirmed(ct)) {
+               dump_stack();
+               printk(KERN_ERR "===ct->timeout=%u===timeout=%ld\n", ct->timeout, timeout);
+       }
+
        if (skip_zero && timeout == 0)
                return 0;

And here is the trimmed dmesg:

===ct->timeout=30000===timeout=0
[...] 6.3.0-rc5-00675-ged0c7cbf5b2c [...]
Call Trace:
 <TASK>
 dump_stack_lvl
 ctnetlink_dump_timeout
 __ctnetlink_glue_build
 ctnetlink_glue_build
 __nfqnl_enqueue_packet
 nf_queue
 nf_hook_slow
 ip_mc_output
 ? __pfx_ip_finish_output
 ip_send_skb
 ? __pfx_dst_output
 udp_send_skb
 udp_sendmsg
 ? __pfx_ip_generic_getfrag
 sock_sendmsg

As the dmesg showed, the unconfirmed conntrack did call into
ctnetlink_dump_timeout().  As a result, the value returned from
nf_ct_expires() is always 0 in the case (because the `ct->timeout` got
subtracted by current timestamp `nfct_time_stamp`[4]).

[4]: https://elixir.bootlin.com/linux/v6.3-rc6/source/include/net/netfilter/nf_conntrack.h#L296
[5]: https://chromium.googlesource.com/chromiumos/third_party/kernel/+/refs/heads/merge/continuous/chromeos-kernelupstream-6.3-rc5

^ permalink raw reply related	[flat|nested] 12+ messages in thread

* Re: [PATCH] netfilter: conntrack: fix wrong ct->timeout value
  2023-04-17  3:41                 ` Tzung-Bi Shih
@ 2023-04-18  8:17                   ` Pablo Neira Ayuso
  2023-04-19  5:20                     ` Tzung-Bi Shih
  0 siblings, 1 reply; 12+ messages in thread
From: Pablo Neira Ayuso @ 2023-04-18  8:17 UTC (permalink / raw)
  To: Tzung-Bi Shih
  Cc: kadlec, fw, netfilter-devel, coreteam, jiejiang, jasongustaman,
	garrick

Hi,

On Mon, Apr 17, 2023 at 11:41:41AM +0800, Tzung-Bi Shih wrote:
> On Fri, Apr 14, 2023 at 10:12:14AM +0200, Pablo Neira Ayuso wrote:
> > On Fri, Apr 14, 2023 at 11:52:02AM +0800, Tzung-Bi Shih wrote:
> > > On Thu, Apr 13, 2023 at 11:04:24AM +0200, Pablo Neira Ayuso wrote:
> > > > On Thu, Apr 13, 2023 at 11:23:11AM +0800, Tzung-Bi Shih wrote:
> > > > > nf_ct_expires() got called by userspace program.  And the return
> > > > > value (which means the remaining timeout) will be the parameter for
> > > > > the next ctnetlink_change_timeout().
> > > > 
> > > > Unconfirmed conntrack is owned by the packet that refers to it, it is
> > > > not yet in the hashes. I don't see how concurrent access to the
> > > > timeout might occur.
> > > > 
> > > > Or are you referring to a different scenario that triggers the partial
> > > > state?
> > > 
> > > I think it isn't a concurrent access.  We observed that userspace program
> > > reads the remaining timeout and sets it back for unconfirmed conntrack.
> > > 
> > > Conceptually, it looks like:
> > > timeout = nf_ct_expires(...);  (1)
> > > ctnetlink_change_timeout(...timeout);
> > 
> > How could this possibly happen?
> > 
> > nf_ct_expires() is called from:
> > 
> > - ctnetlink_dump_timeout(), this is netlink dump path, since:
> > 
> > commit 1397af5bfd7d32b0cf2adb70a78c9a9e8f11d912
> > Author: Florian Westphal <fw@strlen.de>
> > Date:   Mon Apr 11 13:01:18 2022 +0200
> > 
> >     netfilter: conntrack: remove the percpu dying list
> > 
> > it is not possible to do any listing of unconfirmed, and that is fine.
> > 
> > As I said, nfnetlink_queue operates with an unconfirmed conntrack with
> > owns exclusively, which is not in the hashes.
> 
> I applied the following patches on top of v6.3-rc5[5].
> 
> As you suggested:
> 
> diff --git a/include/net/netfilter/nf_conntrack_core.h b/include/net/netfilter/nf_conntrack_core.h
> index 71d1269fe4d4..3384859a8921 100644
> --- a/include/net/netfilter/nf_conntrack_core.h
> +++ b/include/net/netfilter/nf_conntrack_core.h
> @@ -89,7 +89,11 @@ static inline void __nf_ct_set_timeout(struct nf_conn *ct, u64 timeout)
>  {
>         if (timeout > INT_MAX)
>                 timeout = INT_MAX;
> -       WRITE_ONCE(ct->timeout, nfct_time_stamp + (u32)timeout);
> +
> +       if (nf_ct_is_confirmed(ct))
> +               WRITE_ONCE(ct->timeout, nfct_time_stamp + (u32)timeout);
> +       else
> +               ct->timeout = (u32)timeout;
>  }
>  
> And this patch for dumping the debug information:
> 
> diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
> index bfc3aaa2c872..b4c016ff07e9 100644
> --- a/net/netfilter/nf_conntrack_netlink.c
> +++ b/net/netfilter/nf_conntrack_netlink.c
> @@ -178,6 +178,11 @@ static int ctnetlink_dump_timeout(struct sk_buff *skb, const struct nf_conn *ct,
>  {
>         long timeout = nf_ct_expires(ct) / HZ;
>  
> +       if (!nf_ct_is_confirmed(ct)) {
> +               dump_stack();
> +               printk(KERN_ERR "===ct->timeout=%u===timeout=%ld\n", ct->timeout, timeout);
> +       }
> +
>         if (skip_zero && timeout == 0)
>                 return 0;
> 
> And here is the trimmed dmesg:
> 
> ===ct->timeout=30000===timeout=0
> [...] 6.3.0-rc5-00675-ged0c7cbf5b2c [...]
> Call Trace:
>  <TASK>
>  dump_stack_lvl
>  ctnetlink_dump_timeout
>  __ctnetlink_glue_build
>  ctnetlink_glue_build
>  __nfqnl_enqueue_packet
>  nf_queue
>  nf_hook_slow
>  ip_mc_output
>  ? __pfx_ip_finish_output
>  ip_send_skb
>  ? __pfx_dst_output
>  udp_send_skb
>  udp_sendmsg
>  ? __pfx_ip_generic_getfrag
>  sock_sendmsg
> 
> As the dmesg showed, the unconfirmed conntrack did call into
> ctnetlink_dump_timeout().  As a result, the value returned from
> nf_ct_expires() is always 0 in the case (because the `ct->timeout` got
> subtracted by current timestamp `nfct_time_stamp`[4]).

Oh, interesting.

Thanks for digging deeper into this issue.

Then, on top of what I suggest, my recommendation is to skip the
ctnetlink_dump_timeout() call for !nf_ct_confirmed(ct).

Or, you add some special handling for ctnetlink_dump_timeout() for the
!nf_ct_confirmed(ct) case.

Let me know, thanks.

> [4]: https://elixir.bootlin.com/linux/v6.3-rc6/source/include/net/netfilter/nf_conntrack.h#L296
> [5]: https://chromium.googlesource.com/chromiumos/third_party/kernel/+/refs/heads/merge/continuous/chromeos-kernelupstream-6.3-rc5

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH] netfilter: conntrack: fix wrong ct->timeout value
  2023-04-18  8:17                   ` Pablo Neira Ayuso
@ 2023-04-19  5:20                     ` Tzung-Bi Shih
  0 siblings, 0 replies; 12+ messages in thread
From: Tzung-Bi Shih @ 2023-04-19  5:20 UTC (permalink / raw)
  To: Pablo Neira Ayuso
  Cc: kadlec, fw, netfilter-devel, coreteam, jiejiang, jasongustaman,
	garrick

On Tue, Apr 18, 2023 at 10:17:32AM +0200, Pablo Neira Ayuso wrote:
> Then, on top of what I suggest, my recommendation is to skip the
> ctnetlink_dump_timeout() call for !nf_ct_confirmed(ct).
> 
> Or, you add some special handling for ctnetlink_dump_timeout() for the
> !nf_ct_confirmed(ct) case.

Sent a v3[6] for this and hope it makes more sense.

[6]: https://patchwork.ozlabs.org/project/netfilter-devel/patch/20230419051526.3170053-1-tzungbi@kernel.org/

^ permalink raw reply	[flat|nested] 12+ messages in thread

end of thread, other threads:[~2023-04-19  5:20 UTC | newest]

Thread overview: 12+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2023-04-10  6:09 [PATCH] netfilter: conntrack: fix wrong ct->timeout value Tzung-Bi Shih
2023-04-10  8:33 ` Pablo Neira Ayuso
2023-04-10  9:31   ` Pablo Neira Ayuso
2023-04-10  9:59     ` Tzung-Bi Shih
2023-04-12 22:56       ` Pablo Neira Ayuso
2023-04-13  3:23         ` Tzung-Bi Shih
2023-04-13  9:04           ` Pablo Neira Ayuso
2023-04-14  3:52             ` Tzung-Bi Shih
2023-04-14  8:12               ` Pablo Neira Ayuso
2023-04-17  3:41                 ` Tzung-Bi Shih
2023-04-18  8:17                   ` Pablo Neira Ayuso
2023-04-19  5:20                     ` Tzung-Bi Shih

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.