* [PATCH] netfilter: conntrack: fix wrong ct->timeout value
@ 2023-04-10 6:09 Tzung-Bi Shih
2023-04-10 8:33 ` Pablo Neira Ayuso
0 siblings, 1 reply; 12+ messages in thread
From: Tzung-Bi Shih @ 2023-04-10 6:09 UTC (permalink / raw)
To: pablo, kadlec, fw
Cc: netfilter-devel, coreteam, tzungbi, jiejiang, jasongustaman,
garrick
(struct nf_conn)->timeout is an interval before the conntrack
confirmed. After confirmed, it becomes a timestamp[1].
It is observed that timeout of an unconfirmed conntrack have been
altered by calling ctnetlink_change_timeout(). As a result,
`nfct_time_stamp` was wrongly added to `ct->timeout` twice[2].
Differentiate the 2 cases in all `ct->timeout` accesses.
[1]: https://elixir.bootlin.com/linux/v6.3-rc5/source/net/netfilter/nf_conntrack_core.c#L1257
[2]: https://elixir.bootlin.com/linux/v6.3-rc5/source/include/net/netfilter/nf_conntrack_core.h#L92
Signed-off-by: Tzung-Bi Shih <tzungbi@kernel.org>
---
include/net/netfilter/nf_conntrack.h | 27 +++++++++++++++++++----
include/net/netfilter/nf_conntrack_core.h | 2 +-
net/netfilter/nf_conntrack_core.c | 11 ++++-----
net/netfilter/nf_conntrack_proto_tcp.c | 4 ++--
net/netfilter/nf_flow_table_core.c | 4 ++--
5 files changed, 32 insertions(+), 16 deletions(-)
diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h
index a72028dbef0c..48e020db3fb3 100644
--- a/include/net/netfilter/nf_conntrack.h
+++ b/include/net/netfilter/nf_conntrack.h
@@ -290,17 +290,36 @@ static inline bool nf_is_loopback_packet(const struct sk_buff *skb)
#define nfct_time_stamp ((u32)(jiffies))
+static inline s32 nf_ct_read_timeout(const struct nf_conn *ct)
+{
+ s32 timeout;
+
+ if (nf_ct_is_confirmed(ct))
+ timeout = READ_ONCE(ct->timeout) - nfct_time_stamp;
+ else
+ timeout = READ_ONCE(ct->timeout);
+
+ return timeout;
+}
+
+static inline void nf_ct_write_timeout(struct nf_conn *ct, s32 timeout)
+{
+ if (nf_ct_is_confirmed(ct))
+ WRITE_ONCE(ct->timeout, timeout + nfct_time_stamp);
+ else
+ WRITE_ONCE(ct->timeout, timeout);
+}
+
/* jiffies until ct expires, 0 if already expired */
static inline unsigned long nf_ct_expires(const struct nf_conn *ct)
{
- s32 timeout = READ_ONCE(ct->timeout) - nfct_time_stamp;
-
+ s32 timeout = nf_ct_read_timeout(ct);
return max(timeout, 0);
}
static inline bool nf_ct_is_expired(const struct nf_conn *ct)
{
- return (__s32)(READ_ONCE(ct->timeout) - nfct_time_stamp) <= 0;
+ return nf_ct_expires(ct) == 0;
}
/* use after obtaining a reference count */
@@ -319,7 +338,7 @@ static inline bool nf_ct_should_gc(const struct nf_conn *ct)
static inline void nf_ct_offload_timeout(struct nf_conn *ct)
{
if (nf_ct_expires(ct) < NF_CT_DAY / 2)
- WRITE_ONCE(ct->timeout, nfct_time_stamp + NF_CT_DAY);
+ nf_ct_write_timeout(ct, NF_CT_DAY);
}
struct kernel_param;
diff --git a/include/net/netfilter/nf_conntrack_core.h b/include/net/netfilter/nf_conntrack_core.h
index 71d1269fe4d4..7a6e49a66d20 100644
--- a/include/net/netfilter/nf_conntrack_core.h
+++ b/include/net/netfilter/nf_conntrack_core.h
@@ -89,7 +89,7 @@ static inline void __nf_ct_set_timeout(struct nf_conn *ct, u64 timeout)
{
if (timeout > INT_MAX)
timeout = INT_MAX;
- WRITE_ONCE(ct->timeout, nfct_time_stamp + (u32)timeout);
+ nf_ct_write_timeout(ct, (u32)timeout);
}
int __nf_ct_change_timeout(struct nf_conn *ct, u64 cta_timeout);
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index db1ea361f2da..47166576c195 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -657,7 +657,7 @@ bool nf_ct_delete(struct nf_conn *ct, u32 portid, int report)
tstamp = nf_conn_tstamp_find(ct);
if (tstamp) {
- s32 timeout = READ_ONCE(ct->timeout) - nfct_time_stamp;
+ s32 timeout = nf_ct_read_timeout(ct);
tstamp->stop = ktime_get_real_ns();
if (timeout < 0)
@@ -1063,7 +1063,7 @@ static int nf_ct_resolve_clash_harder(struct sk_buff *skb, u32 repl_idx)
}
/* We want the clashing entry to go away real soon: 1 second timeout. */
- WRITE_ONCE(loser_ct->timeout, nfct_time_stamp + HZ);
+ nf_ct_write_timeout(loser_ct, HZ);
/* IPS_NAT_CLASH removes the entry automatically on the first
* reply. Also prevents UDP tracker from moving the entry to
@@ -2079,11 +2079,8 @@ void __nf_ct_refresh_acct(struct nf_conn *ct,
goto acct;
/* If not in hash table, timer will not be active yet */
- if (nf_ct_is_confirmed(ct))
- extra_jiffies += nfct_time_stamp;
-
- if (READ_ONCE(ct->timeout) != extra_jiffies)
- WRITE_ONCE(ct->timeout, extra_jiffies);
+ if (nf_ct_read_timeout(ct) != extra_jiffies)
+ nf_ct_write_timeout(ct, extra_jiffies);
acct:
if (do_acct)
nf_ct_acct_update(ct, CTINFO2DIR(ctinfo), skb->len);
diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c
index 4018acb1d674..a2797d026943 100644
--- a/net/netfilter/nf_conntrack_proto_tcp.c
+++ b/net/netfilter/nf_conntrack_proto_tcp.c
@@ -766,7 +766,7 @@ static void __cold nf_tcp_handle_invalid(struct nf_conn *ct,
"packet (index %d, dir %d) response for index %d lower timeout to %u",
index, dir, ct->proto.tcp.last_index, timeout);
- WRITE_ONCE(ct->timeout, timeout + nfct_time_stamp);
+ nf_ct_write_timeout(ct, timeout);
}
} else {
ct->proto.tcp.last_index = index;
@@ -939,7 +939,7 @@ void nf_conntrack_tcp_set_closing(struct nf_conn *ct)
}
timeout = timeouts[TCP_CONNTRACK_CLOSE];
- WRITE_ONCE(ct->timeout, timeout + nfct_time_stamp);
+ nf_ct_write_timeout(ct, timeout);
spin_unlock_bh(&ct->lock);
diff --git a/net/netfilter/nf_flow_table_core.c b/net/netfilter/nf_flow_table_core.c
index 04bd0ed4d2ae..113bd361e537 100644
--- a/net/netfilter/nf_flow_table_core.c
+++ b/net/netfilter/nf_flow_table_core.c
@@ -206,8 +206,8 @@ static void flow_offload_fixup_ct(struct nf_conn *ct)
if (timeout < 0)
timeout = 0;
- if (nf_flow_timeout_delta(READ_ONCE(ct->timeout)) > (__s32)timeout)
- WRITE_ONCE(ct->timeout, nfct_time_stamp + timeout);
+ if (nf_flow_timeout_delta(nf_ct_read_timeout(ct)) > (__s32)timeout)
+ nf_ct_write_timeout(ct, timeout);
}
static void flow_offload_route_release(struct flow_offload *flow)
--
2.40.0.577.gac1e443424-goog
^ permalink raw reply related [flat|nested] 12+ messages in thread
* Re: [PATCH] netfilter: conntrack: fix wrong ct->timeout value
2023-04-10 6:09 [PATCH] netfilter: conntrack: fix wrong ct->timeout value Tzung-Bi Shih
@ 2023-04-10 8:33 ` Pablo Neira Ayuso
2023-04-10 9:31 ` Pablo Neira Ayuso
0 siblings, 1 reply; 12+ messages in thread
From: Pablo Neira Ayuso @ 2023-04-10 8:33 UTC (permalink / raw)
To: Tzung-Bi Shih
Cc: kadlec, fw, netfilter-devel, coreteam, jiejiang, jasongustaman,
garrick
Hi,
On Mon, Apr 10, 2023 at 02:09:35PM +0800, Tzung-Bi Shih wrote:
> (struct nf_conn)->timeout is an interval before the conntrack
> confirmed. After confirmed, it becomes a timestamp[1].
>
> It is observed that timeout of an unconfirmed conntrack have been
> altered by calling ctnetlink_change_timeout(). As a result,
> `nfct_time_stamp` was wrongly added to `ct->timeout` twice[2].
>
> Differentiate the 2 cases in all `ct->timeout` accesses.
You can just skip refreshing the timeout for unconfirmed conntrack
entries in ctnetlink_change_timeout().
> [1]: https://elixir.bootlin.com/linux/v6.3-rc5/source/net/netfilter/nf_conntrack_core.c#L1257
> [2]: https://elixir.bootlin.com/linux/v6.3-rc5/source/include/net/netfilter/nf_conntrack_core.h#L92
>
> Signed-off-by: Tzung-Bi Shih <tzungbi@kernel.org>
> ---
> include/net/netfilter/nf_conntrack.h | 27 +++++++++++++++++++----
> include/net/netfilter/nf_conntrack_core.h | 2 +-
> net/netfilter/nf_conntrack_core.c | 11 ++++-----
> net/netfilter/nf_conntrack_proto_tcp.c | 4 ++--
> net/netfilter/nf_flow_table_core.c | 4 ++--
> 5 files changed, 32 insertions(+), 16 deletions(-)
>
> diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h
> index a72028dbef0c..48e020db3fb3 100644
> --- a/include/net/netfilter/nf_conntrack.h
> +++ b/include/net/netfilter/nf_conntrack.h
> @@ -290,17 +290,36 @@ static inline bool nf_is_loopback_packet(const struct sk_buff *skb)
>
> #define nfct_time_stamp ((u32)(jiffies))
>
> +static inline s32 nf_ct_read_timeout(const struct nf_conn *ct)
> +{
> + s32 timeout;
> +
> + if (nf_ct_is_confirmed(ct))
> + timeout = READ_ONCE(ct->timeout) - nfct_time_stamp;
> + else
> + timeout = READ_ONCE(ct->timeout);
> +
> + return timeout;
> +}
> +
> +static inline void nf_ct_write_timeout(struct nf_conn *ct, s32 timeout)
> +{
> + if (nf_ct_is_confirmed(ct))
> + WRITE_ONCE(ct->timeout, timeout + nfct_time_stamp);
> + else
> + WRITE_ONCE(ct->timeout, timeout);
> +}
> +
> /* jiffies until ct expires, 0 if already expired */
> static inline unsigned long nf_ct_expires(const struct nf_conn *ct)
> {
> - s32 timeout = READ_ONCE(ct->timeout) - nfct_time_stamp;
> -
> + s32 timeout = nf_ct_read_timeout(ct);
> return max(timeout, 0);
> }
>
> static inline bool nf_ct_is_expired(const struct nf_conn *ct)
> {
> - return (__s32)(READ_ONCE(ct->timeout) - nfct_time_stamp) <= 0;
> + return nf_ct_expires(ct) == 0;
> }
>
> /* use after obtaining a reference count */
> @@ -319,7 +338,7 @@ static inline bool nf_ct_should_gc(const struct nf_conn *ct)
> static inline void nf_ct_offload_timeout(struct nf_conn *ct)
> {
> if (nf_ct_expires(ct) < NF_CT_DAY / 2)
> - WRITE_ONCE(ct->timeout, nfct_time_stamp + NF_CT_DAY);
> + nf_ct_write_timeout(ct, NF_CT_DAY);
> }
>
> struct kernel_param;
> diff --git a/include/net/netfilter/nf_conntrack_core.h b/include/net/netfilter/nf_conntrack_core.h
> index 71d1269fe4d4..7a6e49a66d20 100644
> --- a/include/net/netfilter/nf_conntrack_core.h
> +++ b/include/net/netfilter/nf_conntrack_core.h
> @@ -89,7 +89,7 @@ static inline void __nf_ct_set_timeout(struct nf_conn *ct, u64 timeout)
> {
> if (timeout > INT_MAX)
> timeout = INT_MAX;
> - WRITE_ONCE(ct->timeout, nfct_time_stamp + (u32)timeout);
> + nf_ct_write_timeout(ct, (u32)timeout);
> }
>
> int __nf_ct_change_timeout(struct nf_conn *ct, u64 cta_timeout);
> diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
> index db1ea361f2da..47166576c195 100644
> --- a/net/netfilter/nf_conntrack_core.c
> +++ b/net/netfilter/nf_conntrack_core.c
> @@ -657,7 +657,7 @@ bool nf_ct_delete(struct nf_conn *ct, u32 portid, int report)
>
> tstamp = nf_conn_tstamp_find(ct);
> if (tstamp) {
> - s32 timeout = READ_ONCE(ct->timeout) - nfct_time_stamp;
> + s32 timeout = nf_ct_read_timeout(ct);
>
> tstamp->stop = ktime_get_real_ns();
> if (timeout < 0)
> @@ -1063,7 +1063,7 @@ static int nf_ct_resolve_clash_harder(struct sk_buff *skb, u32 repl_idx)
> }
>
> /* We want the clashing entry to go away real soon: 1 second timeout. */
> - WRITE_ONCE(loser_ct->timeout, nfct_time_stamp + HZ);
> + nf_ct_write_timeout(loser_ct, HZ);
>
> /* IPS_NAT_CLASH removes the entry automatically on the first
> * reply. Also prevents UDP tracker from moving the entry to
> @@ -2079,11 +2079,8 @@ void __nf_ct_refresh_acct(struct nf_conn *ct,
> goto acct;
>
> /* If not in hash table, timer will not be active yet */
> - if (nf_ct_is_confirmed(ct))
> - extra_jiffies += nfct_time_stamp;
> -
> - if (READ_ONCE(ct->timeout) != extra_jiffies)
> - WRITE_ONCE(ct->timeout, extra_jiffies);
> + if (nf_ct_read_timeout(ct) != extra_jiffies)
> + nf_ct_write_timeout(ct, extra_jiffies);
> acct:
> if (do_acct)
> nf_ct_acct_update(ct, CTINFO2DIR(ctinfo), skb->len);
> diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c
> index 4018acb1d674..a2797d026943 100644
> --- a/net/netfilter/nf_conntrack_proto_tcp.c
> +++ b/net/netfilter/nf_conntrack_proto_tcp.c
> @@ -766,7 +766,7 @@ static void __cold nf_tcp_handle_invalid(struct nf_conn *ct,
> "packet (index %d, dir %d) response for index %d lower timeout to %u",
> index, dir, ct->proto.tcp.last_index, timeout);
>
> - WRITE_ONCE(ct->timeout, timeout + nfct_time_stamp);
> + nf_ct_write_timeout(ct, timeout);
> }
> } else {
> ct->proto.tcp.last_index = index;
> @@ -939,7 +939,7 @@ void nf_conntrack_tcp_set_closing(struct nf_conn *ct)
> }
>
> timeout = timeouts[TCP_CONNTRACK_CLOSE];
> - WRITE_ONCE(ct->timeout, timeout + nfct_time_stamp);
> + nf_ct_write_timeout(ct, timeout);
>
> spin_unlock_bh(&ct->lock);
>
> diff --git a/net/netfilter/nf_flow_table_core.c b/net/netfilter/nf_flow_table_core.c
> index 04bd0ed4d2ae..113bd361e537 100644
> --- a/net/netfilter/nf_flow_table_core.c
> +++ b/net/netfilter/nf_flow_table_core.c
> @@ -206,8 +206,8 @@ static void flow_offload_fixup_ct(struct nf_conn *ct)
> if (timeout < 0)
> timeout = 0;
>
> - if (nf_flow_timeout_delta(READ_ONCE(ct->timeout)) > (__s32)timeout)
> - WRITE_ONCE(ct->timeout, nfct_time_stamp + timeout);
> + if (nf_flow_timeout_delta(nf_ct_read_timeout(ct)) > (__s32)timeout)
> + nf_ct_write_timeout(ct, timeout);
> }
>
> static void flow_offload_route_release(struct flow_offload *flow)
> --
> 2.40.0.577.gac1e443424-goog
>
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [PATCH] netfilter: conntrack: fix wrong ct->timeout value
2023-04-10 8:33 ` Pablo Neira Ayuso
@ 2023-04-10 9:31 ` Pablo Neira Ayuso
2023-04-10 9:59 ` Tzung-Bi Shih
0 siblings, 1 reply; 12+ messages in thread
From: Pablo Neira Ayuso @ 2023-04-10 9:31 UTC (permalink / raw)
To: Tzung-Bi Shih
Cc: kadlec, fw, netfilter-devel, coreteam, jiejiang, jasongustaman,
garrick
[-- Attachment #1: Type: text/plain, Size: 690 bytes --]
On Mon, Apr 10, 2023 at 10:33:32AM +0200, Pablo Neira Ayuso wrote:
> Hi,
>
> On Mon, Apr 10, 2023 at 02:09:35PM +0800, Tzung-Bi Shih wrote:
> > (struct nf_conn)->timeout is an interval before the conntrack
> > confirmed. After confirmed, it becomes a timestamp[1].
> >
> > It is observed that timeout of an unconfirmed conntrack have been
> > altered by calling ctnetlink_change_timeout(). As a result,
> > `nfct_time_stamp` was wrongly added to `ct->timeout` twice[2].
> >
> > Differentiate the 2 cases in all `ct->timeout` accesses.
>
> You can just skip refreshing the timeout for unconfirmed conntrack
> entries in ctnetlink_change_timeout().
Something like this patch probably?
[-- Attachment #2: x.patch --]
[-- Type: text/x-diff, Size: 596 bytes --]
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index bfc3aaa2c872..6556f5f30844 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -2466,7 +2466,8 @@ static int ctnetlink_new_conntrack(struct sk_buff *skb,
err = -EEXIST;
ct = nf_ct_tuplehash_to_ctrack(h);
- if (!(info->nlh->nlmsg_flags & NLM_F_EXCL)) {
+ if (!(info->nlh->nlmsg_flags & NLM_F_EXCL) &&
+ nf_ct_is_confirmed(ct)) {
err = ctnetlink_change_conntrack(ct, cda);
if (err == 0) {
nf_conntrack_eventmask_report((1 << IPCT_REPLY) |
^ permalink raw reply related [flat|nested] 12+ messages in thread
* Re: [PATCH] netfilter: conntrack: fix wrong ct->timeout value
2023-04-10 9:31 ` Pablo Neira Ayuso
@ 2023-04-10 9:59 ` Tzung-Bi Shih
2023-04-12 22:56 ` Pablo Neira Ayuso
0 siblings, 1 reply; 12+ messages in thread
From: Tzung-Bi Shih @ 2023-04-10 9:59 UTC (permalink / raw)
To: Pablo Neira Ayuso
Cc: kadlec, fw, netfilter-devel, coreteam, jiejiang, jasongustaman,
garrick
On Mon, Apr 10, 2023 at 11:31:21AM +0200, Pablo Neira Ayuso wrote:
> On Mon, Apr 10, 2023 at 10:33:32AM +0200, Pablo Neira Ayuso wrote:
> > On Mon, Apr 10, 2023 at 02:09:35PM +0800, Tzung-Bi Shih wrote:
> > > (struct nf_conn)->timeout is an interval before the conntrack
> > > confirmed. After confirmed, it becomes a timestamp[1].
> > >
> > > It is observed that timeout of an unconfirmed conntrack have been
> > > altered by calling ctnetlink_change_timeout(). As a result,
> > > `nfct_time_stamp` was wrongly added to `ct->timeout` twice[2].
> > >
> > > Differentiate the 2 cases in all `ct->timeout` accesses.
> >
> > You can just skip refreshing the timeout for unconfirmed conntrack
> > entries in ctnetlink_change_timeout().
>
> Something like this patch probably?
Pardon me, I sent a v2[3] before seeing the message.
[3]: https://lore.kernel.org/netfilter-devel/20230410093454.853575-1-tzungbi@kernel.org/T/#u
> diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
> index bfc3aaa2c872..6556f5f30844 100644
> --- a/net/netfilter/nf_conntrack_netlink.c
> +++ b/net/netfilter/nf_conntrack_netlink.c
> @@ -2466,7 +2466,8 @@ static int ctnetlink_new_conntrack(struct sk_buff *skb,
>
> err = -EEXIST;
> ct = nf_ct_tuplehash_to_ctrack(h);
> - if (!(info->nlh->nlmsg_flags & NLM_F_EXCL)) {
> + if (!(info->nlh->nlmsg_flags & NLM_F_EXCL) &&
> + nf_ct_is_confirmed(ct)) {
> err = ctnetlink_change_conntrack(ct, cda);
> if (err == 0) {
> nf_conntrack_eventmask_report((1 << IPCT_REPLY) |
The patch can't fix the issue we observed.
Here is the calling stack:
ctnetlink_glue_parse
[...]
__sys_sendto
__x64_sys_sendto
[...]
It was on another path:
ctnetlink_glue_parse_ct() -> ctnetlink_change_timeout().
I guess we should skip it in ctnetlink_change_timeout(). Something like v2.
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [PATCH] netfilter: conntrack: fix wrong ct->timeout value
2023-04-10 9:59 ` Tzung-Bi Shih
@ 2023-04-12 22:56 ` Pablo Neira Ayuso
2023-04-13 3:23 ` Tzung-Bi Shih
0 siblings, 1 reply; 12+ messages in thread
From: Pablo Neira Ayuso @ 2023-04-12 22:56 UTC (permalink / raw)
To: Tzung-Bi Shih
Cc: kadlec, fw, netfilter-devel, coreteam, jiejiang, jasongustaman,
garrick
On Mon, Apr 10, 2023 at 05:59:54PM +0800, Tzung-Bi Shih wrote:
> On Mon, Apr 10, 2023 at 11:31:21AM +0200, Pablo Neira Ayuso wrote:
> > On Mon, Apr 10, 2023 at 10:33:32AM +0200, Pablo Neira Ayuso wrote:
> > > On Mon, Apr 10, 2023 at 02:09:35PM +0800, Tzung-Bi Shih wrote:
> > > > (struct nf_conn)->timeout is an interval before the conntrack
> > > > confirmed. After confirmed, it becomes a timestamp[1].
> > > >
> > > > It is observed that timeout of an unconfirmed conntrack have been
> > > > altered by calling ctnetlink_change_timeout(). As a result,
> > > > `nfct_time_stamp` was wrongly added to `ct->timeout` twice[2].
> > > >
> > > > Differentiate the 2 cases in all `ct->timeout` accesses.
> > >
> > > You can just skip refreshing the timeout for unconfirmed conntrack
> > > entries in ctnetlink_change_timeout().
> >
> > Something like this patch probably?
>
> Pardon me, I sent a v2[3] before seeing the message.
>
> [3]: https://lore.kernel.org/netfilter-devel/20230410093454.853575-1-tzungbi@kernel.org/T/#u
>
> > diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
> > index bfc3aaa2c872..6556f5f30844 100644
> > --- a/net/netfilter/nf_conntrack_netlink.c
> > +++ b/net/netfilter/nf_conntrack_netlink.c
> > @@ -2466,7 +2466,8 @@ static int ctnetlink_new_conntrack(struct sk_buff *skb,
> >
> > err = -EEXIST;
> > ct = nf_ct_tuplehash_to_ctrack(h);
> > - if (!(info->nlh->nlmsg_flags & NLM_F_EXCL)) {
> > + if (!(info->nlh->nlmsg_flags & NLM_F_EXCL) &&
> > + nf_ct_is_confirmed(ct)) {
> > err = ctnetlink_change_conntrack(ct, cda);
> > if (err == 0) {
> > nf_conntrack_eventmask_report((1 << IPCT_REPLY) |
>
> The patch can't fix the issue we observed.
>
> Here is the calling stack:
> ctnetlink_glue_parse
> [...]
> __sys_sendto
> __x64_sys_sendto
> [...]
I see. So this is from nfqueue path, now I understand better, thanks.
Maybe just do this special handling:
+ if (nf_ct_is_confirmed(ct))
+ WRITE_ONCE(ct->timeout, timeout + nfct_time_stamp);
+ else
+ WRITE_ONCE(ct->timeout, timeout);
for ctnetlink_change_timeout().
Just replace __nf_ct_set_timeout(), by this code above in
nf_conntrack_netlink.c? I think the __nf_ct_set_timeout() helper is
not very useful.
Better not to cripple features, even if this was broken :-).
Thanks.
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [PATCH] netfilter: conntrack: fix wrong ct->timeout value
2023-04-12 22:56 ` Pablo Neira Ayuso
@ 2023-04-13 3:23 ` Tzung-Bi Shih
2023-04-13 9:04 ` Pablo Neira Ayuso
0 siblings, 1 reply; 12+ messages in thread
From: Tzung-Bi Shih @ 2023-04-13 3:23 UTC (permalink / raw)
To: Pablo Neira Ayuso
Cc: kadlec, fw, netfilter-devel, coreteam, jiejiang, jasongustaman,
garrick
On Thu, Apr 13, 2023 at 12:56:01AM +0200, Pablo Neira Ayuso wrote:
> Maybe just do this special handling:
>
> + if (nf_ct_is_confirmed(ct))
> + WRITE_ONCE(ct->timeout, timeout + nfct_time_stamp);
> + else
> + WRITE_ONCE(ct->timeout, timeout);
>
> for ctnetlink_change_timeout().
>
> Just replace __nf_ct_set_timeout(), by this code above in
> nf_conntrack_netlink.c? I think the __nf_ct_set_timeout() helper is
> not very useful.
I don't quite understand the message above.
Calling path in v6.3-rc6:
ctnetlink_change_timeout() in net/netfilter/nf_conntrack_netlink.c
-> __nf_ct_change_timeout() in net/netfilter/nf_conntrack_core.c
-> __nf_ct_set_timeout() in include/net/netfilter/nf_conntrack_core.h
To clarify, which one did you mean:
Option 1: replace the __nf_ct_change_timeout() invocation to the special
handling in net/netfilter/nf_conntrack_netlink.c
Option 2: replace the __nf_ct_set_timeout() invocation to the special
handling in net/netfilter/nf_conntrack_core.c
Option 3: put the special handling in __nf_ct_set_timeout() in
include/net/netfilter/nf_conntrack_core.h
In either case, the fix would be a subset of v1.
I'm not sure other use cases. In our environment, we observed an
inconsistent state by a partial fix of v1. nf_ct_expires() got called
by userspace program. And the return value (which means the remaining
timeout) will be the parameter for the next ctnetlink_change_timeout().
As you can see in [4], if this happens on an unconfirmed conntrack, the
`nfct_time_stamp` would be wrongly invoved in the calculation again.
That's why we take care of all `ct->timeout` accesses in v1.
[4]: https://elixir.bootlin.com/linux/v6.3-rc6/source/include/net/netfilter/nf_conntrack.h#L296
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [PATCH] netfilter: conntrack: fix wrong ct->timeout value
2023-04-13 3:23 ` Tzung-Bi Shih
@ 2023-04-13 9:04 ` Pablo Neira Ayuso
2023-04-14 3:52 ` Tzung-Bi Shih
0 siblings, 1 reply; 12+ messages in thread
From: Pablo Neira Ayuso @ 2023-04-13 9:04 UTC (permalink / raw)
To: Tzung-Bi Shih
Cc: kadlec, fw, netfilter-devel, coreteam, jiejiang, jasongustaman,
garrick
On Thu, Apr 13, 2023 at 11:23:11AM +0800, Tzung-Bi Shih wrote:
> On Thu, Apr 13, 2023 at 12:56:01AM +0200, Pablo Neira Ayuso wrote:
> > Maybe just do this special handling:
> >
> > + if (nf_ct_is_confirmed(ct))
> > + WRITE_ONCE(ct->timeout, timeout + nfct_time_stamp);
> > + else
> > + WRITE_ONCE(ct->timeout, timeout);
> >
> > for ctnetlink_change_timeout().
> >
> > Just replace __nf_ct_set_timeout(), by this code above in
> > nf_conntrack_netlink.c? I think the __nf_ct_set_timeout() helper is
> > not very useful.
>
> I don't quite understand the message above.
>
> Calling path in v6.3-rc6:
> ctnetlink_change_timeout() in net/netfilter/nf_conntrack_netlink.c
> -> __nf_ct_change_timeout() in net/netfilter/nf_conntrack_core.c
> -> __nf_ct_set_timeout() in include/net/netfilter/nf_conntrack_core.h
>
> To clarify, which one did you mean:
>
> Option 1: replace the __nf_ct_change_timeout() invocation to the special
> handling in net/netfilter/nf_conntrack_netlink.c
> Option 2: replace the __nf_ct_set_timeout() invocation to the special
> handling in net/netfilter/nf_conntrack_core.c
> Option 3: put the special handling in __nf_ct_set_timeout() in
> include/net/netfilter/nf_conntrack_core.h
>
> In either case, the fix would be a subset of v1.
Yes, I think this is Option 3:
diff --git a/include/net/netfilter/nf_conntrack_core.h b/include/net/netfilter/nf_conntrack_core.h
index 71d1269fe4d4..9c2cd69bbdc6 100644
--- a/include/net/netfilter/nf_conntrack_core.h
+++ b/include/net/netfilter/nf_conntrack_core.h
@@ -89,7 +89,11 @@ static inline void __nf_ct_set_timeout(struct nf_conn *ct, u64 timeout)
{
if (timeout > INT_MAX)
timeout = INT_MAX;
- WRITE_ONCE(ct->timeout, nfct_time_stamp + (u32)timeout);
+
+ if (nf_ct_is_confirmed(ct))
+ WRITE_ONCE(ct->timeout, nfct_time_stamp + (u32)timeout;
+ else
+ ct->timeout = (u32)timeout;
}
int __nf_ct_change_timeout(struct nf_conn *ct, u64 cta_timeout);
Note:
WRITE_ONCE(ct->timeout, (u32)timeout);
is not required, because unconfirmed conntrack object is owned by the
packet (not yet in the hashes).
BTW, not related to this patch, but I would like to understand why
this __nf_ct_set_timeout() function is inline, but that is a different
issue.
> I'm not sure other use cases. In our environment, we observed an
> inconsistent state by a partial fix of v1.
Thanks for explaining, extending patch description would be good.
> nf_ct_expires() got called by userspace program. And the return
> value (which means the remaining timeout) will be the parameter for
> the next ctnetlink_change_timeout().
Unconfirmed conntrack is owned by the packet that refers to it, it is
not yet in the hashes. I don't see how concurrent access to the
timeout might occur.
Or are you referring to a different scenario that triggers the partial
state?
> As you can see in [4], if this happens on an unconfirmed conntrack, the
> `nfct_time_stamp` would be wrongly invoved in the calculation again.
> That's why we take care of all `ct->timeout` accesses in v1.
If you are observing a partial state, that is a different issue and I
think it deserves a separated patch with a description? Probably
including KCSAN splat if this is what you used to catch the partial
state.
Thanks!
> [4]: https://elixir.bootlin.com/linux/v6.3-rc6/source/include/net/netfilter/nf_conntrack.h#L296
^ permalink raw reply related [flat|nested] 12+ messages in thread
* Re: [PATCH] netfilter: conntrack: fix wrong ct->timeout value
2023-04-13 9:04 ` Pablo Neira Ayuso
@ 2023-04-14 3:52 ` Tzung-Bi Shih
2023-04-14 8:12 ` Pablo Neira Ayuso
0 siblings, 1 reply; 12+ messages in thread
From: Tzung-Bi Shih @ 2023-04-14 3:52 UTC (permalink / raw)
To: Pablo Neira Ayuso
Cc: kadlec, fw, netfilter-devel, coreteam, jiejiang, jasongustaman,
garrick
On Thu, Apr 13, 2023 at 11:04:24AM +0200, Pablo Neira Ayuso wrote:
> On Thu, Apr 13, 2023 at 11:23:11AM +0800, Tzung-Bi Shih wrote:
> > nf_ct_expires() got called by userspace program. And the return
> > value (which means the remaining timeout) will be the parameter for
> > the next ctnetlink_change_timeout().
>
> Unconfirmed conntrack is owned by the packet that refers to it, it is
> not yet in the hashes. I don't see how concurrent access to the
> timeout might occur.
>
> Or are you referring to a different scenario that triggers the partial
> state?
I think it isn't a concurrent access. We observed that userspace program
reads the remaining timeout and sets it back for unconfirmed conntrack.
Conceptually, it looks like:
timeout = nf_ct_expires(...); (1)
ctnetlink_change_timeout(...timeout);
At (1), `nfct_time_stamp` is wrongly involved in the calculation[4] because
the conntrack is unconfirmed. The `ct->timeout` is an internal but not a
timestamp.
As a result, ctnetlink_change_timeout() sets the wrong value to `ct->timeout`.
[4]: https://elixir.bootlin.com/linux/v6.3-rc6/source/include/net/netfilter/nf_conntrack.h#L296
> > As you can see in [4], if this happens on an unconfirmed conntrack, the
> > `nfct_time_stamp` would be wrongly invoved in the calculation again.
> > That's why we take care of all `ct->timeout` accesses in v1.
Again, that's why v1 separates all `ct->timeout` accesses.
If the conntrack is confirmed:
- `ct->timeout` is a timestamp.
- `nfct_time_stamp` should be involved when getting/setting `ct->timeout`.
If the conntrack is unconfirmed:
- `ct->timeout` is an interval.
- `nfct_time_stamp` shouldn't be involved.
Only separate `ct->timeout` access in __nf_ct_set_timeout() is obviously
insufficient. I would suggest either take care of all `ct->timeout`
accesses as v1 or at least change both __nf_ct_set_timeout() and
nf_ct_expires().
In the latter case, it looks fine in our environment. However, I'm not
sure if any hidden cases we haven't seen.
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [PATCH] netfilter: conntrack: fix wrong ct->timeout value
2023-04-14 3:52 ` Tzung-Bi Shih
@ 2023-04-14 8:12 ` Pablo Neira Ayuso
2023-04-17 3:41 ` Tzung-Bi Shih
0 siblings, 1 reply; 12+ messages in thread
From: Pablo Neira Ayuso @ 2023-04-14 8:12 UTC (permalink / raw)
To: Tzung-Bi Shih
Cc: kadlec, fw, netfilter-devel, coreteam, jiejiang, jasongustaman,
garrick
On Fri, Apr 14, 2023 at 11:52:02AM +0800, Tzung-Bi Shih wrote:
> On Thu, Apr 13, 2023 at 11:04:24AM +0200, Pablo Neira Ayuso wrote:
> > On Thu, Apr 13, 2023 at 11:23:11AM +0800, Tzung-Bi Shih wrote:
> > > nf_ct_expires() got called by userspace program. And the return
> > > value (which means the remaining timeout) will be the parameter for
> > > the next ctnetlink_change_timeout().
> >
> > Unconfirmed conntrack is owned by the packet that refers to it, it is
> > not yet in the hashes. I don't see how concurrent access to the
> > timeout might occur.
> >
> > Or are you referring to a different scenario that triggers the partial
> > state?
>
> I think it isn't a concurrent access. We observed that userspace program
> reads the remaining timeout and sets it back for unconfirmed conntrack.
>
> Conceptually, it looks like:
> timeout = nf_ct_expires(...); (1)
> ctnetlink_change_timeout(...timeout);
How could this possibly happen?
nf_ct_expires() is called from:
- ctnetlink_dump_timeout(), this is netlink dump path, since:
commit 1397af5bfd7d32b0cf2adb70a78c9a9e8f11d912
Author: Florian Westphal <fw@strlen.de>
Date: Mon Apr 11 13:01:18 2022 +0200
netfilter: conntrack: remove the percpu dying list
it is not possible to do any listing of unconfirmed, and that is fine.
As I said, nfnetlink_queue operates with an unconfirmed conntrack with
owns exclusively, which is not in the hashes.
- nf_ct_expires() calls from xt and nft matches are irrelevant:
net/netfilter/nft_ct.c: *dest = jiffies_to_msecs(nf_ct_expires(ct));
net/netfilter/xt_conntrack.c: unsigned long expires = nf_ct_expires(ct) / HZ;
- there is the garbage collector, but that only works with conntrack
entries in the hashes:
net/netfilter/nf_conntrack_core.c: expires = clamp(nf_ct_expires(tmp), GC_SCAN_INTERVAL_MIN, GC_SCAN_INTERVAL_CLAMP);
- there is also /proc interface, but that only works with conntrack
entries in the hashes:
net/netfilter/nf_conntrack_standalone.c: seq_printf(s, "%ld ", nf_ct_expires(ct) / HZ);
> At (1), `nfct_time_stamp` is wrongly involved in the calculation[4] because
> the conntrack is unconfirmed. The `ct->timeout` is an internal but not a
> timestamp.
I can see there are two possible states for ct->timeout, thanks for
explaining this again.
> As a result, ctnetlink_change_timeout() sets the wrong value to `ct->timeout`.
>
> [4]: https://elixir.bootlin.com/linux/v6.3-rc6/source/include/net/netfilter/nf_conntrack.h#L296
>
> > > As you can see in [4], if this happens on an unconfirmed conntrack, the
> > > `nfct_time_stamp` would be wrongly invoved in the calculation again.
> > > That's why we take care of all `ct->timeout` accesses in v1.
>
> Again, that's why v1 separates all `ct->timeout` accesses.
>
> If the conntrack is confirmed:
> - `ct->timeout` is a timestamp.
> - `nfct_time_stamp` should be involved when getting/setting `ct->timeout`.
>
> If the conntrack is unconfirmed:
> - `ct->timeout` is an interval.
> - `nfct_time_stamp` shouldn't be involved.
>
> Only separate `ct->timeout` access in __nf_ct_set_timeout() is obviously
> insufficient. I would suggest either take care of all `ct->timeout`
> accesses as v1 or at least change both __nf_ct_set_timeout() and
> nf_ct_expires().
it does not even make sense to use WRITE_ONCE from
__nf_conntrack_confirm(), see:
https://elixir.bootlin.com/linux/latest/source/net/netfilter/nf_conntrack_core.c#L1260
because the unconfirmed conntrack object is owned exclusively by the packet.
> In the latter case, it looks fine in our environment. However, I'm not
> sure if any hidden cases we haven't seen.
Maybe you have an old kernel?
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [PATCH] netfilter: conntrack: fix wrong ct->timeout value
2023-04-14 8:12 ` Pablo Neira Ayuso
@ 2023-04-17 3:41 ` Tzung-Bi Shih
2023-04-18 8:17 ` Pablo Neira Ayuso
0 siblings, 1 reply; 12+ messages in thread
From: Tzung-Bi Shih @ 2023-04-17 3:41 UTC (permalink / raw)
To: Pablo Neira Ayuso
Cc: kadlec, fw, netfilter-devel, coreteam, jiejiang, jasongustaman,
garrick
On Fri, Apr 14, 2023 at 10:12:14AM +0200, Pablo Neira Ayuso wrote:
> On Fri, Apr 14, 2023 at 11:52:02AM +0800, Tzung-Bi Shih wrote:
> > On Thu, Apr 13, 2023 at 11:04:24AM +0200, Pablo Neira Ayuso wrote:
> > > On Thu, Apr 13, 2023 at 11:23:11AM +0800, Tzung-Bi Shih wrote:
> > > > nf_ct_expires() got called by userspace program. And the return
> > > > value (which means the remaining timeout) will be the parameter for
> > > > the next ctnetlink_change_timeout().
> > >
> > > Unconfirmed conntrack is owned by the packet that refers to it, it is
> > > not yet in the hashes. I don't see how concurrent access to the
> > > timeout might occur.
> > >
> > > Or are you referring to a different scenario that triggers the partial
> > > state?
> >
> > I think it isn't a concurrent access. We observed that userspace program
> > reads the remaining timeout and sets it back for unconfirmed conntrack.
> >
> > Conceptually, it looks like:
> > timeout = nf_ct_expires(...); (1)
> > ctnetlink_change_timeout(...timeout);
>
> How could this possibly happen?
>
> nf_ct_expires() is called from:
>
> - ctnetlink_dump_timeout(), this is netlink dump path, since:
>
> commit 1397af5bfd7d32b0cf2adb70a78c9a9e8f11d912
> Author: Florian Westphal <fw@strlen.de>
> Date: Mon Apr 11 13:01:18 2022 +0200
>
> netfilter: conntrack: remove the percpu dying list
>
> it is not possible to do any listing of unconfirmed, and that is fine.
>
> As I said, nfnetlink_queue operates with an unconfirmed conntrack with
> owns exclusively, which is not in the hashes.
I applied the following patches on top of v6.3-rc5[5].
As you suggested:
diff --git a/include/net/netfilter/nf_conntrack_core.h b/include/net/netfilter/nf_conntrack_core.h
index 71d1269fe4d4..3384859a8921 100644
--- a/include/net/netfilter/nf_conntrack_core.h
+++ b/include/net/netfilter/nf_conntrack_core.h
@@ -89,7 +89,11 @@ static inline void __nf_ct_set_timeout(struct nf_conn *ct, u64 timeout)
{
if (timeout > INT_MAX)
timeout = INT_MAX;
- WRITE_ONCE(ct->timeout, nfct_time_stamp + (u32)timeout);
+
+ if (nf_ct_is_confirmed(ct))
+ WRITE_ONCE(ct->timeout, nfct_time_stamp + (u32)timeout);
+ else
+ ct->timeout = (u32)timeout;
}
And this patch for dumping the debug information:
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index bfc3aaa2c872..b4c016ff07e9 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -178,6 +178,11 @@ static int ctnetlink_dump_timeout(struct sk_buff *skb, const struct nf_conn *ct,
{
long timeout = nf_ct_expires(ct) / HZ;
+ if (!nf_ct_is_confirmed(ct)) {
+ dump_stack();
+ printk(KERN_ERR "===ct->timeout=%u===timeout=%ld\n", ct->timeout, timeout);
+ }
+
if (skip_zero && timeout == 0)
return 0;
And here is the trimmed dmesg:
===ct->timeout=30000===timeout=0
[...] 6.3.0-rc5-00675-ged0c7cbf5b2c [...]
Call Trace:
<TASK>
dump_stack_lvl
ctnetlink_dump_timeout
__ctnetlink_glue_build
ctnetlink_glue_build
__nfqnl_enqueue_packet
nf_queue
nf_hook_slow
ip_mc_output
? __pfx_ip_finish_output
ip_send_skb
? __pfx_dst_output
udp_send_skb
udp_sendmsg
? __pfx_ip_generic_getfrag
sock_sendmsg
As the dmesg showed, the unconfirmed conntrack did call into
ctnetlink_dump_timeout(). As a result, the value returned from
nf_ct_expires() is always 0 in the case (because the `ct->timeout` got
subtracted by current timestamp `nfct_time_stamp`[4]).
[4]: https://elixir.bootlin.com/linux/v6.3-rc6/source/include/net/netfilter/nf_conntrack.h#L296
[5]: https://chromium.googlesource.com/chromiumos/third_party/kernel/+/refs/heads/merge/continuous/chromeos-kernelupstream-6.3-rc5
^ permalink raw reply related [flat|nested] 12+ messages in thread
* Re: [PATCH] netfilter: conntrack: fix wrong ct->timeout value
2023-04-17 3:41 ` Tzung-Bi Shih
@ 2023-04-18 8:17 ` Pablo Neira Ayuso
2023-04-19 5:20 ` Tzung-Bi Shih
0 siblings, 1 reply; 12+ messages in thread
From: Pablo Neira Ayuso @ 2023-04-18 8:17 UTC (permalink / raw)
To: Tzung-Bi Shih
Cc: kadlec, fw, netfilter-devel, coreteam, jiejiang, jasongustaman,
garrick
Hi,
On Mon, Apr 17, 2023 at 11:41:41AM +0800, Tzung-Bi Shih wrote:
> On Fri, Apr 14, 2023 at 10:12:14AM +0200, Pablo Neira Ayuso wrote:
> > On Fri, Apr 14, 2023 at 11:52:02AM +0800, Tzung-Bi Shih wrote:
> > > On Thu, Apr 13, 2023 at 11:04:24AM +0200, Pablo Neira Ayuso wrote:
> > > > On Thu, Apr 13, 2023 at 11:23:11AM +0800, Tzung-Bi Shih wrote:
> > > > > nf_ct_expires() got called by userspace program. And the return
> > > > > value (which means the remaining timeout) will be the parameter for
> > > > > the next ctnetlink_change_timeout().
> > > >
> > > > Unconfirmed conntrack is owned by the packet that refers to it, it is
> > > > not yet in the hashes. I don't see how concurrent access to the
> > > > timeout might occur.
> > > >
> > > > Or are you referring to a different scenario that triggers the partial
> > > > state?
> > >
> > > I think it isn't a concurrent access. We observed that userspace program
> > > reads the remaining timeout and sets it back for unconfirmed conntrack.
> > >
> > > Conceptually, it looks like:
> > > timeout = nf_ct_expires(...); (1)
> > > ctnetlink_change_timeout(...timeout);
> >
> > How could this possibly happen?
> >
> > nf_ct_expires() is called from:
> >
> > - ctnetlink_dump_timeout(), this is netlink dump path, since:
> >
> > commit 1397af5bfd7d32b0cf2adb70a78c9a9e8f11d912
> > Author: Florian Westphal <fw@strlen.de>
> > Date: Mon Apr 11 13:01:18 2022 +0200
> >
> > netfilter: conntrack: remove the percpu dying list
> >
> > it is not possible to do any listing of unconfirmed, and that is fine.
> >
> > As I said, nfnetlink_queue operates with an unconfirmed conntrack with
> > owns exclusively, which is not in the hashes.
>
> I applied the following patches on top of v6.3-rc5[5].
>
> As you suggested:
>
> diff --git a/include/net/netfilter/nf_conntrack_core.h b/include/net/netfilter/nf_conntrack_core.h
> index 71d1269fe4d4..3384859a8921 100644
> --- a/include/net/netfilter/nf_conntrack_core.h
> +++ b/include/net/netfilter/nf_conntrack_core.h
> @@ -89,7 +89,11 @@ static inline void __nf_ct_set_timeout(struct nf_conn *ct, u64 timeout)
> {
> if (timeout > INT_MAX)
> timeout = INT_MAX;
> - WRITE_ONCE(ct->timeout, nfct_time_stamp + (u32)timeout);
> +
> + if (nf_ct_is_confirmed(ct))
> + WRITE_ONCE(ct->timeout, nfct_time_stamp + (u32)timeout);
> + else
> + ct->timeout = (u32)timeout;
> }
>
> And this patch for dumping the debug information:
>
> diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
> index bfc3aaa2c872..b4c016ff07e9 100644
> --- a/net/netfilter/nf_conntrack_netlink.c
> +++ b/net/netfilter/nf_conntrack_netlink.c
> @@ -178,6 +178,11 @@ static int ctnetlink_dump_timeout(struct sk_buff *skb, const struct nf_conn *ct,
> {
> long timeout = nf_ct_expires(ct) / HZ;
>
> + if (!nf_ct_is_confirmed(ct)) {
> + dump_stack();
> + printk(KERN_ERR "===ct->timeout=%u===timeout=%ld\n", ct->timeout, timeout);
> + }
> +
> if (skip_zero && timeout == 0)
> return 0;
>
> And here is the trimmed dmesg:
>
> ===ct->timeout=30000===timeout=0
> [...] 6.3.0-rc5-00675-ged0c7cbf5b2c [...]
> Call Trace:
> <TASK>
> dump_stack_lvl
> ctnetlink_dump_timeout
> __ctnetlink_glue_build
> ctnetlink_glue_build
> __nfqnl_enqueue_packet
> nf_queue
> nf_hook_slow
> ip_mc_output
> ? __pfx_ip_finish_output
> ip_send_skb
> ? __pfx_dst_output
> udp_send_skb
> udp_sendmsg
> ? __pfx_ip_generic_getfrag
> sock_sendmsg
>
> As the dmesg showed, the unconfirmed conntrack did call into
> ctnetlink_dump_timeout(). As a result, the value returned from
> nf_ct_expires() is always 0 in the case (because the `ct->timeout` got
> subtracted by current timestamp `nfct_time_stamp`[4]).
Oh, interesting.
Thanks for digging deeper into this issue.
Then, on top of what I suggest, my recommendation is to skip the
ctnetlink_dump_timeout() call for !nf_ct_confirmed(ct).
Or, you add some special handling for ctnetlink_dump_timeout() for the
!nf_ct_confirmed(ct) case.
Let me know, thanks.
> [4]: https://elixir.bootlin.com/linux/v6.3-rc6/source/include/net/netfilter/nf_conntrack.h#L296
> [5]: https://chromium.googlesource.com/chromiumos/third_party/kernel/+/refs/heads/merge/continuous/chromeos-kernelupstream-6.3-rc5
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [PATCH] netfilter: conntrack: fix wrong ct->timeout value
2023-04-18 8:17 ` Pablo Neira Ayuso
@ 2023-04-19 5:20 ` Tzung-Bi Shih
0 siblings, 0 replies; 12+ messages in thread
From: Tzung-Bi Shih @ 2023-04-19 5:20 UTC (permalink / raw)
To: Pablo Neira Ayuso
Cc: kadlec, fw, netfilter-devel, coreteam, jiejiang, jasongustaman,
garrick
On Tue, Apr 18, 2023 at 10:17:32AM +0200, Pablo Neira Ayuso wrote:
> Then, on top of what I suggest, my recommendation is to skip the
> ctnetlink_dump_timeout() call for !nf_ct_confirmed(ct).
>
> Or, you add some special handling for ctnetlink_dump_timeout() for the
> !nf_ct_confirmed(ct) case.
Sent a v3[6] for this and hope it makes more sense.
[6]: https://patchwork.ozlabs.org/project/netfilter-devel/patch/20230419051526.3170053-1-tzungbi@kernel.org/
^ permalink raw reply [flat|nested] 12+ messages in thread
end of thread, other threads:[~2023-04-19 5:20 UTC | newest]
Thread overview: 12+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2023-04-10 6:09 [PATCH] netfilter: conntrack: fix wrong ct->timeout value Tzung-Bi Shih
2023-04-10 8:33 ` Pablo Neira Ayuso
2023-04-10 9:31 ` Pablo Neira Ayuso
2023-04-10 9:59 ` Tzung-Bi Shih
2023-04-12 22:56 ` Pablo Neira Ayuso
2023-04-13 3:23 ` Tzung-Bi Shih
2023-04-13 9:04 ` Pablo Neira Ayuso
2023-04-14 3:52 ` Tzung-Bi Shih
2023-04-14 8:12 ` Pablo Neira Ayuso
2023-04-17 3:41 ` Tzung-Bi Shih
2023-04-18 8:17 ` Pablo Neira Ayuso
2023-04-19 5:20 ` Tzung-Bi Shih
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.