From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
To: stable@vger.kernel.org
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>,
patches@lists.linux.dev, Florian Westphal <fw@strlen.de>,
Sasha Levin <sashal@kernel.org>
Subject: [PATCH 5.15 53/75] netfilter: nf_tables: nft_set_rbtree: fix spurious insertion failure
Date: Mon, 9 Oct 2023 15:02:15 +0200 [thread overview]
Message-ID: <20231009130113.096507619@linuxfoundation.org> (raw)
In-Reply-To: <20231009130111.200710898@linuxfoundation.org>
5.15-stable review patch. If anyone has any objections, please let me know.
------------------
From: Florian Westphal <fw@strlen.de>
[ Upstream commit 087388278e0f301f4c61ddffb1911d3a180f84b8 ]
nft_rbtree_gc_elem() walks back and removes the end interval element that
comes before the expired element.
There is a small chance that we've cached this element as 'rbe_ge'.
If this happens, we hold and test a pointer that has been queued for
freeing.
It also causes spurious insertion failures:
$ cat test-testcases-sets-0044interval_overlap_0.1/testout.log
Error: Could not process rule: File exists
add element t s { 0 - 2 }
^^^^^^
Failed to insert 0 - 2 given:
table ip t {
set s {
type inet_service
flags interval,timeout
timeout 2s
gc-interval 2s
}
}
The set (rbtree) is empty. The 'failure' doesn't happen on the next attempt.
The reason is that when we try to insert, the tree may hold an expired
element that collides with the range we're adding.
While we do evict/erase this element, we can trip over this check:
if (rbe_ge && nft_rbtree_interval_end(rbe_ge) && nft_rbtree_interval_end(new))
return -ENOTEMPTY;
rbe_ge was erased by the synchronous gc, so we should not have done this
check. The next attempt won't find it, so a retry results in successful
insertion.
Restart in-kernel to avoid such spurious errors.
Such restarts are rare, unless userspace intentionally adds very large
numbers of elements with very short timeouts while setting a huge
gc interval.
Even in this case, this cannot loop forever: on each retry an existing
element has been removed.
As the caller is holding the transaction mutex, it's impossible
for a second entity to add more expiring elements to the tree.
After this it also becomes feasible to remove the async gc worker
and perform all garbage collection from the commit path.
Fixes: c9e6978e2725 ("netfilter: nft_set_rbtree: Switch to node list walk for overlap detection")
Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Sasha Levin <sashal@kernel.org>
---
net/netfilter/nft_set_rbtree.c | 46 +++++++++++++++++++++-------------
1 file changed, 29 insertions(+), 17 deletions(-)
diff --git a/net/netfilter/nft_set_rbtree.c b/net/netfilter/nft_set_rbtree.c
index 487572dcd6144..2660ceab3759d 100644
--- a/net/netfilter/nft_set_rbtree.c
+++ b/net/netfilter/nft_set_rbtree.c
@@ -233,10 +233,9 @@ static void nft_rbtree_gc_remove(struct net *net, struct nft_set *set,
rb_erase(&rbe->node, &priv->root);
}
-static int nft_rbtree_gc_elem(const struct nft_set *__set,
- struct nft_rbtree *priv,
- struct nft_rbtree_elem *rbe,
- u8 genmask)
+static const struct nft_rbtree_elem *
+nft_rbtree_gc_elem(const struct nft_set *__set, struct nft_rbtree *priv,
+ struct nft_rbtree_elem *rbe, u8 genmask)
{
struct nft_set *set = (struct nft_set *)__set;
struct rb_node *prev = rb_prev(&rbe->node);
@@ -246,7 +245,7 @@ static int nft_rbtree_gc_elem(const struct nft_set *__set,
gc = nft_trans_gc_alloc(set, 0, GFP_ATOMIC);
if (!gc)
- return -ENOMEM;
+ return ERR_PTR(-ENOMEM);
/* search for end interval coming before this element.
* end intervals don't carry a timeout extension, they
@@ -261,6 +260,7 @@ static int nft_rbtree_gc_elem(const struct nft_set *__set,
prev = rb_prev(prev);
}
+ rbe_prev = NULL;
if (prev) {
rbe_prev = rb_entry(prev, struct nft_rbtree_elem, node);
nft_rbtree_gc_remove(net, set, priv, rbe_prev);
@@ -272,7 +272,7 @@ static int nft_rbtree_gc_elem(const struct nft_set *__set,
*/
gc = nft_trans_gc_queue_sync(gc, GFP_ATOMIC);
if (WARN_ON_ONCE(!gc))
- return -ENOMEM;
+ return ERR_PTR(-ENOMEM);
nft_trans_gc_elem_add(gc, rbe_prev);
}
@@ -280,13 +280,13 @@ static int nft_rbtree_gc_elem(const struct nft_set *__set,
nft_rbtree_gc_remove(net, set, priv, rbe);
gc = nft_trans_gc_queue_sync(gc, GFP_ATOMIC);
if (WARN_ON_ONCE(!gc))
- return -ENOMEM;
+ return ERR_PTR(-ENOMEM);
nft_trans_gc_elem_add(gc, rbe);
nft_trans_gc_queue_sync_done(gc);
- return 0;
+ return rbe_prev;
}
static bool nft_rbtree_update_first(const struct nft_set *set,
@@ -314,7 +314,7 @@ static int __nft_rbtree_insert(const struct net *net, const struct nft_set *set,
struct nft_rbtree *priv = nft_set_priv(set);
u8 cur_genmask = nft_genmask_cur(net);
u8 genmask = nft_genmask_next(net);
- int d, err;
+ int d;
/* Descend the tree to search for an existing element greater than the
* key value to insert that is greater than the new element. This is the
@@ -363,9 +363,14 @@ static int __nft_rbtree_insert(const struct net *net, const struct nft_set *set,
*/
if (nft_set_elem_expired(&rbe->ext) &&
nft_set_elem_active(&rbe->ext, cur_genmask)) {
- err = nft_rbtree_gc_elem(set, priv, rbe, genmask);
- if (err < 0)
- return err;
+ const struct nft_rbtree_elem *removed_end;
+
+ removed_end = nft_rbtree_gc_elem(set, priv, rbe, genmask);
+ if (IS_ERR(removed_end))
+ return PTR_ERR(removed_end);
+
+ if (removed_end == rbe_le || removed_end == rbe_ge)
+ return -EAGAIN;
continue;
}
@@ -486,11 +491,18 @@ static int nft_rbtree_insert(const struct net *net, const struct nft_set *set,
struct nft_rbtree_elem *rbe = elem->priv;
int err;
- write_lock_bh(&priv->lock);
- write_seqcount_begin(&priv->count);
- err = __nft_rbtree_insert(net, set, rbe, ext);
- write_seqcount_end(&priv->count);
- write_unlock_bh(&priv->lock);
+ do {
+ if (fatal_signal_pending(current))
+ return -EINTR;
+
+ cond_resched();
+
+ write_lock_bh(&priv->lock);
+ write_seqcount_begin(&priv->count);
+ err = __nft_rbtree_insert(net, set, rbe, ext);
+ write_seqcount_end(&priv->count);
+ write_unlock_bh(&priv->lock);
+ } while (err == -EAGAIN);
return err;
}
--
2.40.1
next prev parent reply other threads:[~2023-10-09 13:26 UTC|newest]
Thread overview: 84+ messages / expand[flat|nested] mbox.gz Atom feed top
2023-10-09 13:01 [PATCH 5.15 00/75] 5.15.135-rc1 review Greg Kroah-Hartman
2023-10-09 13:01 ` [PATCH 5.15 01/75] spi: zynqmp-gqspi: Convert to platform remove callback returning void Greg Kroah-Hartman
2023-10-09 13:01 ` [PATCH 5.15 02/75] spi: zynqmp-gqspi: fix clock imbalance on probe failure Greg Kroah-Hartman
2023-10-09 13:01 ` [PATCH 5.15 03/75] ASoC: soc-utils: Export snd_soc_dai_is_dummy() symbol Greg Kroah-Hartman
2023-10-09 13:01 ` [PATCH 5.15 04/75] ASoC: tegra: Fix redundant PLLA and PLLA_OUT0 updates Greg Kroah-Hartman
2023-10-09 13:01 ` [PATCH 5.15 05/75] NFS: Cleanup unused rpc_clnt variable Greg Kroah-Hartman
2023-10-09 13:01 ` [PATCH 5.15 06/75] NFS: rename nfs_client_kset to nfs_kset Greg Kroah-Hartman
2023-10-09 13:01 ` [PATCH 5.15 07/75] NFSv4: Fix a state manager thread deadlock regression Greg Kroah-Hartman
2023-10-09 13:01 ` [PATCH 5.15 08/75] ring-buffer: remove obsolete comment for free_buffer_page() Greg Kroah-Hartman
2023-10-09 13:01 ` [PATCH 5.15 09/75] ring-buffer: Fix bytes info in per_cpu buffer stats Greg Kroah-Hartman
2023-10-09 13:01 ` [PATCH 5.15 10/75] arm64: Avoid repeated AA64MMFR1_EL1 register read on pagefault path Greg Kroah-Hartman
2023-10-09 13:01 ` [PATCH 5.15 11/75] iommu/arm-smmu-v3: Set TTL invalidation hint better Greg Kroah-Hartman
2023-10-09 13:01 ` [PATCH 5.15 12/75] iommu/arm-smmu-v3: Avoid constructing invalid range commands Greg Kroah-Hartman
2023-10-09 13:01 ` [PATCH 5.15 13/75] rbd: move rbd_dev_refresh() definition Greg Kroah-Hartman
2023-10-09 13:01 ` [PATCH 5.15 14/75] rbd: decouple header read-in from updating rbd_dev->header Greg Kroah-Hartman
2023-10-09 13:01 ` [PATCH 5.15 15/75] rbd: decouple parent info read-in from updating rbd_dev Greg Kroah-Hartman
2023-10-09 13:01 ` [PATCH 5.15 16/75] rbd: take header_rwsem in rbd_dev_refresh() only when updating Greg Kroah-Hartman
2023-10-09 13:01 ` [PATCH 5.15 17/75] block: fix use-after-free of q->q_usage_counter Greg Kroah-Hartman
2023-10-09 13:01 ` [PATCH 5.15 18/75] Revert "clk: imx: pll14xx: dynamically configure PLL for 393216000/361267200Hz" Greg Kroah-Hartman
2023-10-09 13:01 ` [PATCH 5.15 19/75] scsi: zfcp: Fix a double put in zfcp_port_enqueue() Greg Kroah-Hartman
2023-10-09 13:01 ` [PATCH 5.15 20/75] vringh: dont use vringh_kiov_advance() in vringh_iov_xfer() Greg Kroah-Hartman
2023-10-09 13:01 ` [PATCH 5.15 21/75] qed/red_ll2: Fix undefined behavior bug in struct qed_ll2_info Greg Kroah-Hartman
2023-10-09 13:01 ` [PATCH 5.15 22/75] wifi: mwifiex: Fix tlv_buf_left calculation Greg Kroah-Hartman
2023-10-09 13:01 ` [PATCH 5.15 23/75] net: replace calls to sock->ops->connect() with kernel_connect() Greg Kroah-Hartman
2023-10-09 13:01 ` [PATCH 5.15 24/75] btrfs: reject unknown mount options early Greg Kroah-Hartman
2023-10-09 13:01 ` [PATCH 5.15 25/75] net: prevent rewrite of msg_name in sock_sendmsg() Greg Kroah-Hartman
2023-10-09 13:01 ` [PATCH 5.15 26/75] drm/amd: Fix detection of _PR3 on the PCIe root port Greg Kroah-Hartman
2023-10-09 13:01 ` [PATCH 5.15 27/75] arm64: Add Cortex-A520 CPU part definition Greg Kroah-Hartman
2023-10-09 13:01 ` [PATCH 5.15 28/75] HID: sony: Fix a potential memory leak in sony_probe() Greg Kroah-Hartman
2023-10-09 13:01 ` [PATCH 5.15 29/75] ubi: Refuse attaching if mtds erasesize is 0 Greg Kroah-Hartman
2023-10-09 13:01 ` [PATCH 5.15 30/75] wifi: iwlwifi: dbg_ini: fix structure packing Greg Kroah-Hartman
2023-10-09 13:01 ` [PATCH 5.15 31/75] iwlwifi: avoid void pointer arithmetic Greg Kroah-Hartman
2023-10-09 13:01 ` [PATCH 5.15 32/75] wifi: iwlwifi: mvm: Fix a memory corruption issue Greg Kroah-Hartman
2023-10-09 13:01 ` [PATCH 5.15 33/75] wifi: mwifiex: Fix oob check condition in mwifiex_process_rx_packet Greg Kroah-Hartman
2023-10-09 13:01 ` [PATCH 5.15 34/75] bpf: Fix tr dereferencing Greg Kroah-Hartman
2023-10-09 13:01 ` [PATCH 5.15 35/75] drivers/net: process the result of hdlc_open() and add call of hdlc_close() in uhdlc_close() Greg Kroah-Hartman
2023-10-09 13:01 ` [PATCH 5.15 36/75] wifi: mt76: mt76x02: fix MT76x0 external LNA gain handling Greg Kroah-Hartman
2023-10-09 13:01 ` [PATCH 5.15 37/75] regmap: rbtree: Fix wrong register marked as in-cache when creating new node Greg Kroah-Hartman
2023-10-09 13:02 ` [PATCH 5.15 38/75] ima: Finish deprecation of IMA_TRUSTED_KEYRING Kconfig Greg Kroah-Hartman
2023-10-09 13:02 ` [PATCH 5.15 39/75] scsi: target: core: Fix deadlock due to recursive locking Greg Kroah-Hartman
2023-10-09 13:02 ` [PATCH 5.15 40/75] ima: rework CONFIG_IMA dependency block Greg Kroah-Hartman
2023-10-09 13:02 ` [PATCH 5.15 41/75] NFSv4: Fix a nfs4_state_manager() race Greg Kroah-Hartman
2023-10-09 13:02 ` [PATCH 5.15 42/75] bpf, sockmap: Reject sk_msg egress redirects to non-TCP sockets Greg Kroah-Hartman
2023-10-09 13:02 ` [PATCH 5.15 43/75] modpost: add missing else to the "of" check Greg Kroah-Hartman
2023-10-09 13:02 ` [PATCH 5.15 44/75] net: fix possible store tearing in neigh_periodic_work() Greg Kroah-Hartman
2023-10-09 13:02 ` [PATCH 5.15 45/75] ipv4, ipv6: Fix handling of transhdrlen in __ip{,6}_append_data() Greg Kroah-Hartman
2023-10-09 13:02 ` [PATCH 5.15 46/75] ptp: ocp: Fix error handling in ptp_ocp_device_init Greg Kroah-Hartman
2023-10-09 13:02 ` [PATCH 5.15 47/75] net: dsa: mv88e6xxx: Avoid EEPROM timeout when EEPROM is absent Greg Kroah-Hartman
2023-10-09 13:02 ` [PATCH 5.15 48/75] net: usb: smsc75xx: Fix uninit-value access in __smsc75xx_read_reg Greg Kroah-Hartman
2023-10-09 13:02 ` [PATCH 5.15 49/75] net: nfc: llcp: Add lock when modifying device list Greg Kroah-Hartman
2023-10-09 13:02 ` [PATCH 5.15 50/75] net: ethernet: ti: am65-cpsw: Fix error code in am65_cpsw_nuss_init_tx_chns() Greg Kroah-Hartman
2023-10-09 13:02 ` [PATCH 5.15 51/75] ibmveth: Remove condition to recompute TCP header checksum Greg Kroah-Hartman
2023-10-09 13:02 ` [PATCH 5.15 52/75] netfilter: handle the connecting collision properly in nf_conntrack_proto_sctp Greg Kroah-Hartman
2023-10-09 13:02 ` Greg Kroah-Hartman [this message]
2023-10-09 13:02 ` [PATCH 5.15 54/75] ipv4: Set offload_failed flag in fibmatch results Greg Kroah-Hartman
2023-10-09 13:02 ` [PATCH 5.15 55/75] net: stmmac: dwmac-stm32: fix resume on STM32 MCU Greg Kroah-Hartman
2023-10-09 13:02 ` [PATCH 5.15 56/75] tipc: fix a potential deadlock on &tx->lock Greg Kroah-Hartman
2023-10-09 13:02 ` [PATCH 5.15 57/75] tcp: fix quick-ack counting to count actual ACKs of new data Greg Kroah-Hartman
2023-10-09 13:02 ` [PATCH 5.15 58/75] tcp: fix delayed ACKs for MSS boundary condition Greg Kroah-Hartman
2023-10-09 13:02 ` [PATCH 5.15 59/75] sctp: update transport state when processing a dupcook packet Greg Kroah-Hartman
2023-10-09 13:02 ` [PATCH 5.15 60/75] sctp: update hb timer immediately after users change hb_interval Greg Kroah-Hartman
2023-10-09 13:02 ` [PATCH 5.15 61/75] HID: sony: remove duplicate NULL check before calling usb_free_urb() Greg Kroah-Hartman
2023-10-09 13:02 ` [PATCH 5.15 62/75] HID: intel-ish-hid: ipc: Disable and reenable ACPI GPE bit Greg Kroah-Hartman
2023-10-09 13:02 ` [PATCH 5.15 63/75] dm zoned: free dmz->ddev array in dmz_put_zoned_devices Greg Kroah-Hartman
2023-10-09 13:02 ` [PATCH 5.15 64/75] RDMA/core: Require admin capabilities to set system parameters Greg Kroah-Hartman
2023-10-09 13:02 ` [PATCH 5.15 65/75] of: dynamic: Fix potential memory leak in of_changeset_action() Greg Kroah-Hartman
2023-10-09 13:02 ` [PATCH 5.15 66/75] IB/mlx4: Fix the size of a buffer in add_port_entries() Greg Kroah-Hartman
2023-10-09 13:02 ` [PATCH 5.15 67/75] gpio: aspeed: fix the GPIO number passed to pinctrl_gpio_set_config() Greg Kroah-Hartman
2023-10-09 13:02 ` [PATCH 5.15 68/75] gpio: pxa: disable pinctrl calls for MMP_GPIO Greg Kroah-Hartman
2023-10-09 13:02 ` [PATCH 5.15 69/75] RDMA/cma: Initialize ib_sa_multicast structure to 0 when join Greg Kroah-Hartman
2023-10-09 13:02 ` [PATCH 5.15 70/75] RDMA/cma: Fix truncation compilation warning in make_cma_ports Greg Kroah-Hartman
2023-10-09 13:02 ` [PATCH 5.15 71/75] RDMA/uverbs: Fix typo of sizeof argument Greg Kroah-Hartman
2023-10-09 13:02 ` [PATCH 5.15 72/75] RDMA/siw: Fix connection failure handling Greg Kroah-Hartman
2023-10-09 13:02 ` [PATCH 5.15 73/75] RDMA/mlx5: Fix NULL string error Greg Kroah-Hartman
2023-10-09 13:02 ` [PATCH 5.15 74/75] ksmbd: fix uaf in smb20_oplock_break_ack Greg Kroah-Hartman
2023-10-09 13:02 ` [PATCH 5.15 75/75] parisc: Restore __ldcw_align for PA-RISC 2.0 processors Greg Kroah-Hartman
2023-10-09 18:43 ` [PATCH 5.15 00/75] 5.15.135-rc1 review SeongJae Park
2023-10-09 22:48 ` Florian Fainelli
2023-10-09 22:54 ` Shuah Khan
2023-10-10 9:58 ` Jon Hunter
2023-10-10 14:48 ` Harshit Mogalapalli
2023-10-10 15:09 ` Allen Pais
2023-10-10 17:27 ` Naresh Kamboju
2023-10-10 18:19 ` Guenter Roeck
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20231009130113.096507619@linuxfoundation.org \
--to=gregkh@linuxfoundation.org \
--cc=fw@strlen.de \
--cc=patches@lists.linux.dev \
--cc=sashal@kernel.org \
--cc=stable@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox