From: Pablo Neira Ayuso <pablo@netfilter.org>
To: netfilter-devel@vger.kernel.org
Subject: [PATCH nf-next 3/4] netfilter: nft_set_rbtree: validate element belonging to interval
Date: Wed, 28 Jan 2026 02:42:50 +0100 [thread overview]
Message-ID: <20260128014251.754512-4-pablo@netfilter.org> (raw)
In-Reply-To: <20260128014251.754512-1-pablo@netfilter.org>
The existing partial overlap detection does not check whether the elements
belong to the same interval, e.g.:
add element inet x y { 1.1.1.1-2.2.2.2, 4.4.4.4-5.5.5.5 }
add element inet x y { 1.1.1.1-5.5.5.5 } => this should fail: ENOENT
Similar situation occurs with deletions:
add element inet x y { 1.1.1.1-2.2.2.2, 4.4.4.4-5.5.5.5}
delete element inet x y { 1.1.1.1-5.5.5.5 } => this should fail: ENOENT
This currently works only because nft mitigates it in userspace, by
performing the overlap detection before sending the elements to the
kernel. That requires a prior netlink dump of the set content, which
slows down incremental updates on interval sets.
This patch extends the existing overlap detection to track the most
recent start element that already exists. The pointer to the existing
start element is stored as a cookie (no pointer dereference is ever
possible). If the end element is added and it already exists, then
check that the existing end element is adjacent to the already existing
start element. Similar logic applies to element deactivation.
There are still a few more corner cases of overlap detection related to
open intervals; these are addressed in follow-up patches.
Fixes: 7c84d41416d8 ("netfilter: nft_set_rbtree: Detect partial overlaps on insertion")
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
net/netfilter/nft_set_rbtree.c | 127 ++++++++++++++++++++++++++++++++-
1 file changed, 126 insertions(+), 1 deletion(-)
diff --git a/net/netfilter/nft_set_rbtree.c b/net/netfilter/nft_set_rbtree.c
index 0581184cacf9..6580b8e2ec25 100644
--- a/net/netfilter/nft_set_rbtree.c
+++ b/net/netfilter/nft_set_rbtree.c
@@ -33,6 +33,7 @@ struct nft_rbtree {
rwlock_t lock;
struct nft_array __rcu *array;
struct nft_array *array_next;
+ unsigned long start_rbe_cookie;
unsigned long last_gc;
};
@@ -278,6 +279,67 @@ static struct nft_rbtree_elem *nft_rbtree_prev_active(struct nft_rbtree_elem *rb
return rb_entry(node, struct nft_rbtree_elem, node);
}
+static struct nft_rbtree_elem *
+__nft_rbtree_next_active(struct rb_node *node, u8 genmask)
+{
+ struct nft_rbtree_elem *next_rbe;
+
+ while (node) {
+ next_rbe = rb_entry(node, struct nft_rbtree_elem, node);
+ if (!nft_set_elem_active(&next_rbe->ext, genmask)) {
+ node = rb_next(node);
+ continue;
+ }
+
+ return next_rbe;
+ }
+
+ return NULL;
+}
+
+static struct nft_rbtree_elem *
+nft_rbtree_next_active(struct nft_rbtree_elem *rbe, u8 genmask)
+{
+ return __nft_rbtree_next_active(rb_next(&rbe->node), genmask);
+}
+
+static void nft_rbtree_set_start_cookie(struct nft_rbtree *priv,
+ const struct nft_rbtree_elem *rbe)
+{
+ priv->start_rbe_cookie = (unsigned long)rbe;
+}
+
+static bool nft_rbtree_cmp_start_cookie(struct nft_rbtree *priv,
+ const struct nft_rbtree_elem *rbe)
+{
+ return priv->start_rbe_cookie == (unsigned long)rbe;
+}
+
+static bool nft_rbtree_insert_same_interval(const struct net *net,
+ struct nft_rbtree *priv,
+ struct nft_rbtree_elem *rbe)
+{
+ u8 genmask = nft_genmask_next(net);
+ struct nft_rbtree_elem *next_rbe;
+
+ if (!priv->start_rbe_cookie)
+ return true;
+
+ next_rbe = nft_rbtree_next_active(rbe, genmask);
+ if (next_rbe) {
+ /* Same interval: next active element is the start element recorded in the cookie. */
+ if (nft_rbtree_interval_start(next_rbe) &&
+ nft_rbtree_cmp_start_cookie(priv, next_rbe)) {
+ priv->start_rbe_cookie = 0;
+ return true;
+ }
+ }
+
+ priv->start_rbe_cookie = 0;
+
+ return false;
+}
+
static int __nft_rbtree_insert(const struct net *net, const struct nft_set *set,
struct nft_rbtree_elem *new,
struct nft_elem_priv **elem_priv)
@@ -393,12 +455,18 @@ static int __nft_rbtree_insert(const struct net *net, const struct nft_set *set,
}
}
+ if (nft_rbtree_interval_null(set, new))
+ priv->start_rbe_cookie = 0;
+ else if (nft_rbtree_interval_start(new) && priv->start_rbe_cookie)
+ priv->start_rbe_cookie = 0;
+
/* - new start element matching existing start element: full overlap
* reported as -EEXIST, cleared by caller if NLM_F_EXCL is not given.
*/
if (rbe_ge && !nft_rbtree_cmp(set, new, rbe_ge) &&
nft_rbtree_interval_start(rbe_ge) == nft_rbtree_interval_start(new)) {
*elem_priv = &rbe_ge->priv;
+ nft_rbtree_set_start_cookie(priv, rbe_ge);
return -EEXIST;
}
@@ -414,6 +482,11 @@ static int __nft_rbtree_insert(const struct net *net, const struct nft_set *set,
return -ECANCELED;
*elem_priv = &rbe_le->priv;
+
+ /* - start and end element belong to the same interval. */
+ if (!nft_rbtree_insert_same_interval(net, priv, rbe_le))
+ return -ENOTEMPTY;
+
return -EEXIST;
}
@@ -603,6 +676,48 @@ static void nft_rbtree_activate(const struct net *net,
nft_clear(net, &rbe->ext);
}
+static struct nft_rbtree_elem *
+nft_rbtree_next_inactive(struct nft_rbtree_elem *rbe, u8 genmask)
+{
+ struct nft_rbtree_elem *next_rbe;
+ struct rb_node *node;
+
+ node = rb_next(&rbe->node);
+ if (node) {
+ next_rbe = rb_entry(node, struct nft_rbtree_elem, node);
+ if (nft_rbtree_interval_start(next_rbe) &&
+ !nft_set_elem_active(&next_rbe->ext, genmask))
+ return next_rbe;
+ }
+
+ return NULL;
+}
+
+static bool nft_rbtree_deactivate_same_interval(const struct net *net,
+ struct nft_rbtree *priv,
+ struct nft_rbtree_elem *rbe)
+{
+ u8 genmask = nft_genmask_next(net);
+ struct nft_rbtree_elem *next_rbe;
+
+ if (!priv->start_rbe_cookie)
+ return true;
+
+ next_rbe = nft_rbtree_next_inactive(rbe, genmask);
+ if (next_rbe) {
+ /* Same interval: adjacent inactive start element is the one recorded in the cookie. */
+ if (nft_rbtree_interval_start(next_rbe) &&
+ nft_rbtree_cmp_start_cookie(priv, next_rbe)) {
+ priv->start_rbe_cookie = 0;
+ return true;
+ }
+ }
+
+ priv->start_rbe_cookie = 0;
+
+ return false;
+}
+
static void nft_rbtree_flush(const struct net *net,
const struct nft_set *set,
struct nft_elem_priv *elem_priv)
@@ -617,12 +732,16 @@ nft_rbtree_deactivate(const struct net *net, const struct nft_set *set,
const struct nft_set_elem *elem)
{
struct nft_rbtree_elem *rbe, *this = nft_elem_priv_cast(elem->priv);
- const struct nft_rbtree *priv = nft_set_priv(set);
+ struct nft_rbtree *priv = nft_set_priv(set);
const struct rb_node *parent = priv->root.rb_node;
u8 genmask = nft_genmask_next(net);
u64 tstamp = nft_net_tstamp(net);
int d;
+ if (nft_rbtree_interval_start(this) ||
+ nft_rbtree_interval_null(set, this))
+ priv->start_rbe_cookie = 0;
+
if (nft_array_may_resize(set) < 0)
return NULL;
@@ -650,6 +769,12 @@ nft_rbtree_deactivate(const struct net *net, const struct nft_set *set,
parent = parent->rb_left;
continue;
}
+
+ if (nft_rbtree_interval_start(rbe))
+ nft_rbtree_set_start_cookie(priv, rbe);
+ else if (!nft_rbtree_deactivate_same_interval(net, priv, rbe))
+ return NULL;
+
nft_rbtree_flush(net, set, &rbe->priv);
return &rbe->priv;
}
--
2.47.3
next prev parent reply other threads:[~2026-01-28 1:43 UTC|newest]
Thread overview: 7+ messages / expand[flat|nested] mbox.gz Atom feed top
2026-01-28 1:42 [PATCH nf-next 0/4] nf_tables: complete interval overlap detection Pablo Neira Ayuso
2026-01-28 1:42 ` [PATCH nf-next 1/4] netfilter: nft_set_rbtree: fix bogus EEXIST with NLM_F_CREATE with null interval Pablo Neira Ayuso
2026-01-28 1:42 ` [PATCH nf-next 2/4] netfilter: nft_set_rbtree: check for partial overlaps in anonymous sets Pablo Neira Ayuso
2026-01-28 1:42 ` Pablo Neira Ayuso [this message]
2026-01-28 1:42 ` [PATCH nf-next 4/4] netfilter: nft_set_rbtree: validate open interval overlap Pablo Neira Ayuso
2026-01-30 12:34 ` Florian Westphal
2026-01-28 15:45 ` [PATCH nf-next 0/4] nf_tables: complete interval overlap detection Florian Westphal
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20260128014251.754512-4-pablo@netfilter.org \
--to=pablo@netfilter.org \
--cc=netfilter-devel@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox