netfilter-devel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Pablo Neira Ayuso <pablo@netfilter.org>
To: netfilter-devel@vger.kernel.org
Cc: gregkh@linuxfoundation.org, stable@vger.kernel.org, sashal@kernel.org
Subject: [PATCH -stable,5.10 01/17] netfilter: nf_tables: integrate pipapo into commit protocol
Date: Fri, 22 Sep 2023 19:01:02 +0200	[thread overview]
Message-ID: <20230922170118.152420-2-pablo@netfilter.org> (raw)
In-Reply-To: <20230922170118.152420-1-pablo@netfilter.org>

commit 212ed75dc5fb9d1423b3942c8f872a868cda3466 upstream.

The pipapo set backend follows copy-on-update approach, maintaining one
clone of the existing datastructure that is being updated. The clone
and current datastructures are swapped via rcu from the commit step.

The existing integration with the commit protocol is flawed because
there is no operation to clean up the clone if the transaction is
aborted. Moreover, the datastructure swap happens on set element
activation.

This patch adds two new operations for sets: commit and abort, these new
operations are invoked from the commit and abort steps, after the
transactions have been digested, and it updates the pipapo set backend
to use it.

This patch adds a new ->pending_update field to sets to maintain a list
of sets that require this new commit and abort operations.

Fixes: 3c4287f62044 ("nf_tables: Add set type for arbitrary concatenation of ranges")
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/nf_tables.h |  4 ++-
 net/netfilter/nf_tables_api.c     | 56 +++++++++++++++++++++++++++++++
 net/netfilter/nft_set_pipapo.c    | 55 +++++++++++++++++++++---------
 3 files changed, 99 insertions(+), 16 deletions(-)

diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h
index eec29dd6681c..a3068ed0f316 100644
--- a/include/net/netfilter/nf_tables.h
+++ b/include/net/netfilter/nf_tables.h
@@ -373,7 +373,8 @@ struct nft_set_ops {
 					       const struct nft_set *set,
 					       const struct nft_set_elem *elem,
 					       unsigned int flags);
-
+	void				(*commit)(const struct nft_set *set);
+	void				(*abort)(const struct nft_set *set);
 	u64				(*privsize)(const struct nlattr * const nla[],
 						    const struct nft_set_desc *desc);
 	bool				(*estimate)(const struct nft_set_desc *desc,
@@ -454,6 +455,7 @@ struct nft_set {
 	u16				udlen;
 	unsigned char			*udata;
 	struct nft_expr			*expr;
+	struct list_head		pending_update;
 	/* runtime data below here */
 	const struct nft_set_ops	*ops ____cacheline_aligned;
 	u16				flags:14,
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 2669999d1bc9..430dcd0f6c3b 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -4509,6 +4509,7 @@ static int nf_tables_newset(struct net *net, struct sock *nlsk,
 	}
 
 	set->handle = nf_tables_alloc_handle(table);
+	INIT_LIST_HEAD(&set->pending_update);
 
 	err = nft_trans_set_add(&ctx, NFT_MSG_NEWSET, set);
 	if (err < 0)
@@ -8141,10 +8142,25 @@ static void nf_tables_commit_audit_log(struct list_head *adl, u32 generation)
 	}
 }
 
+static void nft_set_commit_update(struct list_head *set_update_list)
+{
+	struct nft_set *set, *next;
+
+	list_for_each_entry_safe(set, next, set_update_list, pending_update) {
+		list_del_init(&set->pending_update);
+
+		if (!set->ops->commit)
+			continue;
+
+		set->ops->commit(set);
+	}
+}
+
 static int nf_tables_commit(struct net *net, struct sk_buff *skb)
 {
 	struct nftables_pernet *nft_net = net_generic(net, nf_tables_net_id);
 	struct nft_trans *trans, *next;
+	LIST_HEAD(set_update_list);
 	struct nft_trans_elem *te;
 	struct nft_chain *chain;
 	struct nft_table *table;
@@ -8310,6 +8326,11 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb)
 			nf_tables_setelem_notify(&trans->ctx, te->set,
 						 &te->elem,
 						 NFT_MSG_NEWSETELEM, 0);
+			if (te->set->ops->commit &&
+			    list_empty(&te->set->pending_update)) {
+				list_add_tail(&te->set->pending_update,
+					      &set_update_list);
+			}
 			nft_trans_destroy(trans);
 			break;
 		case NFT_MSG_DELSETELEM:
@@ -8321,6 +8342,11 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb)
 			te->set->ops->remove(net, te->set, &te->elem);
 			atomic_dec(&te->set->nelems);
 			te->set->ndeact--;
+			if (te->set->ops->commit &&
+			    list_empty(&te->set->pending_update)) {
+				list_add_tail(&te->set->pending_update,
+					      &set_update_list);
+			}
 			break;
 		case NFT_MSG_NEWOBJ:
 			if (nft_trans_obj_update(trans)) {
@@ -8381,6 +8407,8 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb)
 		}
 	}
 
+	nft_set_commit_update(&set_update_list);
+
 	nft_commit_notify(net, NETLINK_CB(skb).portid);
 	nf_tables_gen_notify(net, skb, NFT_MSG_NEWGEN);
 	nf_tables_commit_audit_log(&adl, nft_net->base_seq);
@@ -8437,10 +8465,25 @@ static void nf_tables_abort_release(struct nft_trans *trans)
 	kfree(trans);
 }
 
+static void nft_set_abort_update(struct list_head *set_update_list)
+{
+	struct nft_set *set, *next;
+
+	list_for_each_entry_safe(set, next, set_update_list, pending_update) {
+		list_del_init(&set->pending_update);
+
+		if (!set->ops->abort)
+			continue;
+
+		set->ops->abort(set);
+	}
+}
+
 static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action)
 {
 	struct nftables_pernet *nft_net = net_generic(net, nf_tables_net_id);
 	struct nft_trans *trans, *next;
+	LIST_HEAD(set_update_list);
 	struct nft_trans_elem *te;
 
 	if (action == NFNL_ABORT_VALIDATE &&
@@ -8529,6 +8572,12 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action)
 			te = (struct nft_trans_elem *)trans->data;
 			te->set->ops->remove(net, te->set, &te->elem);
 			atomic_dec(&te->set->nelems);
+
+			if (te->set->ops->abort &&
+			    list_empty(&te->set->pending_update)) {
+				list_add_tail(&te->set->pending_update,
+					      &set_update_list);
+			}
 			break;
 		case NFT_MSG_DELSETELEM:
 			te = (struct nft_trans_elem *)trans->data;
@@ -8537,6 +8586,11 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action)
 			te->set->ops->activate(net, te->set, &te->elem);
 			te->set->ndeact--;
 
+			if (te->set->ops->abort &&
+			    list_empty(&te->set->pending_update)) {
+				list_add_tail(&te->set->pending_update,
+					      &set_update_list);
+			}
 			nft_trans_destroy(trans);
 			break;
 		case NFT_MSG_NEWOBJ:
@@ -8577,6 +8631,8 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action)
 		}
 	}
 
+	nft_set_abort_update(&set_update_list);
+
 	synchronize_rcu();
 
 	list_for_each_entry_safe_reverse(trans, next,
diff --git a/net/netfilter/nft_set_pipapo.c b/net/netfilter/nft_set_pipapo.c
index 50f840e312b0..ce6c07ea7244 100644
--- a/net/netfilter/nft_set_pipapo.c
+++ b/net/netfilter/nft_set_pipapo.c
@@ -1603,17 +1603,10 @@ static void pipapo_free_fields(struct nft_pipapo_match *m)
 	}
 }
 
-/**
- * pipapo_reclaim_match - RCU callback to free fields from old matching data
- * @rcu:	RCU head
- */
-static void pipapo_reclaim_match(struct rcu_head *rcu)
+static void pipapo_free_match(struct nft_pipapo_match *m)
 {
-	struct nft_pipapo_match *m;
 	int i;
 
-	m = container_of(rcu, struct nft_pipapo_match, rcu);
-
 	for_each_possible_cpu(i)
 		kfree(*per_cpu_ptr(m->scratch, i));
 
@@ -1628,7 +1621,19 @@ static void pipapo_reclaim_match(struct rcu_head *rcu)
 }
 
 /**
- * pipapo_commit() - Replace lookup data with current working copy
+ * pipapo_reclaim_match - RCU callback to free fields from old matching data
+ * @rcu:	RCU head
+ */
+static void pipapo_reclaim_match(struct rcu_head *rcu)
+{
+	struct nft_pipapo_match *m;
+
+	m = container_of(rcu, struct nft_pipapo_match, rcu);
+	pipapo_free_match(m);
+}
+
+/**
+ * nft_pipapo_commit() - Replace lookup data with current working copy
  * @set:	nftables API set representation
  *
  * While at it, check if we should perform garbage collection on the working
@@ -1638,7 +1643,7 @@ static void pipapo_reclaim_match(struct rcu_head *rcu)
  * We also need to create a new working copy for subsequent insertions and
  * deletions.
  */
-static void pipapo_commit(const struct nft_set *set)
+static void nft_pipapo_commit(const struct nft_set *set)
 {
 	struct nft_pipapo *priv = nft_set_priv(set);
 	struct nft_pipapo_match *new_clone, *old;
@@ -1663,6 +1668,26 @@ static void pipapo_commit(const struct nft_set *set)
 	priv->clone = new_clone;
 }
 
+static void nft_pipapo_abort(const struct nft_set *set)
+{
+	struct nft_pipapo *priv = nft_set_priv(set);
+	struct nft_pipapo_match *new_clone, *m;
+
+	if (!priv->dirty)
+		return;
+
+	m = rcu_dereference(priv->match);
+
+	new_clone = pipapo_clone(m);
+	if (IS_ERR(new_clone))
+		return;
+
+	priv->dirty = false;
+
+	pipapo_free_match(priv->clone);
+	priv->clone = new_clone;
+}
+
 /**
  * nft_pipapo_activate() - Mark element reference as active given key, commit
  * @net:	Network namespace
@@ -1670,8 +1695,7 @@ static void pipapo_commit(const struct nft_set *set)
  * @elem:	nftables API element representation containing key data
  *
  * On insertion, elements are added to a copy of the matching data currently
- * in use for lookups, and not directly inserted into current lookup data, so
- * we'll take care of that by calling pipapo_commit() here. Both
+ * in use for lookups, and not directly inserted into current lookup data. Both
  * nft_pipapo_insert() and nft_pipapo_activate() are called once for each
  * element, hence we can't purpose either one as a real commit operation.
  */
@@ -1687,8 +1711,6 @@ static void nft_pipapo_activate(const struct net *net,
 
 	nft_set_elem_change_active(net, set, &e->ext);
 	nft_set_elem_clear_busy(&e->ext);
-
-	pipapo_commit(set);
 }
 
 /**
@@ -1938,7 +1960,6 @@ static void nft_pipapo_remove(const struct net *net, const struct nft_set *set,
 		if (i == m->field_count) {
 			priv->dirty = true;
 			pipapo_drop(m, rulemap);
-			pipapo_commit(set);
 			return;
 		}
 
@@ -2245,6 +2266,8 @@ const struct nft_set_type nft_set_pipapo_type = {
 		.init		= nft_pipapo_init,
 		.destroy	= nft_pipapo_destroy,
 		.gc_init	= nft_pipapo_gc_init,
+		.commit		= nft_pipapo_commit,
+		.abort		= nft_pipapo_abort,
 		.elemsize	= offsetof(struct nft_pipapo_elem, ext),
 	},
 };
@@ -2267,6 +2290,8 @@ const struct nft_set_type nft_set_pipapo_avx2_type = {
 		.init		= nft_pipapo_init,
 		.destroy	= nft_pipapo_destroy,
 		.gc_init	= nft_pipapo_gc_init,
+		.commit		= nft_pipapo_commit,
+		.abort		= nft_pipapo_abort,
 		.elemsize	= offsetof(struct nft_pipapo_elem, ext),
 	},
 };
-- 
2.30.2


  reply	other threads:[~2023-09-22 17:01 UTC|newest]

Thread overview: 18+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2023-09-22 17:01 [PATCH -stable,5.10 00/17] Netfilter stable fixes for 5.10 Pablo Neira Ayuso
2023-09-22 17:01 ` Pablo Neira Ayuso [this message]
2023-09-22 17:01 ` [PATCH -stable,5.10 02/17] netfilter: nf_tables: don't skip expired elements during walk Pablo Neira Ayuso
2023-09-22 17:01 ` [PATCH -stable,5.10 03/17] netfilter: nf_tables: GC transaction API to avoid race with control plane Pablo Neira Ayuso
2023-09-22 17:01 ` [PATCH -stable,5.10 04/17] netfilter: nf_tables: adapt set backend to use GC transaction API Pablo Neira Ayuso
2023-09-22 17:01 ` [PATCH -stable,5.10 05/17] netfilter: nft_set_hash: mark set element as dead when deleting from packet path Pablo Neira Ayuso
2023-09-22 17:01 ` [PATCH -stable,5.10 06/17] netfilter: nf_tables: remove busy mark and gc batch API Pablo Neira Ayuso
2023-09-22 17:01 ` [PATCH -stable,5.10 07/17] netfilter: nf_tables: don't fail inserts if duplicate has expired Pablo Neira Ayuso
2023-09-22 17:01 ` [PATCH -stable,5.10 08/17] netfilter: nf_tables: fix GC transaction races with netns and netlink event exit path Pablo Neira Ayuso
2023-09-22 17:01 ` [PATCH -stable,5.10 09/17] netfilter: nf_tables: GC transaction race with netns dismantle Pablo Neira Ayuso
2023-09-22 17:01 ` [PATCH -stable,5.10 10/17] netfilter: nf_tables: GC transaction race with abort path Pablo Neira Ayuso
2023-09-22 17:01 ` [PATCH -stable,5.10 11/17] netfilter: nf_tables: use correct lock to protect gc_list Pablo Neira Ayuso
2023-09-22 17:01 ` [PATCH -stable,5.10 12/17] netfilter: nf_tables: defer gc run if previous batch is still pending Pablo Neira Ayuso
2023-09-22 17:01 ` [PATCH -stable,5.10 13/17] netfilter: nft_set_rbtree: skip sync GC for new elements in this transaction Pablo Neira Ayuso
2023-09-22 17:01 ` [PATCH -stable,5.10 14/17] netfilter: nft_set_rbtree: use read spinlock to avoid datapath contention Pablo Neira Ayuso
2023-09-22 17:01 ` [PATCH -stable,5.10 15/17] netfilter: nft_set_pipapo: stop GC iteration if GC transaction allocation fails Pablo Neira Ayuso
2023-09-22 17:01 ` [PATCH -stable,5.10 16/17] netfilter: nft_set_hash: try later when GC hits EAGAIN on iteration Pablo Neira Ayuso
2023-09-22 17:01 ` [PATCH -stable,5.10 17/17] netfilter: nf_tables: fix memleak when more than 255 elements expired Pablo Neira Ayuso

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20230922170118.152420-2-pablo@netfilter.org \
    --to=pablo@netfilter.org \
    --cc=gregkh@linuxfoundation.org \
    --cc=netfilter-devel@vger.kernel.org \
    --cc=sashal@kernel.org \
    --cc=stable@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).