* [PATCH nf-next,v3 00/14] nf_tables datapath ruleset blob and register tracking
@ 2022-01-09 16:11 Pablo Neira Ayuso
  2022-01-09 16:11 ` [PATCH 01/14] netfilter: nft_connlimit: move stateful fields out of expression data Pablo Neira Ayuso
                   ` (13 more replies)
  0 siblings, 14 replies; 15+ messages in thread
From: Pablo Neira Ayuso @ 2022-01-09 16:11 UTC (permalink / raw)
  To: netfilter-devel
Hi,
The following patchset contains v3 updates for the datapath ruleset
representation and new infrastructure to skip redundant selector store
to register operations [1].
Changes only to patch 7 and 12.
- Patch 7:  Add more memory checks to the routine that builds the blob,
            as requested by Florian.
- Patch 12: Update nft_bitwise reduce routine to deal with different
            source and destination registers.
[1] https://marc.info/?l=netfilter-devel&m=164168070413344&w=2
Pablo Neira Ayuso (14):
  netfilter: nft_connlimit: move stateful fields out of expression data
  netfilter: nft_last: move stateful fields out of expression data
  netfilter: nft_quota: move stateful fields out of expression data
  netfilter: nft_numgen: move stateful fields out of expression data
  netfilter: nft_limit: rename stateful structure
  netfilter: nft_limit: move stateful fields out of expression data
  netfilter: nf_tables: add rule blob layout
  netfilter: nf_tables: add NFT_REG32_NUM
  netfilter: nf_tables: add register tracking infrastructure
  netfilter: nft_payload: track register operations
  netfilter: nft_meta: track register operations
  netfilter: nft_bitwise: track register operations
  netfilter: nft_payload: cancel register tracking after payload update
  netfilter: nft_meta: cancel register tracking after meta update
 include/net/netfilter/nf_tables.h      |  40 +++++-
 net/bridge/netfilter/nft_meta_bridge.c |  20 +++
 net/netfilter/nf_tables_api.c          | 160 ++++++++++++++++-------
 net/netfilter/nf_tables_core.c         |  41 ++++--
 net/netfilter/nf_tables_trace.c        |   2 +-
 net/netfilter/nft_bitwise.c            |  95 ++++++++++++++
 net/netfilter/nft_connlimit.c          |  26 ++--
 net/netfilter/nft_last.c               |  69 +++++++---
 net/netfilter/nft_limit.c              | 172 +++++++++++++++++--------
 net/netfilter/nft_meta.c               |  48 +++++++
 net/netfilter/nft_numgen.c             |  34 ++++-
 net/netfilter/nft_payload.c            |  51 ++++++++
 net/netfilter/nft_quota.c              |  52 +++++++-
 13 files changed, 654 insertions(+), 156 deletions(-)
--
2.30.2
^ permalink raw reply	[flat|nested] 15+ messages in thread
* [PATCH 01/14] netfilter: nft_connlimit: move stateful fields out of expression data
  2022-01-09 16:11 [PATCH nf-next,v3 00/14] nf_tables datapath ruleset blob and register tracking Pablo Neira Ayuso
@ 2022-01-09 16:11 ` Pablo Neira Ayuso
  2022-01-09 16:11 ` [PATCH 02/14] netfilter: nft_last: " Pablo Neira Ayuso
                   ` (12 subsequent siblings)
  13 siblings, 0 replies; 15+ messages in thread
From: Pablo Neira Ayuso @ 2022-01-09 16:11 UTC (permalink / raw)
  To: netfilter-devel
In preparation for the rule blob representation.
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/nft_connlimit.c | 26 ++++++++++++++++++--------
 1 file changed, 18 insertions(+), 8 deletions(-)
diff --git a/net/netfilter/nft_connlimit.c b/net/netfilter/nft_connlimit.c
index 7d0761fad37e..58dcafe8bf79 100644
--- a/net/netfilter/nft_connlimit.c
+++ b/net/netfilter/nft_connlimit.c
@@ -14,7 +14,7 @@
 #include <net/netfilter/nf_conntrack_zones.h>
 
 struct nft_connlimit {
-	struct nf_conncount_list	list;
+	struct nf_conncount_list	*list;
 	u32				limit;
 	bool				invert;
 };
@@ -43,12 +43,12 @@ static inline void nft_connlimit_do_eval(struct nft_connlimit *priv,
 		return;
 	}
 
-	if (nf_conncount_add(nft_net(pkt), &priv->list, tuple_ptr, zone)) {
+	if (nf_conncount_add(nft_net(pkt), priv->list, tuple_ptr, zone)) {
 		regs->verdict.code = NF_DROP;
 		return;
 	}
 
-	count = priv->list.count;
+	count = priv->list->count;
 
 	if ((count > priv->limit) ^ priv->invert) {
 		regs->verdict.code = NFT_BREAK;
@@ -76,7 +76,11 @@ static int nft_connlimit_do_init(const struct nft_ctx *ctx,
 			invert = true;
 	}
 
-	nf_conncount_list_init(&priv->list);
+	priv->list = kmalloc(sizeof(*priv->list), GFP_KERNEL);
+	if (!priv->list)
+		return -ENOMEM;
+
+	nf_conncount_list_init(priv->list);
 	priv->limit	= limit;
 	priv->invert	= invert;
 
@@ -87,7 +91,8 @@ static void nft_connlimit_do_destroy(const struct nft_ctx *ctx,
 				     struct nft_connlimit *priv)
 {
 	nf_ct_netns_put(ctx->net, ctx->family);
-	nf_conncount_cache_free(&priv->list);
+	nf_conncount_cache_free(priv->list);
+	kfree(priv->list);
 }
 
 static int nft_connlimit_do_dump(struct sk_buff *skb,
@@ -200,7 +205,11 @@ static int nft_connlimit_clone(struct nft_expr *dst, const struct nft_expr *src)
 	struct nft_connlimit *priv_dst = nft_expr_priv(dst);
 	struct nft_connlimit *priv_src = nft_expr_priv(src);
 
-	nf_conncount_list_init(&priv_dst->list);
+	priv_dst->list = kmalloc(sizeof(*priv_dst->list), GFP_ATOMIC);
+	if (!priv_dst->list)
+		return -ENOMEM;
+
+	nf_conncount_list_init(priv_dst->list);
 	priv_dst->limit	 = priv_src->limit;
 	priv_dst->invert = priv_src->invert;
 
@@ -212,7 +221,8 @@ static void nft_connlimit_destroy_clone(const struct nft_ctx *ctx,
 {
 	struct nft_connlimit *priv = nft_expr_priv(expr);
 
-	nf_conncount_cache_free(&priv->list);
+	nf_conncount_cache_free(priv->list);
+	kfree(priv->list);
 }
 
 static bool nft_connlimit_gc(struct net *net, const struct nft_expr *expr)
@@ -221,7 +231,7 @@ static bool nft_connlimit_gc(struct net *net, const struct nft_expr *expr)
 	bool ret;
 
 	local_bh_disable();
-	ret = nf_conncount_gc_list(net, &priv->list);
+	ret = nf_conncount_gc_list(net, priv->list);
 	local_bh_enable();
 
 	return ret;
-- 
2.30.2
^ permalink raw reply related	[flat|nested] 15+ messages in thread
* [PATCH 02/14] netfilter: nft_last: move stateful fields out of expression data
  2022-01-09 16:11 [PATCH nf-next,v3 00/14] nf_tables datapath ruleset blob and register tracking Pablo Neira Ayuso
  2022-01-09 16:11 ` [PATCH 01/14] netfilter: nft_connlimit: move stateful fields out of expression data Pablo Neira Ayuso
@ 2022-01-09 16:11 ` Pablo Neira Ayuso
  2022-01-09 16:11 ` [PATCH 03/14] netfilter: nft_quota: " Pablo Neira Ayuso
                   ` (11 subsequent siblings)
  13 siblings, 0 replies; 15+ messages in thread
From: Pablo Neira Ayuso @ 2022-01-09 16:11 UTC (permalink / raw)
  To: netfilter-devel
In preparation for the rule blob representation.
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/nft_last.c | 69 +++++++++++++++++++++++++++++-----------
 1 file changed, 51 insertions(+), 18 deletions(-)
diff --git a/net/netfilter/nft_last.c b/net/netfilter/nft_last.c
index 304e33cbed9b..5ee33d0ccd4e 100644
--- a/net/netfilter/nft_last.c
+++ b/net/netfilter/nft_last.c
@@ -8,9 +8,13 @@
 #include <net/netfilter/nf_tables_core.h>
 #include <net/netfilter/nf_tables.h>
 
+struct nft_last {
+	unsigned long	jiffies;
+	unsigned int	set;
+};
+
 struct nft_last_priv {
-	unsigned long	last_jiffies;
-	unsigned int	last_set;
+	struct nft_last	*last;
 };
 
 static const struct nla_policy nft_last_policy[NFTA_LAST_MAX + 1] = {
@@ -22,47 +26,55 @@ static int nft_last_init(const struct nft_ctx *ctx, const struct nft_expr *expr,
 			 const struct nlattr * const tb[])
 {
 	struct nft_last_priv *priv = nft_expr_priv(expr);
+	struct nft_last *last;
 	u64 last_jiffies;
-	u32 last_set = 0;
 	int err;
 
-	if (tb[NFTA_LAST_SET]) {
-		last_set = ntohl(nla_get_be32(tb[NFTA_LAST_SET]));
-		if (last_set == 1)
-			priv->last_set = 1;
-	}
+	last = kzalloc(sizeof(*last), GFP_KERNEL);
+	if (!last)
+		return -ENOMEM;
+
+	if (tb[NFTA_LAST_SET])
+		last->set = ntohl(nla_get_be32(tb[NFTA_LAST_SET]));
 
-	if (last_set && tb[NFTA_LAST_MSECS]) {
+	if (last->set && tb[NFTA_LAST_MSECS]) {
 		err = nf_msecs_to_jiffies64(tb[NFTA_LAST_MSECS], &last_jiffies);
 		if (err < 0)
-			return err;
+			goto err;
 
-		priv->last_jiffies = jiffies - (unsigned long)last_jiffies;
+		last->jiffies = jiffies - (unsigned long)last_jiffies;
 	}
+	priv->last = last;
 
 	return 0;
+err:
+	kfree(last);
+
+	return err;
 }
 
 static void nft_last_eval(const struct nft_expr *expr,
 			  struct nft_regs *regs, const struct nft_pktinfo *pkt)
 {
 	struct nft_last_priv *priv = nft_expr_priv(expr);
+	struct nft_last *last = priv->last;
 
-	if (READ_ONCE(priv->last_jiffies) != jiffies)
-		WRITE_ONCE(priv->last_jiffies, jiffies);
-	if (READ_ONCE(priv->last_set) == 0)
-		WRITE_ONCE(priv->last_set, 1);
+	if (READ_ONCE(last->jiffies) != jiffies)
+		WRITE_ONCE(last->jiffies, jiffies);
+	if (READ_ONCE(last->set) == 0)
+		WRITE_ONCE(last->set, 1);
 }
 
 static int nft_last_dump(struct sk_buff *skb, const struct nft_expr *expr)
 {
 	struct nft_last_priv *priv = nft_expr_priv(expr);
-	unsigned long last_jiffies = READ_ONCE(priv->last_jiffies);
-	u32 last_set = READ_ONCE(priv->last_set);
+	struct nft_last *last = priv->last;
+	unsigned long last_jiffies = READ_ONCE(last->jiffies);
+	u32 last_set = READ_ONCE(last->set);
 	__be64 msecs;
 
 	if (time_before(jiffies, last_jiffies)) {
-		WRITE_ONCE(priv->last_set, 0);
+		WRITE_ONCE(last->set, 0);
 		last_set = 0;
 	}
 
@@ -81,11 +93,32 @@ static int nft_last_dump(struct sk_buff *skb, const struct nft_expr *expr)
 	return -1;
 }
 
+static void nft_last_destroy(const struct nft_ctx *ctx,
+			     const struct nft_expr *expr)
+{
+	struct nft_last_priv *priv = nft_expr_priv(expr);
+
+	kfree(priv->last);
+}
+
+static int nft_last_clone(struct nft_expr *dst, const struct nft_expr *src)
+{
+	struct nft_last_priv *priv_dst = nft_expr_priv(dst);
+
+	priv_dst->last = kzalloc(sizeof(*priv_dst->last), GFP_ATOMIC);
+	if (!priv_dst->last)
+		return -ENOMEM;
+
+	return 0;
+}
+
 static const struct nft_expr_ops nft_last_ops = {
 	.type		= &nft_last_type,
 	.size		= NFT_EXPR_SIZE(sizeof(struct nft_last_priv)),
 	.eval		= nft_last_eval,
 	.init		= nft_last_init,
+	.destroy	= nft_last_destroy,
+	.clone		= nft_last_clone,
 	.dump		= nft_last_dump,
 };
 
-- 
2.30.2
^ permalink raw reply related	[flat|nested] 15+ messages in thread
* [PATCH 03/14] netfilter: nft_quota: move stateful fields out of expression data
  2022-01-09 16:11 [PATCH nf-next,v3 00/14] nf_tables datapath ruleset blob and register tracking Pablo Neira Ayuso
  2022-01-09 16:11 ` [PATCH 01/14] netfilter: nft_connlimit: move stateful fields out of expression data Pablo Neira Ayuso
  2022-01-09 16:11 ` [PATCH 02/14] netfilter: nft_last: " Pablo Neira Ayuso
@ 2022-01-09 16:11 ` Pablo Neira Ayuso
  2022-01-09 16:11 ` [PATCH 04/14] netfilter: nft_numgen: " Pablo Neira Ayuso
                   ` (10 subsequent siblings)
  13 siblings, 0 replies; 15+ messages in thread
From: Pablo Neira Ayuso @ 2022-01-09 16:11 UTC (permalink / raw)
  To: netfilter-devel
In preparation for the rule blob representation.
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/nft_quota.c | 52 +++++++++++++++++++++++++++++++++++----
 1 file changed, 47 insertions(+), 5 deletions(-)
diff --git a/net/netfilter/nft_quota.c b/net/netfilter/nft_quota.c
index c4d1389f7185..0484aef74273 100644
--- a/net/netfilter/nft_quota.c
+++ b/net/netfilter/nft_quota.c
@@ -15,13 +15,13 @@
 struct nft_quota {
 	atomic64_t	quota;
 	unsigned long	flags;
-	atomic64_t	consumed;
+	atomic64_t	*consumed;
 };
 
 static inline bool nft_overquota(struct nft_quota *priv,
 				 const struct sk_buff *skb)
 {
-	return atomic64_add_return(skb->len, &priv->consumed) >=
+	return atomic64_add_return(skb->len, priv->consumed) >=
 	       atomic64_read(&priv->quota);
 }
 
@@ -90,13 +90,23 @@ static int nft_quota_do_init(const struct nlattr * const tb[],
 			return -EOPNOTSUPP;
 	}
 
+	priv->consumed = kmalloc(sizeof(*priv->consumed), GFP_KERNEL);
+	if (!priv->consumed)
+		return -ENOMEM;
+
 	atomic64_set(&priv->quota, quota);
 	priv->flags = flags;
-	atomic64_set(&priv->consumed, consumed);
+	atomic64_set(priv->consumed, consumed);
 
 	return 0;
 }
 
+static void nft_quota_do_destroy(const struct nft_ctx *ctx,
+				 struct nft_quota *priv)
+{
+	kfree(priv->consumed);
+}
+
 static int nft_quota_obj_init(const struct nft_ctx *ctx,
 			      const struct nlattr * const tb[],
 			      struct nft_object *obj)
@@ -128,7 +138,7 @@ static int nft_quota_do_dump(struct sk_buff *skb, struct nft_quota *priv,
 	 * that we see, don't go over the quota boundary in what we send to
 	 * userspace.
 	 */
-	consumed = atomic64_read(&priv->consumed);
+	consumed = atomic64_read(priv->consumed);
 	quota = atomic64_read(&priv->quota);
 	if (consumed >= quota) {
 		consumed_cap = quota;
@@ -145,7 +155,7 @@ static int nft_quota_do_dump(struct sk_buff *skb, struct nft_quota *priv,
 		goto nla_put_failure;
 
 	if (reset) {
-		atomic64_sub(consumed, &priv->consumed);
+		atomic64_sub(consumed, priv->consumed);
 		clear_bit(NFT_QUOTA_DEPLETED_BIT, &priv->flags);
 	}
 	return 0;
@@ -162,11 +172,20 @@ static int nft_quota_obj_dump(struct sk_buff *skb, struct nft_object *obj,
 	return nft_quota_do_dump(skb, priv, reset);
 }
 
+static void nft_quota_obj_destroy(const struct nft_ctx *ctx,
+				  struct nft_object *obj)
+{
+	struct nft_quota *priv = nft_obj_data(obj);
+
+	return nft_quota_do_destroy(ctx, priv);
+}
+
 static struct nft_object_type nft_quota_obj_type;
 static const struct nft_object_ops nft_quota_obj_ops = {
 	.type		= &nft_quota_obj_type,
 	.size		= sizeof(struct nft_quota),
 	.init		= nft_quota_obj_init,
+	.destroy	= nft_quota_obj_destroy,
 	.eval		= nft_quota_obj_eval,
 	.dump		= nft_quota_obj_dump,
 	.update		= nft_quota_obj_update,
@@ -205,12 +224,35 @@ static int nft_quota_dump(struct sk_buff *skb, const struct nft_expr *expr)
 	return nft_quota_do_dump(skb, priv, false);
 }
 
+static void nft_quota_destroy(const struct nft_ctx *ctx,
+			      const struct nft_expr *expr)
+{
+	struct nft_quota *priv = nft_expr_priv(expr);
+
+	return nft_quota_do_destroy(ctx, priv);
+}
+
+static int nft_quota_clone(struct nft_expr *dst, const struct nft_expr *src)
+{
+	struct nft_quota *priv_dst = nft_expr_priv(dst);
+
+	priv_dst->consumed = kmalloc(sizeof(*priv_dst->consumed), GFP_ATOMIC);
+	if (!priv_dst->consumed)
+		return -ENOMEM;
+
+	atomic64_set(priv_dst->consumed, 0);
+
+	return 0;
+}
+
 static struct nft_expr_type nft_quota_type;
 static const struct nft_expr_ops nft_quota_ops = {
 	.type		= &nft_quota_type,
 	.size		= NFT_EXPR_SIZE(sizeof(struct nft_quota)),
 	.eval		= nft_quota_eval,
 	.init		= nft_quota_init,
+	.destroy	= nft_quota_destroy,
+	.clone		= nft_quota_clone,
 	.dump		= nft_quota_dump,
 };
 
-- 
2.30.2
^ permalink raw reply related	[flat|nested] 15+ messages in thread
* [PATCH 04/14] netfilter: nft_numgen: move stateful fields out of expression data
  2022-01-09 16:11 [PATCH nf-next,v3 00/14] nf_tables datapath ruleset blob and register tracking Pablo Neira Ayuso
                   ` (2 preceding siblings ...)
  2022-01-09 16:11 ` [PATCH 03/14] netfilter: nft_quota: " Pablo Neira Ayuso
@ 2022-01-09 16:11 ` Pablo Neira Ayuso
  2022-01-09 16:11 ` [PATCH 05/14] netfilter: nft_limit: rename stateful structure Pablo Neira Ayuso
                   ` (9 subsequent siblings)
  13 siblings, 0 replies; 15+ messages in thread
From: Pablo Neira Ayuso @ 2022-01-09 16:11 UTC (permalink / raw)
  To: netfilter-devel
In preparation for the rule blob representation.
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/nft_numgen.c | 34 ++++++++++++++++++++++++++++------
 1 file changed, 28 insertions(+), 6 deletions(-)
diff --git a/net/netfilter/nft_numgen.c b/net/netfilter/nft_numgen.c
index 722cac1e90e0..1d378efd8823 100644
--- a/net/netfilter/nft_numgen.c
+++ b/net/netfilter/nft_numgen.c
@@ -18,7 +18,7 @@ static DEFINE_PER_CPU(struct rnd_state, nft_numgen_prandom_state);
 struct nft_ng_inc {
 	u8			dreg;
 	u32			modulus;
-	atomic_t		counter;
+	atomic_t		*counter;
 	u32			offset;
 };
 
@@ -27,9 +27,9 @@ static u32 nft_ng_inc_gen(struct nft_ng_inc *priv)
 	u32 nval, oval;
 
 	do {
-		oval = atomic_read(&priv->counter);
+		oval = atomic_read(priv->counter);
 		nval = (oval + 1 < priv->modulus) ? oval + 1 : 0;
-	} while (atomic_cmpxchg(&priv->counter, oval, nval) != oval);
+	} while (atomic_cmpxchg(priv->counter, oval, nval) != oval);
 
 	return nval + priv->offset;
 }
@@ -55,6 +55,7 @@ static int nft_ng_inc_init(const struct nft_ctx *ctx,
 			   const struct nlattr * const tb[])
 {
 	struct nft_ng_inc *priv = nft_expr_priv(expr);
+	int err;
 
 	if (tb[NFTA_NG_OFFSET])
 		priv->offset = ntohl(nla_get_be32(tb[NFTA_NG_OFFSET]));
@@ -66,10 +67,22 @@ static int nft_ng_inc_init(const struct nft_ctx *ctx,
 	if (priv->offset + priv->modulus - 1 < priv->offset)
 		return -EOVERFLOW;
 
-	atomic_set(&priv->counter, priv->modulus - 1);
+	priv->counter = kmalloc(sizeof(*priv->counter), GFP_KERNEL);
+	if (!priv->counter)
+		return -ENOMEM;
 
-	return nft_parse_register_store(ctx, tb[NFTA_NG_DREG], &priv->dreg,
-					NULL, NFT_DATA_VALUE, sizeof(u32));
+	atomic_set(priv->counter, priv->modulus - 1);
+
+	err = nft_parse_register_store(ctx, tb[NFTA_NG_DREG], &priv->dreg,
+				       NULL, NFT_DATA_VALUE, sizeof(u32));
+	if (err < 0)
+		goto err;
+
+	return 0;
+err:
+	kfree(priv->counter);
+
+	return err;
 }
 
 static int nft_ng_dump(struct sk_buff *skb, enum nft_registers dreg,
@@ -98,6 +111,14 @@ static int nft_ng_inc_dump(struct sk_buff *skb, const struct nft_expr *expr)
 			   priv->offset);
 }
 
+static void nft_ng_inc_destroy(const struct nft_ctx *ctx,
+			       const struct nft_expr *expr)
+{
+	const struct nft_ng_inc *priv = nft_expr_priv(expr);
+
+	kfree(priv->counter);
+}
+
 struct nft_ng_random {
 	u8			dreg;
 	u32			modulus;
@@ -157,6 +178,7 @@ static const struct nft_expr_ops nft_ng_inc_ops = {
 	.size		= NFT_EXPR_SIZE(sizeof(struct nft_ng_inc)),
 	.eval		= nft_ng_inc_eval,
 	.init		= nft_ng_inc_init,
+	.destroy	= nft_ng_inc_destroy,
 	.dump		= nft_ng_inc_dump,
 };
 
-- 
2.30.2
^ permalink raw reply related	[flat|nested] 15+ messages in thread
* [PATCH 05/14] netfilter: nft_limit: rename stateful structure
  2022-01-09 16:11 [PATCH nf-next,v3 00/14] nf_tables datapath ruleset blob and register tracking Pablo Neira Ayuso
                   ` (3 preceding siblings ...)
  2022-01-09 16:11 ` [PATCH 04/14] netfilter: nft_numgen: " Pablo Neira Ayuso
@ 2022-01-09 16:11 ` Pablo Neira Ayuso
  2022-01-09 16:11 ` [PATCH 06/14] netfilter: nft_limit: move stateful fields out of expression data Pablo Neira Ayuso
                   ` (8 subsequent siblings)
  13 siblings, 0 replies; 15+ messages in thread
From: Pablo Neira Ayuso @ 2022-01-09 16:11 UTC (permalink / raw)
  To: netfilter-devel
From struct nft_limit to nft_limit_priv.
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/nft_limit.c | 104 +++++++++++++++++++-------------------
 1 file changed, 52 insertions(+), 52 deletions(-)
diff --git a/net/netfilter/nft_limit.c b/net/netfilter/nft_limit.c
index 82ec27bdf941..d6e0226b7603 100644
--- a/net/netfilter/nft_limit.c
+++ b/net/netfilter/nft_limit.c
@@ -14,7 +14,7 @@
 #include <linux/netfilter/nf_tables.h>
 #include <net/netfilter/nf_tables.h>
 
-struct nft_limit {
+struct nft_limit_priv {
 	spinlock_t	lock;
 	u64		last;
 	u64		tokens;
@@ -25,33 +25,33 @@ struct nft_limit {
 	bool		invert;
 };
 
-static inline bool nft_limit_eval(struct nft_limit *limit, u64 cost)
+static inline bool nft_limit_eval(struct nft_limit_priv *priv, u64 cost)
 {
 	u64 now, tokens;
 	s64 delta;
 
-	spin_lock_bh(&limit->lock);
+	spin_lock_bh(&priv->lock);
 	now = ktime_get_ns();
-	tokens = limit->tokens + now - limit->last;
-	if (tokens > limit->tokens_max)
-		tokens = limit->tokens_max;
+	tokens = priv->tokens + now - priv->last;
+	if (tokens > priv->tokens_max)
+		tokens = priv->tokens_max;
 
-	limit->last = now;
+	priv->last = now;
 	delta = tokens - cost;
 	if (delta >= 0) {
-		limit->tokens = delta;
-		spin_unlock_bh(&limit->lock);
-		return limit->invert;
+		priv->tokens = delta;
+		spin_unlock_bh(&priv->lock);
+		return priv->invert;
 	}
-	limit->tokens = tokens;
-	spin_unlock_bh(&limit->lock);
-	return !limit->invert;
+	priv->tokens = tokens;
+	spin_unlock_bh(&priv->lock);
+	return !priv->invert;
 }
 
 /* Use same default as in iptables. */
 #define NFT_LIMIT_PKT_BURST_DEFAULT	5
 
-static int nft_limit_init(struct nft_limit *limit,
+static int nft_limit_init(struct nft_limit_priv *priv,
 			  const struct nlattr * const tb[], bool pkts)
 {
 	u64 unit, tokens;
@@ -60,58 +60,58 @@ static int nft_limit_init(struct nft_limit *limit,
 	    tb[NFTA_LIMIT_UNIT] == NULL)
 		return -EINVAL;
 
-	limit->rate = be64_to_cpu(nla_get_be64(tb[NFTA_LIMIT_RATE]));
+	priv->rate = be64_to_cpu(nla_get_be64(tb[NFTA_LIMIT_RATE]));
 	unit = be64_to_cpu(nla_get_be64(tb[NFTA_LIMIT_UNIT]));
-	limit->nsecs = unit * NSEC_PER_SEC;
-	if (limit->rate == 0 || limit->nsecs < unit)
+	priv->nsecs = unit * NSEC_PER_SEC;
+	if (priv->rate == 0 || priv->nsecs < unit)
 		return -EOVERFLOW;
 
 	if (tb[NFTA_LIMIT_BURST])
-		limit->burst = ntohl(nla_get_be32(tb[NFTA_LIMIT_BURST]));
+		priv->burst = ntohl(nla_get_be32(tb[NFTA_LIMIT_BURST]));
 
-	if (pkts && limit->burst == 0)
-		limit->burst = NFT_LIMIT_PKT_BURST_DEFAULT;
+	if (pkts && priv->burst == 0)
+		priv->burst = NFT_LIMIT_PKT_BURST_DEFAULT;
 
-	if (limit->rate + limit->burst < limit->rate)
+	if (priv->rate + priv->burst < priv->rate)
 		return -EOVERFLOW;
 
 	if (pkts) {
-		tokens = div64_u64(limit->nsecs, limit->rate) * limit->burst;
+		tokens = div64_u64(priv->nsecs, priv->rate) * priv->burst;
 	} else {
 		/* The token bucket size limits the number of tokens can be
 		 * accumulated. tokens_max specifies the bucket size.
 		 * tokens_max = unit * (rate + burst) / rate.
 		 */
-		tokens = div64_u64(limit->nsecs * (limit->rate + limit->burst),
-				 limit->rate);
+		tokens = div64_u64(priv->nsecs * (priv->rate + priv->burst),
+				 priv->rate);
 	}
 
-	limit->tokens = tokens;
-	limit->tokens_max = limit->tokens;
+	priv->tokens = tokens;
+	priv->tokens_max = priv->tokens;
 
 	if (tb[NFTA_LIMIT_FLAGS]) {
 		u32 flags = ntohl(nla_get_be32(tb[NFTA_LIMIT_FLAGS]));
 
 		if (flags & NFT_LIMIT_F_INV)
-			limit->invert = true;
+			priv->invert = true;
 	}
-	limit->last = ktime_get_ns();
-	spin_lock_init(&limit->lock);
+	priv->last = ktime_get_ns();
+	spin_lock_init(&priv->lock);
 
 	return 0;
 }
 
-static int nft_limit_dump(struct sk_buff *skb, const struct nft_limit *limit,
+static int nft_limit_dump(struct sk_buff *skb, const struct nft_limit_priv *priv,
 			  enum nft_limit_type type)
 {
-	u32 flags = limit->invert ? NFT_LIMIT_F_INV : 0;
-	u64 secs = div_u64(limit->nsecs, NSEC_PER_SEC);
+	u32 flags = priv->invert ? NFT_LIMIT_F_INV : 0;
+	u64 secs = div_u64(priv->nsecs, NSEC_PER_SEC);
 
-	if (nla_put_be64(skb, NFTA_LIMIT_RATE, cpu_to_be64(limit->rate),
+	if (nla_put_be64(skb, NFTA_LIMIT_RATE, cpu_to_be64(priv->rate),
 			 NFTA_LIMIT_PAD) ||
 	    nla_put_be64(skb, NFTA_LIMIT_UNIT, cpu_to_be64(secs),
 			 NFTA_LIMIT_PAD) ||
-	    nla_put_be32(skb, NFTA_LIMIT_BURST, htonl(limit->burst)) ||
+	    nla_put_be32(skb, NFTA_LIMIT_BURST, htonl(priv->burst)) ||
 	    nla_put_be32(skb, NFTA_LIMIT_TYPE, htonl(type)) ||
 	    nla_put_be32(skb, NFTA_LIMIT_FLAGS, htonl(flags)))
 		goto nla_put_failure;
@@ -121,8 +121,8 @@ static int nft_limit_dump(struct sk_buff *skb, const struct nft_limit *limit,
 	return -1;
 }
 
-struct nft_limit_pkts {
-	struct nft_limit	limit;
+struct nft_limit_priv_pkts {
+	struct nft_limit_priv	limit;
 	u64			cost;
 };
 
@@ -130,7 +130,7 @@ static void nft_limit_pkts_eval(const struct nft_expr *expr,
 				struct nft_regs *regs,
 				const struct nft_pktinfo *pkt)
 {
-	struct nft_limit_pkts *priv = nft_expr_priv(expr);
+	struct nft_limit_priv_pkts *priv = nft_expr_priv(expr);
 
 	if (nft_limit_eval(&priv->limit, priv->cost))
 		regs->verdict.code = NFT_BREAK;
@@ -148,7 +148,7 @@ static int nft_limit_pkts_init(const struct nft_ctx *ctx,
 			       const struct nft_expr *expr,
 			       const struct nlattr * const tb[])
 {
-	struct nft_limit_pkts *priv = nft_expr_priv(expr);
+	struct nft_limit_priv_pkts *priv = nft_expr_priv(expr);
 	int err;
 
 	err = nft_limit_init(&priv->limit, tb, true);
@@ -161,7 +161,7 @@ static int nft_limit_pkts_init(const struct nft_ctx *ctx,
 
 static int nft_limit_pkts_dump(struct sk_buff *skb, const struct nft_expr *expr)
 {
-	const struct nft_limit_pkts *priv = nft_expr_priv(expr);
+	const struct nft_limit_priv_pkts *priv = nft_expr_priv(expr);
 
 	return nft_limit_dump(skb, &priv->limit, NFT_LIMIT_PKTS);
 }
@@ -169,7 +169,7 @@ static int nft_limit_pkts_dump(struct sk_buff *skb, const struct nft_expr *expr)
 static struct nft_expr_type nft_limit_type;
 static const struct nft_expr_ops nft_limit_pkts_ops = {
 	.type		= &nft_limit_type,
-	.size		= NFT_EXPR_SIZE(sizeof(struct nft_limit_pkts)),
+	.size		= NFT_EXPR_SIZE(sizeof(struct nft_limit_priv_pkts)),
 	.eval		= nft_limit_pkts_eval,
 	.init		= nft_limit_pkts_init,
 	.dump		= nft_limit_pkts_dump,
@@ -179,7 +179,7 @@ static void nft_limit_bytes_eval(const struct nft_expr *expr,
 				 struct nft_regs *regs,
 				 const struct nft_pktinfo *pkt)
 {
-	struct nft_limit *priv = nft_expr_priv(expr);
+	struct nft_limit_priv *priv = nft_expr_priv(expr);
 	u64 cost = div64_u64(priv->nsecs * pkt->skb->len, priv->rate);
 
 	if (nft_limit_eval(priv, cost))
@@ -190,7 +190,7 @@ static int nft_limit_bytes_init(const struct nft_ctx *ctx,
 				const struct nft_expr *expr,
 				const struct nlattr * const tb[])
 {
-	struct nft_limit *priv = nft_expr_priv(expr);
+	struct nft_limit_priv *priv = nft_expr_priv(expr);
 
 	return nft_limit_init(priv, tb, false);
 }
@@ -198,14 +198,14 @@ static int nft_limit_bytes_init(const struct nft_ctx *ctx,
 static int nft_limit_bytes_dump(struct sk_buff *skb,
 				const struct nft_expr *expr)
 {
-	const struct nft_limit *priv = nft_expr_priv(expr);
+	const struct nft_limit_priv *priv = nft_expr_priv(expr);
 
 	return nft_limit_dump(skb, priv, NFT_LIMIT_PKT_BYTES);
 }
 
 static const struct nft_expr_ops nft_limit_bytes_ops = {
 	.type		= &nft_limit_type,
-	.size		= NFT_EXPR_SIZE(sizeof(struct nft_limit)),
+	.size		= NFT_EXPR_SIZE(sizeof(struct nft_limit_priv)),
 	.eval		= nft_limit_bytes_eval,
 	.init		= nft_limit_bytes_init,
 	.dump		= nft_limit_bytes_dump,
@@ -240,7 +240,7 @@ static void nft_limit_obj_pkts_eval(struct nft_object *obj,
 				    struct nft_regs *regs,
 				    const struct nft_pktinfo *pkt)
 {
-	struct nft_limit_pkts *priv = nft_obj_data(obj);
+	struct nft_limit_priv_pkts *priv = nft_obj_data(obj);
 
 	if (nft_limit_eval(&priv->limit, priv->cost))
 		regs->verdict.code = NFT_BREAK;
@@ -250,7 +250,7 @@ static int nft_limit_obj_pkts_init(const struct nft_ctx *ctx,
 				   const struct nlattr * const tb[],
 				   struct nft_object *obj)
 {
-	struct nft_limit_pkts *priv = nft_obj_data(obj);
+	struct nft_limit_priv_pkts *priv = nft_obj_data(obj);
 	int err;
 
 	err = nft_limit_init(&priv->limit, tb, true);
@@ -265,7 +265,7 @@ static int nft_limit_obj_pkts_dump(struct sk_buff *skb,
 				   struct nft_object *obj,
 				   bool reset)
 {
-	const struct nft_limit_pkts *priv = nft_obj_data(obj);
+	const struct nft_limit_priv_pkts *priv = nft_obj_data(obj);
 
 	return nft_limit_dump(skb, &priv->limit, NFT_LIMIT_PKTS);
 }
@@ -273,7 +273,7 @@ static int nft_limit_obj_pkts_dump(struct sk_buff *skb,
 static struct nft_object_type nft_limit_obj_type;
 static const struct nft_object_ops nft_limit_obj_pkts_ops = {
 	.type		= &nft_limit_obj_type,
-	.size		= NFT_EXPR_SIZE(sizeof(struct nft_limit_pkts)),
+	.size		= NFT_EXPR_SIZE(sizeof(struct nft_limit_priv_pkts)),
 	.init		= nft_limit_obj_pkts_init,
 	.eval		= nft_limit_obj_pkts_eval,
 	.dump		= nft_limit_obj_pkts_dump,
@@ -283,7 +283,7 @@ static void nft_limit_obj_bytes_eval(struct nft_object *obj,
 				     struct nft_regs *regs,
 				     const struct nft_pktinfo *pkt)
 {
-	struct nft_limit *priv = nft_obj_data(obj);
+	struct nft_limit_priv *priv = nft_obj_data(obj);
 	u64 cost = div64_u64(priv->nsecs * pkt->skb->len, priv->rate);
 
 	if (nft_limit_eval(priv, cost))
@@ -294,7 +294,7 @@ static int nft_limit_obj_bytes_init(const struct nft_ctx *ctx,
 				    const struct nlattr * const tb[],
 				    struct nft_object *obj)
 {
-	struct nft_limit *priv = nft_obj_data(obj);
+	struct nft_limit_priv *priv = nft_obj_data(obj);
 
 	return nft_limit_init(priv, tb, false);
 }
@@ -303,7 +303,7 @@ static int nft_limit_obj_bytes_dump(struct sk_buff *skb,
 				    struct nft_object *obj,
 				    bool reset)
 {
-	const struct nft_limit *priv = nft_obj_data(obj);
+	const struct nft_limit_priv *priv = nft_obj_data(obj);
 
 	return nft_limit_dump(skb, priv, NFT_LIMIT_PKT_BYTES);
 }
@@ -311,7 +311,7 @@ static int nft_limit_obj_bytes_dump(struct sk_buff *skb,
 static struct nft_object_type nft_limit_obj_type;
 static const struct nft_object_ops nft_limit_obj_bytes_ops = {
 	.type		= &nft_limit_obj_type,
-	.size		= sizeof(struct nft_limit),
+	.size		= sizeof(struct nft_limit_priv),
 	.init		= nft_limit_obj_bytes_init,
 	.eval		= nft_limit_obj_bytes_eval,
 	.dump		= nft_limit_obj_bytes_dump,
-- 
2.30.2
^ permalink raw reply related	[flat|nested] 15+ messages in thread
* [PATCH 06/14] netfilter: nft_limit: move stateful fields out of expression data
  2022-01-09 16:11 [PATCH nf-next,v3 00/14] nf_tables datapath ruleset blob and register tracking Pablo Neira Ayuso
                   ` (4 preceding siblings ...)
  2022-01-09 16:11 ` [PATCH 05/14] netfilter: nft_limit: rename stateful structure Pablo Neira Ayuso
@ 2022-01-09 16:11 ` Pablo Neira Ayuso
  2022-01-09 16:11 ` [PATCH 07/14] netfilter: nf_tables: add rule blob layout Pablo Neira Ayuso
                   ` (7 subsequent siblings)
  13 siblings, 0 replies; 15+ messages in thread
From: Pablo Neira Ayuso @ 2022-01-09 16:11 UTC (permalink / raw)
  To: netfilter-devel
In preparation for the rule blob representation.
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/nft_limit.c | 94 ++++++++++++++++++++++++++++++++++-----
 1 file changed, 82 insertions(+), 12 deletions(-)
diff --git a/net/netfilter/nft_limit.c b/net/netfilter/nft_limit.c
index d6e0226b7603..f04be5be73a0 100644
--- a/net/netfilter/nft_limit.c
+++ b/net/netfilter/nft_limit.c
@@ -14,10 +14,14 @@
 #include <linux/netfilter/nf_tables.h>
 #include <net/netfilter/nf_tables.h>
 
-struct nft_limit_priv {
+struct nft_limit {
 	spinlock_t	lock;
 	u64		last;
 	u64		tokens;
+};
+
+struct nft_limit_priv {
+	struct nft_limit *limit;
 	u64		tokens_max;
 	u64		rate;
 	u64		nsecs;
@@ -30,21 +34,21 @@ static inline bool nft_limit_eval(struct nft_limit_priv *priv, u64 cost)
 	u64 now, tokens;
 	s64 delta;
 
-	spin_lock_bh(&priv->lock);
+	spin_lock_bh(&priv->limit->lock);
 	now = ktime_get_ns();
-	tokens = priv->tokens + now - priv->last;
+	tokens = priv->limit->tokens + now - priv->limit->last;
 	if (tokens > priv->tokens_max)
 		tokens = priv->tokens_max;
 
-	priv->last = now;
+	priv->limit->last = now;
 	delta = tokens - cost;
 	if (delta >= 0) {
-		priv->tokens = delta;
-		spin_unlock_bh(&priv->lock);
+		priv->limit->tokens = delta;
+		spin_unlock_bh(&priv->limit->lock);
 		return priv->invert;
 	}
-	priv->tokens = tokens;
-	spin_unlock_bh(&priv->lock);
+	priv->limit->tokens = tokens;
+	spin_unlock_bh(&priv->limit->lock);
 	return !priv->invert;
 }
 
@@ -86,8 +90,12 @@ static int nft_limit_init(struct nft_limit_priv *priv,
 				 priv->rate);
 	}
 
-	priv->tokens = tokens;
-	priv->tokens_max = priv->tokens;
+	priv->limit = kmalloc(sizeof(*priv->limit), GFP_KERNEL);
+	if (!priv->limit)
+		return -ENOMEM;
+
+	priv->limit->tokens = tokens;
+	priv->tokens_max = priv->limit->tokens;
 
 	if (tb[NFTA_LIMIT_FLAGS]) {
 		u32 flags = ntohl(nla_get_be32(tb[NFTA_LIMIT_FLAGS]));
@@ -95,8 +103,8 @@ static int nft_limit_init(struct nft_limit_priv *priv,
 		if (flags & NFT_LIMIT_F_INV)
 			priv->invert = true;
 	}
-	priv->last = ktime_get_ns();
-	spin_lock_init(&priv->lock);
+	priv->limit->last = ktime_get_ns();
+	spin_lock_init(&priv->limit->lock);
 
 	return 0;
 }
@@ -121,6 +129,40 @@ static int nft_limit_dump(struct sk_buff *skb, const struct nft_limit_priv *priv
 	return -1;
 }
 
+/* Free the separately allocated stateful part of a limit expression. */
+static void nft_limit_destroy(const struct nft_ctx *ctx,
+			      const struct nft_limit_priv *priv)
+{
+	kfree(priv->limit);
+}
+
+/*
+ * Copy the limit configuration from @priv_src to @priv_dst and allocate
+ * separate state for the clone, starting with a full token bucket and a
+ * fresh timestamp.
+ *
+ * Returns 0 on success or -ENOMEM if the state allocation fails.
+ */
+static int nft_limit_clone(struct nft_limit_priv *priv_dst,
+			   const struct nft_limit_priv *priv_src)
+{
+	priv_dst->tokens_max = priv_src->tokens_max;
+	priv_dst->rate = priv_src->rate;
+	priv_dst->nsecs = priv_src->nsecs;
+	priv_dst->burst = priv_src->burst;
+	priv_dst->invert = priv_src->invert;
+
+	priv_dst->limit = kmalloc(sizeof(*priv_dst->limit), GFP_ATOMIC);
+	if (!priv_dst->limit)
+		return -ENOMEM;
+
+	spin_lock_init(&priv_dst->limit->lock);
+	priv_dst->limit->tokens = priv_src->tokens_max;
+	priv_dst->limit->last = ktime_get_ns();
+
+	return 0;
+}
+
 struct nft_limit_priv_pkts {
 	struct nft_limit_priv	limit;
 	u64			cost;
@@ -166,12 +200,34 @@ static int nft_limit_pkts_dump(struct sk_buff *skb, const struct nft_expr *expr)
 	return nft_limit_dump(skb, &priv->limit, NFT_LIMIT_PKTS);
 }
 
+static void nft_limit_pkts_destroy(const struct nft_ctx *ctx,
+				   const struct nft_expr *expr)
+{
+	const struct nft_limit_priv_pkts *priv = nft_expr_priv(expr);
+
+	nft_limit_destroy(ctx, &priv->limit);
+}
+
+/* Clone a packet-based limit expression, including its cost value. */
+static int nft_limit_pkts_clone(struct nft_expr *dst, const struct nft_expr *src)
+{
+	struct nft_limit_priv_pkts *priv_dst = nft_expr_priv(dst);
+	struct nft_limit_priv_pkts *priv_src = nft_expr_priv(src);
+
+	/* cost sits outside struct nft_limit_priv, so copy it here */
+	priv_dst->cost = priv_src->cost;
+
+	return nft_limit_clone(&priv_dst->limit, &priv_src->limit);
+}
+
 static struct nft_expr_type nft_limit_type;
 static const struct nft_expr_ops nft_limit_pkts_ops = {
 	.type		= &nft_limit_type,
 	.size		= NFT_EXPR_SIZE(sizeof(struct nft_limit_priv_pkts)),
 	.eval		= nft_limit_pkts_eval,
 	.init		= nft_limit_pkts_init,
+	.destroy	= nft_limit_pkts_destroy,
+	.clone		= nft_limit_pkts_clone,
 	.dump		= nft_limit_pkts_dump,
 };
 
@@ -203,12 +255,30 @@ static int nft_limit_bytes_dump(struct sk_buff *skb,
 	return nft_limit_dump(skb, priv, NFT_LIMIT_PKT_BYTES);
 }
 
+static void nft_limit_bytes_destroy(const struct nft_ctx *ctx,
+				    const struct nft_expr *expr)
+{
+	const struct nft_limit_priv *priv = nft_expr_priv(expr);
+
+	nft_limit_destroy(ctx, priv);
+}
+
+static int nft_limit_bytes_clone(struct nft_expr *dst, const struct nft_expr *src)
+{
+	struct nft_limit_priv *priv_dst = nft_expr_priv(dst);
+	struct nft_limit_priv *priv_src = nft_expr_priv(src);
+
+	return nft_limit_clone(priv_dst, priv_src);
+}
+
 static const struct nft_expr_ops nft_limit_bytes_ops = {
 	.type		= &nft_limit_type,
 	.size		= NFT_EXPR_SIZE(sizeof(struct nft_limit_priv)),
 	.eval		= nft_limit_bytes_eval,
 	.init		= nft_limit_bytes_init,
 	.dump		= nft_limit_bytes_dump,
+	.clone		= nft_limit_bytes_clone,
+	.destroy	= nft_limit_bytes_destroy,
 };
 
 static const struct nft_expr_ops *
-- 
2.30.2
^ permalink raw reply related	[flat|nested] 15+ messages in thread
* [PATCH 07/14] netfilter: nf_tables: add rule blob layout
  2022-01-09 16:11 [PATCH nf-next,v3 00/14] nf_tables datapath ruleset blob and register tracking Pablo Neira Ayuso
                   ` (5 preceding siblings ...)
  2022-01-09 16:11 ` [PATCH 06/14] netfilter: nft_limit: move stateful fields out of expression data Pablo Neira Ayuso
@ 2022-01-09 16:11 ` Pablo Neira Ayuso
  2022-01-09 16:11 ` [PATCH 08/14] netfilter: nf_tables: add NFT_REG32_NUM Pablo Neira Ayuso
                   ` (6 subsequent siblings)
  13 siblings, 0 replies; 15+ messages in thread
From: Pablo Neira Ayuso @ 2022-01-09 16:11 UTC (permalink / raw)
  To: netfilter-devel
This patch adds a blob layout per chain to represent the ruleset in the
packet datapath.
	size (unsigned long)
	struct nft_rule_dp
	  struct nft_expr
	  ...
        struct nft_rule_dp
          struct nft_expr
          ...
        struct nft_rule_dp (is_last=1)
The new structure nft_rule_dp represents the rule in a more compact way
(smaller memory footprint) compared to the control-plane nft_rule
structure.
The ruleset blob is a read-only data structure. The first field contains
the blob size, then the rules containing expressions. There is a trailing
rule which is used by the tracing infrastructure which is equivalent to
the NULL rule marker in the previous representation. The blob size field
does not include the size of this trailing rule marker.
The ruleset blob is generated from the commit path.
This patch reuses the infrastructure available since 0cbc06b3faba
("netfilter: nf_tables: remove synchronize_rcu in commit phase") to
build the array of rules per chain.
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/nf_tables.h |  22 ++++-
 net/netfilter/nf_tables_api.c     | 149 ++++++++++++++++++++----------
 net/netfilter/nf_tables_core.c    |  41 +++++---
 net/netfilter/nf_tables_trace.c   |   2 +-
 4 files changed, 147 insertions(+), 67 deletions(-)
diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h
index a0d9e0b47ab8..5a046b01bdab 100644
--- a/include/net/netfilter/nf_tables.h
+++ b/include/net/netfilter/nf_tables.h
@@ -974,6 +974,20 @@ static inline void nft_set_elem_update_expr(const struct nft_set_ext *ext,
 
 #define NFT_CHAIN_POLICY_UNSET		U8_MAX
 
+struct nft_rule_dp {
+	u64				is_last:1,
+					dlen:12,
+					handle:42;	/* for tracing */
+	unsigned char			data[]
+		__attribute__((aligned(__alignof__(struct nft_expr))));
+};
+
+struct nft_rule_blob {
+	unsigned long			size;
+	unsigned char			data[]
+		__attribute__((aligned(__alignof__(struct nft_rule_dp))));
+};
+
 /**
  *	struct nft_chain - nf_tables chain
  *
@@ -987,8 +1001,8 @@ static inline void nft_set_elem_update_expr(const struct nft_set_ext *ext,
  *	@name: name of the chain
  */
 struct nft_chain {
-	struct nft_rule			*__rcu *rules_gen_0;
-	struct nft_rule			*__rcu *rules_gen_1;
+	struct nft_rule_blob		__rcu *blob_gen_0;
+	struct nft_rule_blob		__rcu *blob_gen_1;
 	struct list_head		rules;
 	struct list_head		list;
 	struct rhlist_head		rhlhead;
@@ -1003,7 +1017,7 @@ struct nft_chain {
 	u8				*udata;
 
 	/* Only used during control plane commit phase: */
-	struct nft_rule			**rules_next;
+	struct nft_rule_blob		*blob_next;
 };
 
 int nft_chain_validate(const struct nft_ctx *ctx, const struct nft_chain *chain);
@@ -1321,7 +1335,7 @@ struct nft_traceinfo {
 	const struct nft_pktinfo	*pkt;
 	const struct nft_base_chain	*basechain;
 	const struct nft_chain		*chain;
-	const struct nft_rule		*rule;
+	const struct nft_rule_dp	*rule;
 	const struct nft_verdict	*verdict;
 	enum nft_trace_types		type;
 	bool				packet_dumped;
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index c20772822637..af1128fe79e0 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -1747,16 +1747,16 @@ static void nft_chain_stats_replace(struct nft_trans *trans)
 
 static void nf_tables_chain_free_chain_rules(struct nft_chain *chain)
 {
-	struct nft_rule **g0 = rcu_dereference_raw(chain->rules_gen_0);
-	struct nft_rule **g1 = rcu_dereference_raw(chain->rules_gen_1);
+	struct nft_rule_blob *g0 = rcu_dereference_raw(chain->blob_gen_0);
+	struct nft_rule_blob *g1 = rcu_dereference_raw(chain->blob_gen_1);
 
 	if (g0 != g1)
 		kvfree(g1);
 	kvfree(g0);
 
 	/* should be NULL either via abort or via successful commit */
-	WARN_ON_ONCE(chain->rules_next);
-	kvfree(chain->rules_next);
+	WARN_ON_ONCE(chain->blob_next);
+	kvfree(chain->blob_next);
 }
 
 void nf_tables_chain_destroy(struct nft_ctx *ctx)
@@ -2002,23 +2002,39 @@ static void nft_chain_release_hook(struct nft_chain_hook *hook)
 
 struct nft_rules_old {
 	struct rcu_head h;
-	struct nft_rule **start;
+	struct nft_rule_blob *blob;
 };
 
-static struct nft_rule **nf_tables_chain_alloc_rules(const struct nft_chain *chain,
-						     unsigned int alloc)
+static void nft_last_rule(struct nft_rule_blob *blob, const void *ptr)
 {
-	if (alloc > INT_MAX)
+	struct nft_rule_dp *prule;
+
+	prule = (struct nft_rule_dp *)ptr;
+	prule->is_last = 1;
+	ptr += offsetof(struct nft_rule_dp, data);
+	/* blob size does not include the trailer rule */
+}
+
+static struct nft_rule_blob *nf_tables_chain_alloc_rules(unsigned int size)
+{
+	struct nft_rule_blob *blob;
+
+	/* size must include room for the last rule */
+	if (size < offsetof(struct nft_rule_dp, data))
+		return NULL;
+
+	size += sizeof(struct nft_rule_blob) + sizeof(struct nft_rules_old);
+	if (size > INT_MAX)
 		return NULL;
 
-	alloc += 1;	/* NULL, ends rules */
-	if (sizeof(struct nft_rule *) > INT_MAX / alloc)
+	blob = kvmalloc(size, GFP_KERNEL);
+	if (!blob)
 		return NULL;
 
-	alloc *= sizeof(struct nft_rule *);
-	alloc += sizeof(struct nft_rules_old);
+	blob->size = 0;
+	nft_last_rule(blob, blob->data);
 
-	return kvmalloc(alloc, GFP_KERNEL);
+	return blob;
 }
 
 static void nft_basechain_hook_init(struct nf_hook_ops *ops, u8 family,
@@ -2091,9 +2107,10 @@ static int nf_tables_addchain(struct nft_ctx *ctx, u8 family, u8 genmask,
 	struct nft_stats __percpu *stats;
 	struct net *net = ctx->net;
 	char name[NFT_NAME_MAXLEN];
+	struct nft_rule_blob *blob;
 	struct nft_trans *trans;
 	struct nft_chain *chain;
-	struct nft_rule **rules;
+	unsigned int data_size;
 	int err;
 
 	if (table->use == UINT_MAX)
@@ -2178,15 +2195,15 @@ static int nf_tables_addchain(struct nft_ctx *ctx, u8 family, u8 genmask,
 		chain->udlen = nla_len(nla[NFTA_CHAIN_USERDATA]);
 	}
 
-	rules = nf_tables_chain_alloc_rules(chain, 0);
-	if (!rules) {
+	data_size = offsetof(struct nft_rule_dp, data);	/* last rule */
+	blob = nf_tables_chain_alloc_rules(data_size);
+	if (!blob) {
 		err = -ENOMEM;
 		goto err_destroy_chain;
 	}
 
-	*rules = NULL;
-	rcu_assign_pointer(chain->rules_gen_0, rules);
-	rcu_assign_pointer(chain->rules_gen_1, rules);
+	RCU_INIT_POINTER(chain->blob_gen_0, blob);
+	RCU_INIT_POINTER(chain->blob_gen_1, blob);
 
 	err = nf_tables_register_hook(net, table, chain);
 	if (err < 0)
@@ -8241,32 +8258,72 @@ EXPORT_SYMBOL_GPL(nf_tables_trans_destroy_flush_work);
 
 static int nf_tables_commit_chain_prepare(struct net *net, struct nft_chain *chain)
 {
+	const struct nft_expr *expr, *last;
+	unsigned int size, data_size;
+	void *data, *data_boundary;
+	struct nft_rule_dp *prule;
 	struct nft_rule *rule;
-	unsigned int alloc = 0;
 	int i;
 
 	/* already handled or inactive chain? */
-	if (chain->rules_next || !nft_is_active_next(net, chain))
+	if (chain->blob_next || !nft_is_active_next(net, chain))
 		return 0;
 
 	rule = list_entry(&chain->rules, struct nft_rule, list);
 	i = 0;
 
 	list_for_each_entry_continue(rule, &chain->rules, list) {
-		if (nft_is_active_next(net, rule))
-			alloc++;
+		if (nft_is_active_next(net, rule)) {
+			data_size += sizeof(*prule) + rule->dlen;
+			if (data_size > INT_MAX)
+				return -ENOMEM;
+		}
 	}
+	data_size += offsetof(struct nft_rule_dp, data);	/* last rule */
 
-	chain->rules_next = nf_tables_chain_alloc_rules(chain, alloc);
-	if (!chain->rules_next)
+	chain->blob_next = nf_tables_chain_alloc_rules(data_size);
+	if (!chain->blob_next)
 		return -ENOMEM;
 
+	data = (void *)chain->blob_next->data;
+	data_boundary = data + data_size;
+	size = 0;
+
 	list_for_each_entry_continue(rule, &chain->rules, list) {
-		if (nft_is_active_next(net, rule))
-			chain->rules_next[i++] = rule;
+		if (!nft_is_active_next(net, rule))
+			continue;
+
+		prule = (struct nft_rule_dp *)data;
+		data += offsetof(struct nft_rule_dp, data);
+		if (WARN_ON_ONCE(data > data_boundary))
+			return -ENOMEM;
+
+		nft_rule_for_each_expr(expr, last, rule) {
+			if (WARN_ON_ONCE(data + expr->ops->size > data_boundary))
+				return -ENOMEM;
+
+			memcpy(data + size, expr, expr->ops->size);
+			size += expr->ops->size;
+		}
+		if (WARN_ON_ONCE(size >= 1 << 12))
+			return -ENOMEM;
+
+		prule->handle = rule->handle;
+		prule->dlen = size;
+		prule->is_last = 0;
+
+		data += size;
+		size = 0;
+		chain->blob_next->size += (unsigned long)(data - (void *)prule);
 	}
 
-	chain->rules_next[i] = NULL;
+	prule = (struct nft_rule_dp *)data;
+	data += offsetof(struct nft_rule_dp, data);
+	if (WARN_ON_ONCE(data > data_boundary))
+		return -ENOMEM;
+
+	nft_last_rule(chain->blob_next, prule);
+
 	return 0;
 }
 
@@ -8280,8 +8337,8 @@ static void nf_tables_commit_chain_prepare_cancel(struct net *net)
 
 		if (trans->msg_type == NFT_MSG_NEWRULE ||
 		    trans->msg_type == NFT_MSG_DELRULE) {
-			kvfree(chain->rules_next);
-			chain->rules_next = NULL;
+			kvfree(chain->blob_next);
+			chain->blob_next = NULL;
 		}
 	}
 }
@@ -8290,38 +8347,34 @@ static void __nf_tables_commit_chain_free_rules_old(struct rcu_head *h)
 {
 	struct nft_rules_old *o = container_of(h, struct nft_rules_old, h);
 
-	kvfree(o->start);
+	kvfree(o->blob);
 }
 
-static void nf_tables_commit_chain_free_rules_old(struct nft_rule **rules)
+static void nf_tables_commit_chain_free_rules_old(struct nft_rule_blob *blob)
 {
-	struct nft_rule **r = rules;
 	struct nft_rules_old *old;
 
-	while (*r)
-		r++;
-
-	r++;	/* rcu_head is after end marker */
-	old = (void *) r;
-	old->start = rules;
+	/* rcu_head is after end marker */
+	old = (void *)blob + sizeof(*blob) + blob->size;
+	old->blob = blob;
 
 	call_rcu(&old->h, __nf_tables_commit_chain_free_rules_old);
 }
 
 static void nf_tables_commit_chain(struct net *net, struct nft_chain *chain)
 {
-	struct nft_rule **g0, **g1;
+	struct nft_rule_blob *g0, *g1;
 	bool next_genbit;
 
 	next_genbit = nft_gencursor_next(net);
 
-	g0 = rcu_dereference_protected(chain->rules_gen_0,
+	g0 = rcu_dereference_protected(chain->blob_gen_0,
 				       lockdep_commit_lock_is_held(net));
-	g1 = rcu_dereference_protected(chain->rules_gen_1,
+	g1 = rcu_dereference_protected(chain->blob_gen_1,
 				       lockdep_commit_lock_is_held(net));
 
 	/* No changes to this chain? */
-	if (chain->rules_next == NULL) {
+	if (chain->blob_next == NULL) {
 		/* chain had no change in last or next generation */
 		if (g0 == g1)
 			return;
@@ -8330,10 +8383,10 @@ static void nf_tables_commit_chain(struct net *net, struct nft_chain *chain)
 		 * one uses same rules as current generation.
 		 */
 		if (next_genbit) {
-			rcu_assign_pointer(chain->rules_gen_1, g0);
+			rcu_assign_pointer(chain->blob_gen_1, g0);
 			nf_tables_commit_chain_free_rules_old(g1);
 		} else {
-			rcu_assign_pointer(chain->rules_gen_0, g1);
+			rcu_assign_pointer(chain->blob_gen_0, g1);
 			nf_tables_commit_chain_free_rules_old(g0);
 		}
 
@@ -8341,11 +8394,11 @@ static void nf_tables_commit_chain(struct net *net, struct nft_chain *chain)
 	}
 
 	if (next_genbit)
-		rcu_assign_pointer(chain->rules_gen_1, chain->rules_next);
+		rcu_assign_pointer(chain->blob_gen_1, chain->blob_next);
 	else
-		rcu_assign_pointer(chain->rules_gen_0, chain->rules_next);
+		rcu_assign_pointer(chain->blob_gen_0, chain->blob_next);
 
-	chain->rules_next = NULL;
+	chain->blob_next = NULL;
 
 	if (g0 == g1)
 		return;
diff --git a/net/netfilter/nf_tables_core.c b/net/netfilter/nf_tables_core.c
index 1fe4911e7e72..ad3a914d6835 100644
--- a/net/netfilter/nf_tables_core.c
+++ b/net/netfilter/nf_tables_core.c
@@ -38,7 +38,7 @@ static noinline void __nft_trace_packet(struct nft_traceinfo *info,
 
 static inline void nft_trace_packet(struct nft_traceinfo *info,
 				    const struct nft_chain *chain,
-				    const struct nft_rule *rule,
+				    const struct nft_rule_dp *rule,
 				    enum nft_trace_types type)
 {
 	if (static_branch_unlikely(&nft_trace_enabled)) {
@@ -88,7 +88,7 @@ static noinline void __nft_trace_verdict(struct nft_traceinfo *info,
 
 static inline void nft_trace_verdict(struct nft_traceinfo *info,
 				     const struct nft_chain *chain,
-				     const struct nft_rule *rule,
+				     const struct nft_rule_dp *rule,
 				     const struct nft_regs *regs)
 {
 	if (static_branch_unlikely(&nft_trace_enabled)) {
@@ -153,8 +153,9 @@ static noinline void nft_update_chain_stats(const struct nft_chain *chain,
 }
 
 struct nft_jumpstack {
-	const struct nft_chain	*chain;
-	struct nft_rule	*const *rules;
+	const struct nft_chain *chain;
+	const struct nft_rule_dp *rule;
+	const struct nft_rule_dp *last_rule;
 };
 
 static void expr_call_ops_eval(const struct nft_expr *expr,
@@ -183,18 +184,28 @@ static void expr_call_ops_eval(const struct nft_expr *expr,
 	expr->ops->eval(expr, regs, pkt);
 }
 
+#define nft_rule_expr_first(rule)	((struct nft_expr *)&(rule)->data[0])
+#define nft_rule_expr_next(expr)	((void *)(expr) + (expr)->ops->size)
+#define nft_rule_expr_last(rule)	((struct nft_expr *)&(rule)->data[(rule)->dlen])
+#define nft_rule_next(rule)		((void *)(rule) + sizeof(*(rule)) + (rule)->dlen)
+/* walk the expressions of a datapath rule; @last is the end of its data */
+#define nft_rule_dp_for_each_expr(expr, last, rule) \
+	for ((expr) = nft_rule_expr_first(rule), (last) = nft_rule_expr_last(rule); \
+	     (expr) != (last); \
+	     (expr) = nft_rule_expr_next(expr))
+
 unsigned int
 nft_do_chain(struct nft_pktinfo *pkt, void *priv)
 {
 	const struct nft_chain *chain = priv, *basechain = chain;
+	const struct nft_rule_dp *rule, *last_rule;
 	const struct net *net = nft_net(pkt);
-	struct nft_rule *const *rules;
-	const struct nft_rule *rule;
 	const struct nft_expr *expr, *last;
 	struct nft_regs regs;
 	unsigned int stackptr = 0;
 	struct nft_jumpstack jumpstack[NFT_JUMP_STACK_SIZE];
 	bool genbit = READ_ONCE(net->nft.gencursor);
+	struct nft_rule_blob *blob;
 	struct nft_traceinfo info;
 
 	info.trace = false;
@@ -202,16 +213,16 @@ nft_do_chain(struct nft_pktinfo *pkt, void *priv)
 		nft_trace_init(&info, pkt, &regs.verdict, basechain);
 do_chain:
 	if (genbit)
-		rules = rcu_dereference(chain->rules_gen_1);
+		blob = rcu_dereference(chain->blob_gen_1);
 	else
-		rules = rcu_dereference(chain->rules_gen_0);
+		blob = rcu_dereference(chain->blob_gen_0);
 
+	rule = (struct nft_rule_dp *)blob->data;
+	last_rule = (void *)blob->data + blob->size;
 next_rule:
-	rule = *rules;
 	regs.verdict.code = NFT_CONTINUE;
-	for (; *rules ; rules++) {
-		rule = *rules;
-		nft_rule_for_each_expr(expr, last, rule) {
+	for (; rule < last_rule; rule = nft_rule_next(rule)) {
+		nft_rule_dp_for_each_expr(expr, last, rule) {
 			if (expr->ops == &nft_cmp_fast_ops)
 				nft_cmp_fast_eval(expr, &regs);
 			else if (expr->ops == &nft_bitwise_fast_ops)
@@ -251,7 +262,8 @@ nft_do_chain(struct nft_pktinfo *pkt, void *priv)
 		if (WARN_ON_ONCE(stackptr >= NFT_JUMP_STACK_SIZE))
 			return NF_DROP;
 		jumpstack[stackptr].chain = chain;
-		jumpstack[stackptr].rules = rules + 1;
+		jumpstack[stackptr].rule = nft_rule_next(rule);
+		jumpstack[stackptr].last_rule = last_rule;
 		stackptr++;
 		fallthrough;
 	case NFT_GOTO:
@@ -267,7 +279,8 @@ nft_do_chain(struct nft_pktinfo *pkt, void *priv)
 	if (stackptr > 0) {
 		stackptr--;
 		chain = jumpstack[stackptr].chain;
-		rules = jumpstack[stackptr].rules;
+		rule = jumpstack[stackptr].rule;
+		last_rule = jumpstack[stackptr].last_rule;
 		goto next_rule;
 	}
 
diff --git a/net/netfilter/nf_tables_trace.c b/net/netfilter/nf_tables_trace.c
index 84a7dea46efa..5041725423c2 100644
--- a/net/netfilter/nf_tables_trace.c
+++ b/net/netfilter/nf_tables_trace.c
@@ -142,7 +142,7 @@ static int nf_trace_fill_pkt_info(struct sk_buff *nlskb,
 static int nf_trace_fill_rule_info(struct sk_buff *nlskb,
 				   const struct nft_traceinfo *info)
 {
-	if (!info->rule)
+	if (!info->rule || info->rule->is_last)
 		return 0;
 
 	/* a continue verdict with ->type == RETURN means that this is
-- 
2.30.2
^ permalink raw reply related	[flat|nested] 15+ messages in thread
* [PATCH 08/14] netfilter: nf_tables: add NFT_REG32_NUM
  2022-01-09 16:11 [PATCH nf-next,v3 00/14] nf_tables datapath ruleset blob and register tracking Pablo Neira Ayuso
                   ` (6 preceding siblings ...)
  2022-01-09 16:11 ` [PATCH 07/14] netfilter: nf_tables: add rule blob layout Pablo Neira Ayuso
@ 2022-01-09 16:11 ` Pablo Neira Ayuso
  2022-01-09 16:11 ` [PATCH 09/14] netfilter: nf_tables: add register tracking infrastructure Pablo Neira Ayuso
                   ` (5 subsequent siblings)
  13 siblings, 0 replies; 15+ messages in thread
From: Pablo Neira Ayuso @ 2022-01-09 16:11 UTC (permalink / raw)
  To: netfilter-devel
Add a definition for the maximum number of 32-bit registers that are
used as a scratchpad memory area to store data.
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/nf_tables.h | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)
diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h
index 5a046b01bdab..515e5db97e01 100644
--- a/include/net/netfilter/nf_tables.h
+++ b/include/net/netfilter/nf_tables.h
@@ -105,6 +105,8 @@ struct nft_data {
 	};
 } __attribute__((aligned(__alignof__(u64))));
 
+#define NFT_REG32_NUM		20
+
 /**
  *	struct nft_regs - nf_tables register set
  *
@@ -115,7 +117,7 @@ struct nft_data {
  */
 struct nft_regs {
 	union {
-		u32			data[20];
+		u32			data[NFT_REG32_NUM];
 		struct nft_verdict	verdict;
 	};
 };
-- 
2.30.2
^ permalink raw reply related	[flat|nested] 15+ messages in thread
* [PATCH 09/14] netfilter: nf_tables: add register tracking infrastructure
  2022-01-09 16:11 [PATCH nf-next,v3 00/14] nf_tables datapath ruleset blob and register tracking Pablo Neira Ayuso
                   ` (7 preceding siblings ...)
  2022-01-09 16:11 ` [PATCH 08/14] netfilter: nf_tables: add NFT_REG32_NUM Pablo Neira Ayuso
@ 2022-01-09 16:11 ` Pablo Neira Ayuso
  2022-01-09 16:11 ` [PATCH 10/14] netfilter: nft_payload: track register operations Pablo Neira Ayuso
                   ` (4 subsequent siblings)
  13 siblings, 0 replies; 15+ messages in thread
From: Pablo Neira Ayuso @ 2022-01-09 16:11 UTC (permalink / raw)
  To: netfilter-devel
This patch adds new infrastructure to skip redundant selector store
operations on the same register to achieve a performance boost from
the packet path.
This is particularly noticeable in pure linear rulesets but it also
helps in rulesets which already rely heavily on maps to avoid
linear ruleset inspection.
The idea is to keep data of the most recurrent store operations on
register to reuse them with cmp and lookup expressions.
This infrastructure allows for dynamic ruleset updates since the ruleset
blob reduction happens from the kernel.
Userspace still needs to be updated to maximize register utilization, so
that it cooperates in improving register data reuse and reducing the
number of store-to-register operations.
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/nf_tables.h | 12 ++++++++++++
 net/netfilter/nf_tables_api.c     | 11 +++++++++++
 2 files changed, 23 insertions(+)
diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h
index 515e5db97e01..1c37ce61daea 100644
--- a/include/net/netfilter/nf_tables.h
+++ b/include/net/netfilter/nf_tables.h
@@ -122,6 +122,16 @@ struct nft_regs {
 	};
 };
 
+struct nft_regs_track {
+	struct {
+		const struct nft_expr		*selector;
+		const struct nft_expr		*bitwise;
+	} regs[NFT_REG32_NUM];
+
+	const struct nft_expr			*cur;
+	const struct nft_expr			*last;
+};
+
 /* Store/load an u8, u16 or u64 integer to/from the u32 data register.
  *
  * Note, when using concatenations, register allocation happens at 32-bit
@@ -886,6 +896,8 @@ struct nft_expr_ops {
 	int				(*validate)(const struct nft_ctx *ctx,
 						    const struct nft_expr *expr,
 						    const struct nft_data **data);
+	bool				(*reduce)(struct nft_regs_track *track,
+						  const struct nft_expr *expr);
 	bool				(*gc)(struct net *net,
 					      const struct nft_expr *expr);
 	int				(*offload)(struct nft_offload_ctx *ctx,
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index af1128fe79e0..eb12fc9b803d 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -8259,6 +8259,7 @@ EXPORT_SYMBOL_GPL(nf_tables_trans_destroy_flush_work);
 static int nf_tables_commit_chain_prepare(struct net *net, struct nft_chain *chain)
 {
 	const struct nft_expr *expr, *last;
+	struct nft_regs_track track = {};
 	unsigned int size, data_size;
 	void *data, *data_boundary;
 	struct nft_rule_dp *prule;
@@ -8298,7 +8299,18 @@ static int nf_tables_commit_chain_prepare(struct net *net, struct nft_chain *cha
 		if (WARN_ON_ONCE(data > data_boundary))
 			return -ENOMEM;
 
+		size = 0;
+		/* 'last' is not yet valid for this rule here, so derive it */
+		track.last = nft_expr_last(rule);
 		nft_rule_for_each_expr(expr, last, rule) {
+			track.cur = expr;
+
+			if (expr->ops->reduce &&
+			    expr->ops->reduce(&track, expr)) {
+				expr = track.cur;
+				continue;
+			}
+
 			if (WARN_ON_ONCE(data + expr->ops->size > data_boundary))
 				return -ENOMEM;
 
-- 
2.30.2
^ permalink raw reply related	[flat|nested] 15+ messages in thread
* [PATCH 10/14] netfilter: nft_payload: track register operations
  2022-01-09 16:11 [PATCH nf-next,v3 00/14] nf_tables datapath ruleset blob and register tracking Pablo Neira Ayuso
                   ` (8 preceding siblings ...)
  2022-01-09 16:11 ` [PATCH 09/14] netfilter: nf_tables: add register tracking infrastructure Pablo Neira Ayuso
@ 2022-01-09 16:11 ` Pablo Neira Ayuso
  2022-01-09 16:11 ` [PATCH 11/14] netfilter: nft_meta: " Pablo Neira Ayuso
                   ` (3 subsequent siblings)
  13 siblings, 0 replies; 15+ messages in thread
From: Pablo Neira Ayuso @ 2022-01-09 16:11 UTC (permalink / raw)
  To: netfilter-devel
Check if the destination register already contains the data that this
payload expression would store. If so, this redundant operation can be
skipped. If the destination contains a different selector, update
the register tracking information.
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/nft_payload.c | 30 ++++++++++++++++++++++++++++++
 1 file changed, 30 insertions(+)
diff --git a/net/netfilter/nft_payload.c b/net/netfilter/nft_payload.c
index b9d636c706f4..b228fea0f263 100644
--- a/net/netfilter/nft_payload.c
+++ b/net/netfilter/nft_payload.c
@@ -210,6 +210,34 @@ static int nft_payload_dump(struct sk_buff *skb, const struct nft_expr *expr)
 	return -1;
 }
 
+static bool nft_payload_reduce(struct nft_regs_track *track,
+			       const struct nft_expr *expr)
+{
+	const struct nft_payload *priv = nft_expr_priv(expr);
+	const struct nft_payload *payload;
+
+	if (!track->regs[priv->dreg].selector ||
+	    track->regs[priv->dreg].selector->ops != expr->ops) {
+		track->regs[priv->dreg].selector = expr;
+		track->regs[priv->dreg].bitwise = NULL;
+		return false;
+	}
+	/* same expression type: only reusable if base, offset and len match */
+	payload = nft_expr_priv(track->regs[priv->dreg].selector);
+	if (priv->base != payload->base ||
+	    priv->offset != payload->offset ||
+	    priv->len != payload->len) {
+		track->regs[priv->dreg].selector = expr;
+		track->regs[priv->dreg].bitwise = NULL;
+		return false;
+	}
+
+	if (!track->regs[priv->dreg].bitwise)
+		return true;
+
+	return false;
+}
+
 static bool nft_payload_offload_mask(struct nft_offload_reg *reg,
 				     u32 priv_len, u32 field_len)
 {
@@ -513,6 +541,7 @@ static const struct nft_expr_ops nft_payload_ops = {
 	.eval		= nft_payload_eval,
 	.init		= nft_payload_init,
 	.dump		= nft_payload_dump,
+	.reduce		= nft_payload_reduce,
 	.offload	= nft_payload_offload,
 };
 
@@ -522,6 +551,7 @@ const struct nft_expr_ops nft_payload_fast_ops = {
 	.eval		= nft_payload_eval,
 	.init		= nft_payload_init,
 	.dump		= nft_payload_dump,
+	.reduce		= nft_payload_reduce,
 	.offload	= nft_payload_offload,
 };
 
-- 
2.30.2
^ permalink raw reply related	[flat|nested] 15+ messages in thread
* [PATCH 11/14] netfilter: nft_meta: track register operations
  2022-01-09 16:11 [PATCH nf-next,v3 00/14] nf_tables datapath ruleset blob and register tracking Pablo Neira Ayuso
                   ` (9 preceding siblings ...)
  2022-01-09 16:11 ` [PATCH 10/14] netfilter: nft_payload: track register operations Pablo Neira Ayuso
@ 2022-01-09 16:11 ` Pablo Neira Ayuso
  2022-01-09 16:11 ` [PATCH 12/14] netfilter: nft_bitwise: " Pablo Neira Ayuso
                   ` (2 subsequent siblings)
  13 siblings, 0 replies; 15+ messages in thread
From: Pablo Neira Ayuso @ 2022-01-09 16:11 UTC (permalink / raw)
  To: netfilter-devel
Check if the destination register already contains the data that this
meta expression would store. If so, this redundant operation can be
skipped. If the destination contains a different selector, update
the register tracking information.
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/nft_meta.c | 28 ++++++++++++++++++++++++++++
 1 file changed, 28 insertions(+)
diff --git a/net/netfilter/nft_meta.c b/net/netfilter/nft_meta.c
index fe91ff5f8fbe..430f40bc3cb4 100644
--- a/net/netfilter/nft_meta.c
+++ b/net/netfilter/nft_meta.c
@@ -750,12 +750,40 @@ static int nft_meta_get_offload(struct nft_offload_ctx *ctx,
 	return 0;
 }
 
+static bool nft_meta_get_reduce(struct nft_regs_track *track,
+				const struct nft_expr *expr)
+{
+	const struct nft_meta *priv = nft_expr_priv(expr);
+	const struct nft_meta *meta;
+
+	if (!track->regs[priv->dreg].selector ||
+	    track->regs[priv->dreg].selector->ops != expr->ops) {
+		track->regs[priv->dreg].selector = expr;
+		track->regs[priv->dreg].bitwise = NULL;
+		return false;
+	}
+	/* same expression type: only reusable if the key (and dreg) match */
+	meta = nft_expr_priv(track->regs[priv->dreg].selector);
+	if (priv->key != meta->key ||
+	    priv->dreg != meta->dreg) {
+		track->regs[priv->dreg].selector = expr;
+		track->regs[priv->dreg].bitwise = NULL;
+		return false;
+	}
+
+	if (!track->regs[priv->dreg].bitwise)
+		return true;
+
+	return false;
+}
+
 static const struct nft_expr_ops nft_meta_get_ops = {
 	.type		= &nft_meta_type,
 	.size		= NFT_EXPR_SIZE(sizeof(struct nft_meta)),
 	.eval		= nft_meta_get_eval,
 	.init		= nft_meta_get_init,
 	.dump		= nft_meta_get_dump,
+	.reduce		= nft_meta_get_reduce,
 	.validate	= nft_meta_get_validate,
 	.offload	= nft_meta_get_offload,
 };
-- 
2.30.2
^ permalink raw reply related	[flat|nested] 15+ messages in thread
* [PATCH 12/14] netfilter: nft_bitwise: track register operations
  2022-01-09 16:11 [PATCH nf-next,v3 00/14] nf_tables datapath ruleset blob and register tracking Pablo Neira Ayuso
                   ` (10 preceding siblings ...)
  2022-01-09 16:11 ` [PATCH 11/14] netfilter: nft_meta: " Pablo Neira Ayuso
@ 2022-01-09 16:11 ` Pablo Neira Ayuso
  2022-01-09 16:11 ` [PATCH 13/14] netfilter: nft_payload: cancel register tracking after payload update Pablo Neira Ayuso
  2022-01-09 16:11 ` [PATCH 14/14] netfilter: nft_meta: cancel register tracking after meta update Pablo Neira Ayuso
  13 siblings, 0 replies; 15+ messages in thread
From: Pablo Neira Ayuso @ 2022-01-09 16:11 UTC (permalink / raw)
  To: netfilter-devel
Check if the destination register already contains the result of this
bitwise expression. If so, this redundant operation can be
skipped.
If the destination contains a different bitwise operation, cancel the
register tracking information. If the destination contains no bitwise
operation, update the register tracking information.
Update the payload and meta expression to check if this bitwise
operation has been already performed on the register. Hence, both the
payload/meta and the bitwise expressions are reduced.
There is also a special case: If source register != destination register
and source register is not updated by a previous bitwise operation, then
transfer selector from the source register to the destination register.
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/nf_tables.h |  2 +
 net/netfilter/nft_bitwise.c       | 95 +++++++++++++++++++++++++++++++
 net/netfilter/nft_meta.c          |  2 +-
 net/netfilter/nft_payload.c       |  2 +-
 4 files changed, 99 insertions(+), 2 deletions(-)
diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h
index 1c37ce61daea..eaf55da9a205 100644
--- a/include/net/netfilter/nf_tables.h
+++ b/include/net/netfilter/nf_tables.h
@@ -358,6 +358,8 @@ int nft_expr_clone(struct nft_expr *dst, struct nft_expr *src);
 void nft_expr_destroy(const struct nft_ctx *ctx, struct nft_expr *expr);
 int nft_expr_dump(struct sk_buff *skb, unsigned int attr,
 		  const struct nft_expr *expr);
+bool nft_expr_reduce_bitwise(struct nft_regs_track *track,
+			     const struct nft_expr *expr);
 
 struct nft_set_ext;
 
diff --git a/net/netfilter/nft_bitwise.c b/net/netfilter/nft_bitwise.c
index 47b0dba95054..7b727d3ebf9d 100644
--- a/net/netfilter/nft_bitwise.c
+++ b/net/netfilter/nft_bitwise.c
@@ -278,12 +278,52 @@ static int nft_bitwise_offload(struct nft_offload_ctx *ctx,
 	return 0;
 }
 
+/* Return true when dreg already holds this operation's result. */
+static bool nft_bitwise_reduce(struct nft_regs_track *track,
+			       const struct nft_expr *expr)
+{
+	const struct nft_bitwise *priv = nft_expr_priv(expr);
+	const struct nft_expr *old = track->regs[priv->dreg].bitwise;
+	const struct nft_bitwise *bitwise = old ? nft_expr_priv(old) : NULL;
+
+	if (!track->regs[priv->sreg].selector)
+		return false;
+
+	/* Compare with the operation tracked on dreg, not expr with itself. */
+	if (track->regs[priv->sreg].selector == track->regs[priv->dreg].selector &&
+	    old && old->ops == expr->ops &&
+	    priv->sreg == bitwise->sreg &&
+	    priv->dreg == bitwise->dreg &&
+	    priv->op == bitwise->op &&
+	    priv->len == bitwise->len &&
+	    !memcmp(&priv->mask, &bitwise->mask, sizeof(priv->mask)) &&
+	    !memcmp(&priv->xor, &bitwise->xor, sizeof(priv->xor)) &&
+	    !memcmp(&priv->data, &bitwise->data, sizeof(priv->data))) {
+		track->cur = expr;
+		return true;
+	}
+
+	/* A bitwise operation is already tracked on sreg: stop tracking dreg. */
+	if (track->regs[priv->sreg].bitwise) {
+		track->regs[priv->dreg].selector = NULL;
+		track->regs[priv->dreg].bitwise = NULL;
+		return false;
+	}
+
+	/* Otherwise dreg takes sreg's selector with this operation on top. */
+	if (priv->sreg != priv->dreg)
+		track->regs[priv->dreg].selector = track->regs[priv->sreg].selector;
+	track->regs[priv->dreg].bitwise = expr;
+	return false;
+}
+
 static const struct nft_expr_ops nft_bitwise_ops = {
 	.type		= &nft_bitwise_type,
 	.size		= NFT_EXPR_SIZE(sizeof(struct nft_bitwise)),
 	.eval		= nft_bitwise_eval,
 	.init		= nft_bitwise_init,
 	.dump		= nft_bitwise_dump,
+	.reduce		= nft_bitwise_reduce,
 	.offload	= nft_bitwise_offload,
 };
 
@@ -385,12 +425,49 @@ static int nft_bitwise_fast_offload(struct nft_offload_ctx *ctx,
 	return 0;
 }
 
+/* Fast-path variant: true when dreg already holds this operation's result. */
+static bool nft_bitwise_fast_reduce(struct nft_regs_track *track,
+				    const struct nft_expr *expr)
+{
+	const struct nft_bitwise_fast_expr *priv = nft_expr_priv(expr);
+	const struct nft_expr *old = track->regs[priv->dreg].bitwise;
+	const struct nft_bitwise_fast_expr *bitwise = old ? nft_expr_priv(old) : NULL;
+
+	if (!track->regs[priv->sreg].selector)
+		return false;
+
+	/* Compare with the operation tracked on dreg, not expr with itself. */
+	if (track->regs[priv->sreg].selector == track->regs[priv->dreg].selector &&
+	    old && old->ops == expr->ops &&
+	    priv->sreg == bitwise->sreg &&
+	    priv->dreg == bitwise->dreg &&
+	    priv->mask == bitwise->mask &&
+	    priv->xor == bitwise->xor) {
+		track->cur = expr;
+		return true;
+	}
+
+	/* A bitwise operation is already tracked on sreg: stop tracking dreg. */
+	if (track->regs[priv->sreg].bitwise) {
+		track->regs[priv->dreg].selector = NULL;
+		track->regs[priv->dreg].bitwise = NULL;
+		return false;
+	}
+
+	/* Otherwise dreg takes sreg's selector with this operation on top. */
+	if (priv->sreg != priv->dreg)
+		track->regs[priv->dreg].selector = track->regs[priv->sreg].selector;
+	track->regs[priv->dreg].bitwise = expr;
+	return false;
+}
+
 const struct nft_expr_ops nft_bitwise_fast_ops = {
 	.type		= &nft_bitwise_type,
 	.size		= NFT_EXPR_SIZE(sizeof(struct nft_bitwise_fast_expr)),
 	.eval		= NULL, /* inlined */
 	.init		= nft_bitwise_fast_init,
 	.dump		= nft_bitwise_fast_dump,
+	.reduce		= nft_bitwise_fast_reduce,
 	.offload	= nft_bitwise_fast_offload,
 };
 
@@ -427,3 +504,21 @@ struct nft_expr_type nft_bitwise_type __read_mostly = {
 	.maxattr	= NFTA_BITWISE_MAX,
 	.owner		= THIS_MODULE,
 };
+
+/* Try to reduce the bitwise expression, if any, right after @expr. */
+bool nft_expr_reduce_bitwise(struct nft_regs_track *track,
+			     const struct nft_expr *expr)
+{
+	const struct nft_expr *next;
+
+	/* @expr is track->last: do not look past it. */
+	if (expr == track->last)
+		return false;
+
+	next = nft_expr_next(expr);
+	if (next->ops == &nft_bitwise_ops)
+		return nft_bitwise_reduce(track, next);
+	if (next->ops == &nft_bitwise_fast_ops)
+		return nft_bitwise_fast_reduce(track, next);
+	return false;
+}
diff --git a/net/netfilter/nft_meta.c b/net/netfilter/nft_meta.c
index 430f40bc3cb4..40fe48fcf9d0 100644
--- a/net/netfilter/nft_meta.c
+++ b/net/netfilter/nft_meta.c
@@ -774,7 +774,7 @@ static bool nft_meta_get_reduce(struct nft_regs_track *track,
 	if (!track->regs[priv->dreg].bitwise)
 		return true;
 
-	return false;
+	return nft_expr_reduce_bitwise(track, expr);
 }
 
 static const struct nft_expr_ops nft_meta_get_ops = {
diff --git a/net/netfilter/nft_payload.c b/net/netfilter/nft_payload.c
index b228fea0f263..b5a3c45727b3 100644
--- a/net/netfilter/nft_payload.c
+++ b/net/netfilter/nft_payload.c
@@ -235,7 +235,7 @@ static bool nft_payload_reduce(struct nft_regs_track *track,
 	if (!track->regs[priv->dreg].bitwise)
 		return true;
 
-	return false;
+	return nft_expr_reduce_bitwise(track, expr);
 }
 
 static bool nft_payload_offload_mask(struct nft_offload_reg *reg,
-- 
2.30.2
^ permalink raw reply related	[flat|nested] 15+ messages in thread
* [PATCH 13/14] netfilter: nft_payload: cancel register tracking after payload update
  2022-01-09 16:11 [PATCH nf-next,v3 00/14] nf_tables datapath ruleset blob and register tracking Pablo Neira Ayuso
                   ` (11 preceding siblings ...)
  2022-01-09 16:11 ` [PATCH 12/14] netfilter: nft_bitwise: " Pablo Neira Ayuso
@ 2022-01-09 16:11 ` Pablo Neira Ayuso
  2022-01-09 16:11 ` [PATCH 14/14] netfilter: nft_meta: cancel register tracking after meta update Pablo Neira Ayuso
  13 siblings, 0 replies; 15+ messages in thread
From: Pablo Neira Ayuso @ 2022-01-09 16:11 UTC (permalink / raw)
  To: netfilter-devel
The payload expression might mangle the packet. Cancel register tracking,
since any payload data in the registers is stale.
Finer-grained register tracking cancellation, by inspecting the payload
base, offset and length on the register, is also possible.
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/nft_payload.c | 21 +++++++++++++++++++++
 1 file changed, 21 insertions(+)
diff --git a/net/netfilter/nft_payload.c b/net/netfilter/nft_payload.c
index b5a3c45727b3..940fed9a760b 100644
--- a/net/netfilter/nft_payload.c
+++ b/net/netfilter/nft_payload.c
@@ -801,12 +801,33 @@ static int nft_payload_set_dump(struct sk_buff *skb, const struct nft_expr *expr
 	return -1;
 }
 
+/* Payload may have been mangled: forget registers tracking payload data. */
+static bool nft_payload_set_reduce(struct nft_regs_track *track,
+				   const struct nft_expr *expr)
+{
+	const struct nft_expr *sel;
+	int reg;
+
+	for (reg = 0; reg < NFT_REG32_NUM; reg++) {
+		sel = track->regs[reg].selector;
+		/* Only selectors using the payload ops are invalidated. */
+		if (sel && (sel->ops == &nft_payload_ops ||
+			    sel->ops == &nft_payload_fast_ops)) {
+			track->regs[reg].selector = NULL;
+			track->regs[reg].bitwise = NULL;
+		}
+	}
+
+	return false;
+}
+
 static const struct nft_expr_ops nft_payload_set_ops = {
 	.type		= &nft_payload_type,
 	.size		= NFT_EXPR_SIZE(sizeof(struct nft_payload_set)),
 	.eval		= nft_payload_set_eval,
 	.init		= nft_payload_set_init,
 	.dump		= nft_payload_set_dump,
+	.reduce		= nft_payload_set_reduce,
 };
 
 static const struct nft_expr_ops *
-- 
2.30.2
^ permalink raw reply related	[flat|nested] 15+ messages in thread
* [PATCH 14/14] netfilter: nft_meta: cancel register tracking after meta update
  2022-01-09 16:11 [PATCH nf-next,v3 00/14] nf_tables datapath ruleset blob and register tracking Pablo Neira Ayuso
                   ` (12 preceding siblings ...)
  2022-01-09 16:11 ` [PATCH 13/14] netfilter: nft_payload: cancel register tracking after payload update Pablo Neira Ayuso
@ 2022-01-09 16:11 ` Pablo Neira Ayuso
  13 siblings, 0 replies; 15+ messages in thread
From: Pablo Neira Ayuso @ 2022-01-09 16:11 UTC (permalink / raw)
  To: netfilter-devel
The meta expression might mangle the packet metadata. Cancel register
tracking, since any metadata in the registers is stale.
Finer-grained register tracking cancellation, by inspecting the meta type
on the register, is also possible.
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/bridge/netfilter/nft_meta_bridge.c | 20 ++++++++++++++++++++
 net/netfilter/nft_meta.c               | 20 ++++++++++++++++++++
 2 files changed, 40 insertions(+)
diff --git a/net/bridge/netfilter/nft_meta_bridge.c b/net/bridge/netfilter/nft_meta_bridge.c
index 97805ec424c1..c1ef9cc89b78 100644
--- a/net/bridge/netfilter/nft_meta_bridge.c
+++ b/net/bridge/netfilter/nft_meta_bridge.c
@@ -100,6 +100,25 @@ static const struct nft_expr_ops nft_meta_bridge_get_ops = {
 	.dump		= nft_meta_get_dump,
 };
 
+/* Bridge metadata may have changed: forget registers tracking it. */
+static bool nft_meta_bridge_set_reduce(struct nft_regs_track *track,
+				       const struct nft_expr *expr)
+{
+	const struct nft_expr *sel;
+	int reg;
+
+	for (reg = 0; reg < NFT_REG32_NUM; reg++) {
+		sel = track->regs[reg].selector;
+		if (!sel || sel->ops != &nft_meta_bridge_get_ops)
+			continue;
+		/* Drop the selector and any bitwise tracked along with it. */
+		track->regs[reg].selector = NULL;
+		track->regs[reg].bitwise = NULL;
+	}
+
+	return false;
+}
+
 static const struct nft_expr_ops nft_meta_bridge_set_ops = {
 	.type		= &nft_meta_bridge_type,
 	.size		= NFT_EXPR_SIZE(sizeof(struct nft_meta)),
@@ -107,6 +126,7 @@ static const struct nft_expr_ops nft_meta_bridge_set_ops = {
 	.init		= nft_meta_set_init,
 	.destroy	= nft_meta_set_destroy,
 	.dump		= nft_meta_set_dump,
+	.reduce		= nft_meta_bridge_set_reduce,
 	.validate	= nft_meta_set_validate,
 };
 
diff --git a/net/netfilter/nft_meta.c b/net/netfilter/nft_meta.c
index 40fe48fcf9d0..5ab4df56c945 100644
--- a/net/netfilter/nft_meta.c
+++ b/net/netfilter/nft_meta.c
@@ -788,6 +788,25 @@ static const struct nft_expr_ops nft_meta_get_ops = {
 	.offload	= nft_meta_get_offload,
 };
 
+/* Packet metadata may have changed: forget registers tracking it. */
+static bool nft_meta_set_reduce(struct nft_regs_track *track,
+				const struct nft_expr *expr)
+{
+	const struct nft_expr *sel;
+	int reg;
+
+	for (reg = 0; reg < NFT_REG32_NUM; reg++) {
+		sel = track->regs[reg].selector;
+		if (!sel || sel->ops != &nft_meta_get_ops)
+			continue;
+		/* Drop the selector and any bitwise tracked along with it. */
+		track->regs[reg].selector = NULL;
+		track->regs[reg].bitwise = NULL;
+	}
+
+	return false;
+}
+
 static const struct nft_expr_ops nft_meta_set_ops = {
 	.type		= &nft_meta_type,
 	.size		= NFT_EXPR_SIZE(sizeof(struct nft_meta)),
@@ -795,6 +814,7 @@ static const struct nft_expr_ops nft_meta_set_ops = {
 	.init		= nft_meta_set_init,
 	.destroy	= nft_meta_set_destroy,
 	.dump		= nft_meta_set_dump,
+	.reduce		= nft_meta_set_reduce,
 	.validate	= nft_meta_set_validate,
 };
 
-- 
2.30.2
^ permalink raw reply related	[flat|nested] 15+ messages in thread
end of thread, other threads:[~2022-01-09 16:11 UTC | newest]
Thread overview: 15+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2022-01-09 16:11 [PATCH nf-next,v3 00/14] nf_tables datapath ruleset blob and register tracking Pablo Neira Ayuso
2022-01-09 16:11 ` [PATCH 01/14] netfilter: nft_connlimit: move stateful fields out of expression data Pablo Neira Ayuso
2022-01-09 16:11 ` [PATCH 02/14] netfilter: nft_last: " Pablo Neira Ayuso
2022-01-09 16:11 ` [PATCH 03/14] netfilter: nft_quota: " Pablo Neira Ayuso
2022-01-09 16:11 ` [PATCH 04/14] netfilter: nft_numgen: " Pablo Neira Ayuso
2022-01-09 16:11 ` [PATCH 05/14] netfilter: nft_limit: rename stateful structure Pablo Neira Ayuso
2022-01-09 16:11 ` [PATCH 06/14] netfilter: nft_limit: move stateful fields out of expression data Pablo Neira Ayuso
2022-01-09 16:11 ` [PATCH 07/14] netfilter: nf_tables: add rule blob layout Pablo Neira Ayuso
2022-01-09 16:11 ` [PATCH 08/14] netfilter: nf_tables: add NFT_REG32_NUM Pablo Neira Ayuso
2022-01-09 16:11 ` [PATCH 09/14] netfilter: nf_tables: add register tracking infrastructure Pablo Neira Ayuso
2022-01-09 16:11 ` [PATCH 10/14] netfilter: nft_payload: track register operations Pablo Neira Ayuso
2022-01-09 16:11 ` [PATCH 11/14] netfilter: nft_meta: " Pablo Neira Ayuso
2022-01-09 16:11 ` [PATCH 12/14] netfilter: nft_bitwise: " Pablo Neira Ayuso
2022-01-09 16:11 ` [PATCH 13/14] netfilter: nft_payload: cancel register tracking after payload update Pablo Neira Ayuso
2022-01-09 16:11 ` [PATCH 14/14] netfilter: nft_meta: cancel register tracking after meta update Pablo Neira Ayuso
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).