* [PATCH nf-next 1/3] netfilter: nft_set_pipapo: Store real pointer, adjust later.
2025-07-01 22:13 [PATCH nf-next 0/3] netfilter: nft_set_pipapo: Use nested-BH locking for nft_pipapo_scratch Sebastian Andrzej Siewior
@ 2025-07-01 22:13 ` Sebastian Andrzej Siewior
2025-07-01 22:13 ` [PATCH nf-next 2/3] netfilter: nft_set_pipapo_avx2: Drop the comment regarding protection Sebastian Andrzej Siewior
` (2 subsequent siblings)
3 siblings, 0 replies; 6+ messages in thread
From: Sebastian Andrzej Siewior @ 2025-07-01 22:13 UTC (permalink / raw)
To: netfilter-devel, coreteam, linux-rt-devel
Cc: Pablo Neira Ayuso, Jozsef Kadlecsik, Thomas Gleixner,
Sebastian Andrzej Siewior
The struct nft_pipapo_scratch is allocated, then aligned to the required
alignment, and the difference (in bytes) is saved in align_off. The
aligned pointer is used later.
While this works, it gets complicated with all the extra checks needed if
all members before map become larger than the required alignment.
Instead of saving the aligned pointer, just save the returned pointer
and align the map pointer in nft_pipapo_lookup() before using it. The
alignment later on shouldn't be that expensive. With this change, the
align_off can be removed and the pointer can be passed to kfree() as is.
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
net/netfilter/nft_set_pipapo.c | 40 ++++++-----------------------
net/netfilter/nft_set_pipapo.h | 5 ++--
net/netfilter/nft_set_pipapo_avx2.c | 8 +++---
3 files changed, 14 insertions(+), 39 deletions(-)
diff --git a/net/netfilter/nft_set_pipapo.c b/net/netfilter/nft_set_pipapo.c
index c5855069bdaba..8b71a4630aa86 100644
--- a/net/netfilter/nft_set_pipapo.c
+++ b/net/netfilter/nft_set_pipapo.c
@@ -411,8 +411,8 @@ bool nft_pipapo_lookup(const struct net *net, const struct nft_set *set,
const u32 *key, const struct nft_set_ext **ext)
{
struct nft_pipapo *priv = nft_set_priv(set);
+ unsigned long *res_map, *fill_map, *map;
struct nft_pipapo_scratch *scratch;
- unsigned long *res_map, *fill_map;
u8 genmask = nft_genmask_cur(net);
const struct nft_pipapo_match *m;
const struct nft_pipapo_field *f;
@@ -431,8 +431,9 @@ bool nft_pipapo_lookup(const struct net *net, const struct nft_set *set,
map_index = scratch->map_index;
- res_map = scratch->map + (map_index ? m->bsize_max : 0);
- fill_map = scratch->map + (map_index ? 0 : m->bsize_max);
+ map = NFT_PIPAPO_LT_ALIGN(&scratch->__map[0]);
+ res_map = map + (map_index ? m->bsize_max : 0);
+ fill_map = map + (map_index ? 0 : m->bsize_max);
pipapo_resmap_init(m, res_map);
@@ -1204,22 +1205,17 @@ static void pipapo_map(struct nft_pipapo_match *m,
}
/**
- * pipapo_free_scratch() - Free per-CPU map at original (not aligned) address
+ * pipapo_free_scratch() - Free per-CPU map at original address
* @m: Matching data
* @cpu: CPU number
*/
static void pipapo_free_scratch(const struct nft_pipapo_match *m, unsigned int cpu)
{
struct nft_pipapo_scratch *s;
- void *mem;
s = *per_cpu_ptr(m->scratch, cpu);
- if (!s)
- return;
- mem = s;
- mem -= s->align_off;
- kfree(mem);
+ kfree(s);
}
/**
@@ -1236,11 +1232,8 @@ static int pipapo_realloc_scratch(struct nft_pipapo_match *clone,
for_each_possible_cpu(i) {
struct nft_pipapo_scratch *scratch;
-#ifdef NFT_PIPAPO_ALIGN
- void *scratch_aligned;
- u32 align_off;
-#endif
- scratch = kzalloc_node(struct_size(scratch, map,
+
+ scratch = kzalloc_node(struct_size(scratch, __map,
bsize_max * 2) +
NFT_PIPAPO_ALIGN_HEADROOM,
GFP_KERNEL_ACCOUNT, cpu_to_node(i));
@@ -1256,23 +1249,6 @@ static int pipapo_realloc_scratch(struct nft_pipapo_match *clone,
}
pipapo_free_scratch(clone, i);
-
-#ifdef NFT_PIPAPO_ALIGN
- /* Align &scratch->map (not the struct itself): the extra
- * %NFT_PIPAPO_ALIGN_HEADROOM bytes passed to kzalloc_node()
- * above guarantee we can waste up to those bytes in order
- * to align the map field regardless of its offset within
- * the struct.
- */
- BUILD_BUG_ON(offsetof(struct nft_pipapo_scratch, map) > NFT_PIPAPO_ALIGN_HEADROOM);
-
- scratch_aligned = NFT_PIPAPO_LT_ALIGN(&scratch->map);
- scratch_aligned -= offsetof(struct nft_pipapo_scratch, map);
- align_off = scratch_aligned - (void *)scratch;
-
- scratch = scratch_aligned;
- scratch->align_off = align_off;
-#endif
*per_cpu_ptr(clone->scratch, i) = scratch;
}
diff --git a/net/netfilter/nft_set_pipapo.h b/net/netfilter/nft_set_pipapo.h
index 4a2ff85ce1c43..3655aa41fa949 100644
--- a/net/netfilter/nft_set_pipapo.h
+++ b/net/netfilter/nft_set_pipapo.h
@@ -126,12 +126,11 @@ struct nft_pipapo_field {
* struct nft_pipapo_scratch - percpu data used for lookup and matching
* @map_index: Current working bitmap index, toggled between field matches
* @align_off: Offset to get the originally allocated address
- * @map: store partial matching results during lookup
+ * @__map: store partial matching results during lookup
*/
struct nft_pipapo_scratch {
u8 map_index;
- u32 align_off;
- unsigned long map[];
+ unsigned long __map[];
};
/**
diff --git a/net/netfilter/nft_set_pipapo_avx2.c b/net/netfilter/nft_set_pipapo_avx2.c
index be7c16c79f711..83acfa0c62b91 100644
--- a/net/netfilter/nft_set_pipapo_avx2.c
+++ b/net/netfilter/nft_set_pipapo_avx2.c
@@ -1154,8 +1154,8 @@ bool nft_pipapo_avx2_lookup(const struct net *net, const struct nft_set *set,
u8 genmask = nft_genmask_cur(net);
const struct nft_pipapo_match *m;
const struct nft_pipapo_field *f;
+ unsigned long *res, *fill, *map;
const u8 *rp = (const u8 *)key;
- unsigned long *res, *fill;
bool map_index;
int i, ret = 0;
@@ -1186,9 +1186,9 @@ bool nft_pipapo_avx2_lookup(const struct net *net, const struct nft_set *set,
}
map_index = scratch->map_index;
-
- res = scratch->map + (map_index ? m->bsize_max : 0);
- fill = scratch->map + (map_index ? 0 : m->bsize_max);
+ map = NFT_PIPAPO_LT_ALIGN(&scratch->__map[0]);
+ res = map + (map_index ? m->bsize_max : 0);
+ fill = map + (map_index ? 0 : m->bsize_max);
pipapo_resmap_init_avx2(m, res);
--
2.50.0
^ permalink raw reply related [flat|nested] 6+ messages in thread
* [PATCH nf-next 2/3] netfilter: nft_set_pipapo_avx2: Drop the comment regarding protection
2025-07-01 22:13 [PATCH nf-next 0/3] netfilter: nft_set_pipapo: Use nested-BH locking for nft_pipapo_scratch Sebastian Andrzej Siewior
2025-07-01 22:13 ` [PATCH nf-next 1/3] netfilter: nft_set_pipapo: Store real pointer, adjust later Sebastian Andrzej Siewior
@ 2025-07-01 22:13 ` Sebastian Andrzej Siewior
2025-07-01 22:13 ` [PATCH nf-next 3/3] netfilter: nft_set_pipapo: Use nested-BH locking for nft_pipapo_scratch Sebastian Andrzej Siewior
2025-07-24 22:06 ` [PATCH nf-next 0/3] " Florian Westphal
3 siblings, 0 replies; 6+ messages in thread
From: Sebastian Andrzej Siewior @ 2025-07-01 22:13 UTC (permalink / raw)
To: netfilter-devel, coreteam, linux-rt-devel
Cc: Pablo Neira Ayuso, Jozsef Kadlecsik, Thomas Gleixner,
Sebastian Andrzej Siewior
The comment claims that the kernel_fpu_begin_mask() below protects
access to the scratch map. This is not true because the access is only
protected by local_bh_disable() above.
Remove the misleading comment.
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
net/netfilter/nft_set_pipapo_avx2.c | 4 +---
1 file changed, 1 insertion(+), 3 deletions(-)
diff --git a/net/netfilter/nft_set_pipapo_avx2.c b/net/netfilter/nft_set_pipapo_avx2.c
index 83acfa0c62b91..daf23a996e612 100644
--- a/net/netfilter/nft_set_pipapo_avx2.c
+++ b/net/netfilter/nft_set_pipapo_avx2.c
@@ -1170,9 +1170,7 @@ bool nft_pipapo_avx2_lookup(const struct net *net, const struct nft_set *set,
m = rcu_dereference(priv->match);
- /* This also protects access to all data related to scratch maps.
- *
- * Note that we don't need a valid MXCSR state for any of the
+ /* Note that we don't need a valid MXCSR state for any of the
* operations we use here, so pass 0 as mask and spare a LDMXCSR
* instruction.
*/
--
2.50.0
^ permalink raw reply related [flat|nested] 6+ messages in thread
* [PATCH nf-next 3/3] netfilter: nft_set_pipapo: Use nested-BH locking for nft_pipapo_scratch
2025-07-01 22:13 [PATCH nf-next 0/3] netfilter: nft_set_pipapo: Use nested-BH locking for nft_pipapo_scratch Sebastian Andrzej Siewior
2025-07-01 22:13 ` [PATCH nf-next 1/3] netfilter: nft_set_pipapo: Store real pointer, adjust later Sebastian Andrzej Siewior
2025-07-01 22:13 ` [PATCH nf-next 2/3] netfilter: nft_set_pipapo_avx2: Drop the comment regarding protection Sebastian Andrzej Siewior
@ 2025-07-01 22:13 ` Sebastian Andrzej Siewior
2025-07-24 22:06 ` [PATCH nf-next 0/3] " Florian Westphal
3 siblings, 0 replies; 6+ messages in thread
From: Sebastian Andrzej Siewior @ 2025-07-01 22:13 UTC (permalink / raw)
To: netfilter-devel, coreteam, linux-rt-devel
Cc: Pablo Neira Ayuso, Jozsef Kadlecsik, Thomas Gleixner,
Sebastian Andrzej Siewior
nft_pipapo_scratch is a per-CPU variable and relies on disabled BH for
its locking. Without per-CPU locking in local_bh_disable() on PREEMPT_RT
this data structure requires explicit locking.
Add a local_lock_t to the data structure and use local_lock_nested_bh() for
locking. This change adds only lockdep coverage and does not alter the
functional behaviour for !PREEMPT_RT.
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
net/netfilter/nft_set_pipapo.c | 5 +++++
net/netfilter/nft_set_pipapo.h | 1 +
net/netfilter/nft_set_pipapo_avx2.c | 15 +++++++--------
3 files changed, 13 insertions(+), 8 deletions(-)
diff --git a/net/netfilter/nft_set_pipapo.c b/net/netfilter/nft_set_pipapo.c
index 8b71a4630aa86..35edaed4170db 100644
--- a/net/netfilter/nft_set_pipapo.c
+++ b/net/netfilter/nft_set_pipapo.c
@@ -428,6 +428,7 @@ bool nft_pipapo_lookup(const struct net *net, const struct nft_set *set,
goto out;
scratch = *raw_cpu_ptr(m->scratch);
+ __local_lock_nested_bh(&scratch->bh_lock);
map_index = scratch->map_index;
@@ -464,6 +465,7 @@ bool nft_pipapo_lookup(const struct net *net, const struct nft_set *set,
last);
if (b < 0) {
scratch->map_index = map_index;
+ __local_unlock_nested_bh(&scratch->bh_lock);
local_bh_enable();
return false;
@@ -481,6 +483,7 @@ bool nft_pipapo_lookup(const struct net *net, const struct nft_set *set,
* *next* bitmap (not initial) for the next packet.
*/
scratch->map_index = map_index;
+ __local_unlock_nested_bh(&scratch->bh_lock);
local_bh_enable();
return true;
@@ -496,6 +499,7 @@ bool nft_pipapo_lookup(const struct net *net, const struct nft_set *set,
rp += NFT_PIPAPO_GROUPS_PADDING(f);
}
+ __local_unlock_nested_bh(&scratch->bh_lock);
out:
local_bh_enable();
return false;
@@ -1249,6 +1253,7 @@ static int pipapo_realloc_scratch(struct nft_pipapo_match *clone,
}
pipapo_free_scratch(clone, i);
+ local_lock_init(&scratch->bh_lock);
*per_cpu_ptr(clone->scratch, i) = scratch;
}
diff --git a/net/netfilter/nft_set_pipapo.h b/net/netfilter/nft_set_pipapo.h
index 3655aa41fa949..4d9addea854c4 100644
--- a/net/netfilter/nft_set_pipapo.h
+++ b/net/netfilter/nft_set_pipapo.h
@@ -129,6 +129,7 @@ struct nft_pipapo_field {
* @__map: store partial matching results during lookup
*/
struct nft_pipapo_scratch {
+ local_lock_t bh_lock;
u8 map_index;
unsigned long __map[];
};
diff --git a/net/netfilter/nft_set_pipapo_avx2.c b/net/netfilter/nft_set_pipapo_avx2.c
index daf23a996e612..69af6eaf8a630 100644
--- a/net/netfilter/nft_set_pipapo_avx2.c
+++ b/net/netfilter/nft_set_pipapo_avx2.c
@@ -1169,20 +1169,18 @@ bool nft_pipapo_avx2_lookup(const struct net *net, const struct nft_set *set,
}
m = rcu_dereference(priv->match);
-
+ scratch = *raw_cpu_ptr(m->scratch);
+ if (unlikely(!scratch)) {
+ local_bh_enable();
+ return false;
+ }
+ __local_lock_nested_bh(&scratch->bh_lock);
/* Note that we don't need a valid MXCSR state for any of the
* operations we use here, so pass 0 as mask and spare a LDMXCSR
* instruction.
*/
kernel_fpu_begin_mask(0);
- scratch = *raw_cpu_ptr(m->scratch);
- if (unlikely(!scratch)) {
- kernel_fpu_end();
- local_bh_enable();
- return false;
- }
-
map_index = scratch->map_index;
map = NFT_PIPAPO_LT_ALIGN(&scratch->__map[0]);
res = map + (map_index ? m->bsize_max : 0);
@@ -1260,6 +1258,7 @@ bool nft_pipapo_avx2_lookup(const struct net *net, const struct nft_set *set,
if (i % 2)
scratch->map_index = !map_index;
kernel_fpu_end();
+ __local_unlock_nested_bh(&scratch->bh_lock);
local_bh_enable();
return ret >= 0;
--
2.50.0
^ permalink raw reply related [flat|nested] 6+ messages in thread