* [PATCH nf-next 1/7] netfilter: nf_tables: pass netns to set->ops->remove()
2017-01-18 17:30 [PATCH nf-next 0/7] nf_tables set enhancements Pablo Neira Ayuso
@ 2017-01-18 17:30 ` Pablo Neira Ayuso
2017-01-18 17:30 ` [PATCH nf-next 2/7] netfilter: nf_tables: use struct nft_set_iter in set element flush Pablo Neira Ayuso
` (5 subsequent siblings)
6 siblings, 0 replies; 8+ messages in thread
From: Pablo Neira Ayuso @ 2017-01-18 17:30 UTC (permalink / raw)
To: netfilter-devel
This is required by the new bitmap set type that comes in a follow up
patch.
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
include/net/netfilter/nf_tables.h | 3 ++-
net/netfilter/nf_tables_api.c | 4 ++--
net/netfilter/nft_set_hash.c | 3 ++-
net/netfilter/nft_set_rbtree.c | 3 ++-
4 files changed, 8 insertions(+), 5 deletions(-)
diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h
index 924325c46aab..1b6fd97d268c 100644
--- a/include/net/netfilter/nf_tables.h
+++ b/include/net/netfilter/nf_tables.h
@@ -298,7 +298,8 @@ struct nft_set_ops {
bool (*deactivate_one)(const struct net *net,
const struct nft_set *set,
void *priv);
- void (*remove)(const struct nft_set *set,
+ void (*remove)(const struct net *net,
+ const struct nft_set *set,
const struct nft_set_elem *elem);
void (*walk)(const struct nft_ctx *ctx,
const struct nft_set *set,
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index a4619cbf2fe2..6598a48ead2f 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -4803,7 +4803,7 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb)
nf_tables_setelem_notify(&trans->ctx, te->set,
&te->elem,
NFT_MSG_DELSETELEM, 0);
- te->set->ops->remove(te->set, &te->elem);
+ te->set->ops->remove(net, te->set, &te->elem);
atomic_dec(&te->set->nelems);
te->set->ndeact--;
break;
@@ -4924,7 +4924,7 @@ static int nf_tables_abort(struct net *net, struct sk_buff *skb)
case NFT_MSG_NEWSETELEM:
te = (struct nft_trans_elem *)trans->data;
- te->set->ops->remove(te->set, &te->elem);
+ te->set->ops->remove(net, te->set, &te->elem);
atomic_dec(&te->set->nelems);
break;
case NFT_MSG_DELSETELEM:
diff --git a/net/netfilter/nft_set_hash.c b/net/netfilter/nft_set_hash.c
index 1e20e2bbb6d9..12df560edee1 100644
--- a/net/netfilter/nft_set_hash.c
+++ b/net/netfilter/nft_set_hash.c
@@ -203,7 +203,8 @@ static void *nft_hash_deactivate(const struct net *net,
return he;
}
-static void nft_hash_remove(const struct nft_set *set,
+static void nft_hash_remove(const struct net *net,
+ const struct nft_set *set,
const struct nft_set_elem *elem)
{
struct nft_hash *priv = nft_set_priv(set);
diff --git a/net/netfilter/nft_set_rbtree.c b/net/netfilter/nft_set_rbtree.c
index 08376e50f6cd..afccbd2aee31 100644
--- a/net/netfilter/nft_set_rbtree.c
+++ b/net/netfilter/nft_set_rbtree.c
@@ -151,7 +151,8 @@ static int nft_rbtree_insert(const struct net *net, const struct nft_set *set,
return err;
}
-static void nft_rbtree_remove(const struct nft_set *set,
+static void nft_rbtree_remove(const struct net *net,
+ const struct nft_set *set,
const struct nft_set_elem *elem)
{
struct nft_rbtree *priv = nft_set_priv(set);
--
2.1.4
^ permalink raw reply related [flat|nested] 8+ messages in thread
* [PATCH nf-next 2/7] netfilter: nf_tables: use struct nft_set_iter in set element flush
2017-01-18 17:30 [PATCH nf-next 0/7] nf_tables set enhancements Pablo Neira Ayuso
2017-01-18 17:30 ` [PATCH nf-next 1/7] netfilter: nf_tables: pass netns to set->ops->remove() Pablo Neira Ayuso
@ 2017-01-18 17:30 ` Pablo Neira Ayuso
2017-01-18 17:30 ` [PATCH nf-next 3/7] netfilter: nf_tables: rename deactivate_one() to flush() Pablo Neira Ayuso
` (4 subsequent siblings)
6 siblings, 0 replies; 8+ messages in thread
From: Pablo Neira Ayuso @ 2017-01-18 17:30 UTC (permalink / raw)
To: netfilter-devel
Instead of struct nft_set_dump_args, remove unnecessary wrapper
structure.
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
net/netfilter/nf_tables_api.c | 12 +++++-------
1 file changed, 5 insertions(+), 7 deletions(-)
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 6598a48ead2f..494b49420149 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -3934,15 +3934,13 @@ static int nf_tables_delsetelem(struct net *net, struct sock *nlsk,
return -EBUSY;
if (nla[NFTA_SET_ELEM_LIST_ELEMENTS] == NULL) {
- struct nft_set_dump_args args = {
- .iter = {
- .genmask = genmask,
- .fn = nft_flush_set,
- },
+ struct nft_set_iter iter = {
+ .genmask = genmask,
+ .fn = nft_flush_set,
};
- set->ops->walk(&ctx, set, &args.iter);
+ set->ops->walk(&ctx, set, &iter);
- return args.iter.err;
+ return iter.err;
}
nla_for_each_nested(attr, nla[NFTA_SET_ELEM_LIST_ELEMENTS], rem) {
--
2.1.4
^ permalink raw reply related [flat|nested] 8+ messages in thread
* [PATCH nf-next 3/7] netfilter: nf_tables: rename deactivate_one() to flush()
2017-01-18 17:30 [PATCH nf-next 0/7] nf_tables set enhancements Pablo Neira Ayuso
2017-01-18 17:30 ` [PATCH nf-next 1/7] netfilter: nf_tables: pass netns to set->ops->remove() Pablo Neira Ayuso
2017-01-18 17:30 ` [PATCH nf-next 2/7] netfilter: nf_tables: use struct nft_set_iter in set element flush Pablo Neira Ayuso
@ 2017-01-18 17:30 ` Pablo Neira Ayuso
2017-01-18 17:30 ` [PATCH nf-next 4/7] netfilter: nf_tables: add flush field to struct nft_set_iter Pablo Neira Ayuso
` (3 subsequent siblings)
6 siblings, 0 replies; 8+ messages in thread
From: Pablo Neira Ayuso @ 2017-01-18 17:30 UTC (permalink / raw)
To: netfilter-devel
Although semantics are similar to deactivate() with no implicit element
lookup, this is only called from the set flush path, so better rename
this to flush.
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
include/net/netfilter/nf_tables.h | 8 ++++----
net/netfilter/nf_tables_api.c | 2 +-
net/netfilter/nft_set_hash.c | 8 ++++----
net/netfilter/nft_set_rbtree.c | 8 ++++----
4 files changed, 13 insertions(+), 13 deletions(-)
diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h
index 1b6fd97d268c..1c3bf162cef8 100644
--- a/include/net/netfilter/nf_tables.h
+++ b/include/net/netfilter/nf_tables.h
@@ -260,7 +260,7 @@ struct nft_expr;
* @insert: insert new element into set
* @activate: activate new element in the next generation
* @deactivate: lookup for element and deactivate it in the next generation
- * @deactivate_one: deactivate element in the next generation
+ * @flush: deactivate element in the next generation
* @remove: remove element from set
* @walk: iterate over all set elemeennts
* @privsize: function to return size of set private data
@@ -295,9 +295,9 @@ struct nft_set_ops {
void * (*deactivate)(const struct net *net,
const struct nft_set *set,
const struct nft_set_elem *elem);
- bool (*deactivate_one)(const struct net *net,
- const struct nft_set *set,
- void *priv);
+ bool (*flush)(const struct net *net,
+ const struct nft_set *set,
+ void *priv);
void (*remove)(const struct net *net,
const struct nft_set *set,
const struct nft_set_elem *elem);
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 494b49420149..26238dba6314 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -3897,7 +3897,7 @@ static int nft_flush_set(const struct nft_ctx *ctx,
if (!trans)
return -ENOMEM;
- if (!set->ops->deactivate_one(ctx->net, set, elem->priv)) {
+ if (!set->ops->flush(ctx->net, set, elem->priv)) {
err = -ENOENT;
goto err1;
}
diff --git a/net/netfilter/nft_set_hash.c b/net/netfilter/nft_set_hash.c
index 12df560edee1..f9e69e8a8dc3 100644
--- a/net/netfilter/nft_set_hash.c
+++ b/net/netfilter/nft_set_hash.c
@@ -167,8 +167,8 @@ static void nft_hash_activate(const struct net *net, const struct nft_set *set,
nft_set_elem_clear_busy(&he->ext);
}
-static bool nft_hash_deactivate_one(const struct net *net,
- const struct nft_set *set, void *priv)
+static bool nft_hash_flush(const struct net *net,
+ const struct nft_set *set, void *priv)
{
struct nft_hash_elem *he = priv;
@@ -195,7 +195,7 @@ static void *nft_hash_deactivate(const struct net *net,
rcu_read_lock();
he = rhashtable_lookup_fast(&priv->ht, &arg, nft_hash_params);
if (he != NULL &&
- !nft_hash_deactivate_one(net, set, he))
+ !nft_hash_flush(net, set, he))
he = NULL;
rcu_read_unlock();
@@ -398,7 +398,7 @@ static struct nft_set_ops nft_hash_ops __read_mostly = {
.insert = nft_hash_insert,
.activate = nft_hash_activate,
.deactivate = nft_hash_deactivate,
- .deactivate_one = nft_hash_deactivate_one,
+ .flush = nft_hash_flush,
.remove = nft_hash_remove,
.lookup = nft_hash_lookup,
.update = nft_hash_update,
diff --git a/net/netfilter/nft_set_rbtree.c b/net/netfilter/nft_set_rbtree.c
index afccbd2aee31..46504c2c65d7 100644
--- a/net/netfilter/nft_set_rbtree.c
+++ b/net/netfilter/nft_set_rbtree.c
@@ -172,8 +172,8 @@ static void nft_rbtree_activate(const struct net *net,
nft_set_elem_change_active(net, set, &rbe->ext);
}
-static bool nft_rbtree_deactivate_one(const struct net *net,
- const struct nft_set *set, void *priv)
+static bool nft_rbtree_flush(const struct net *net,
+ const struct nft_set *set, void *priv)
{
struct nft_rbtree_elem *rbe = priv;
@@ -214,7 +214,7 @@ static void *nft_rbtree_deactivate(const struct net *net,
parent = parent->rb_right;
continue;
}
- nft_rbtree_deactivate_one(net, set, rbe);
+ nft_rbtree_flush(net, set, rbe);
return rbe;
}
}
@@ -305,7 +305,7 @@ static struct nft_set_ops nft_rbtree_ops __read_mostly = {
.insert = nft_rbtree_insert,
.remove = nft_rbtree_remove,
.deactivate = nft_rbtree_deactivate,
- .deactivate_one = nft_rbtree_deactivate_one,
+ .flush = nft_rbtree_flush,
.activate = nft_rbtree_activate,
.lookup = nft_rbtree_lookup,
.walk = nft_rbtree_walk,
--
2.1.4
^ permalink raw reply related [flat|nested] 8+ messages in thread
* [PATCH nf-next 4/7] netfilter: nf_tables: add flush field to struct nft_set_iter
2017-01-18 17:30 [PATCH nf-next 0/7] nf_tables set enhancements Pablo Neira Ayuso
` (2 preceding siblings ...)
2017-01-18 17:30 ` [PATCH nf-next 3/7] netfilter: nf_tables: rename deactivate_one() to flush() Pablo Neira Ayuso
@ 2017-01-18 17:30 ` Pablo Neira Ayuso
2017-01-18 17:30 ` [PATCH nf-next 5/7] netfilter: nf_tables: rename struct nft_set_estimate class field Pablo Neira Ayuso
` (2 subsequent siblings)
6 siblings, 0 replies; 8+ messages in thread
From: Pablo Neira Ayuso @ 2017-01-18 17:30 UTC (permalink / raw)
To: netfilter-devel
This provides context to walk callback iterator, thus, we know if the
walk happens from the set flush path. This is required by the new bitmap
set type coming in a follow up patch which has no real struct
nft_set_ext, so it has to allocate it based on the two bit compact
element representation.
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
include/net/netfilter/nf_tables.h | 1 +
net/netfilter/nf_tables_api.c | 1 +
2 files changed, 2 insertions(+)
diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h
index 1c3bf162cef8..deff701137e8 100644
--- a/include/net/netfilter/nf_tables.h
+++ b/include/net/netfilter/nf_tables.h
@@ -203,6 +203,7 @@ struct nft_set_elem {
struct nft_set;
struct nft_set_iter {
u8 genmask;
+ bool flush;
unsigned int count;
unsigned int skip;
int err;
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 26238dba6314..b5f6cb70d3df 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -3937,6 +3937,7 @@ static int nf_tables_delsetelem(struct net *net, struct sock *nlsk,
struct nft_set_iter iter = {
.genmask = genmask,
.fn = nft_flush_set,
+ .flush = true,
};
set->ops->walk(&ctx, set, &iter);
--
2.1.4
^ permalink raw reply related [flat|nested] 8+ messages in thread
* [PATCH nf-next 5/7] netfilter: nf_tables: rename struct nft_set_estimate class field
2017-01-18 17:30 [PATCH nf-next 0/7] nf_tables set enhancements Pablo Neira Ayuso
` (3 preceding siblings ...)
2017-01-18 17:30 ` [PATCH nf-next 4/7] netfilter: nf_tables: add flush field to struct nft_set_iter Pablo Neira Ayuso
@ 2017-01-18 17:30 ` Pablo Neira Ayuso
2017-01-18 17:30 ` [PATCH nf-next 6/7] netfilter: nf_tables: add space notation to sets Pablo Neira Ayuso
2017-01-18 17:30 ` [PATCH nf-next 7/7] netfilter: nf_tables: add bitmap set type Pablo Neira Ayuso
6 siblings, 0 replies; 8+ messages in thread
From: Pablo Neira Ayuso @ 2017-01-18 17:30 UTC (permalink / raw)
To: netfilter-devel
Use lookup as field name instead, to prepare the introduction of the
memory class in a follow up patch.
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
include/net/netfilter/nf_tables.h | 4 ++--
net/netfilter/nf_tables_api.c | 12 ++++++------
net/netfilter/nft_set_hash.c | 2 +-
net/netfilter/nft_set_rbtree.c | 2 +-
4 files changed, 10 insertions(+), 10 deletions(-)
diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h
index deff701137e8..9256a3cba5fe 100644
--- a/include/net/netfilter/nf_tables.h
+++ b/include/net/netfilter/nf_tables.h
@@ -244,11 +244,11 @@ enum nft_set_class {
* characteristics
*
* @size: required memory
- * @class: lookup performance class
+ * @lookup: lookup performance class
*/
struct nft_set_estimate {
unsigned int size;
- enum nft_set_class class;
+ enum nft_set_class lookup;
};
struct nft_set_ext;
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index b5f6cb70d3df..4f18c027163e 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -2400,9 +2400,9 @@ nft_select_set_ops(const struct nlattr * const nla[],
features &= NFT_SET_INTERVAL | NFT_SET_MAP | NFT_SET_TIMEOUT;
}
- bops = NULL;
- best.size = ~0;
- best.class = ~0;
+ bops = NULL;
+ best.size = ~0;
+ best.lookup = ~0;
list_for_each_entry(ops, &nf_tables_set_ops, list) {
if ((ops->features & features) != features)
@@ -2412,15 +2412,15 @@ nft_select_set_ops(const struct nlattr * const nla[],
switch (policy) {
case NFT_SET_POL_PERFORMANCE:
- if (est.class < best.class)
+ if (est.lookup < best.lookup)
break;
- if (est.class == best.class && est.size < best.size)
+ if (est.lookup == best.lookup && est.size < best.size)
break;
continue;
case NFT_SET_POL_MEMORY:
if (est.size < best.size)
break;
- if (est.size == best.size && est.class < best.class)
+ if (est.size == best.size && est.lookup < best.lookup)
break;
continue;
default:
diff --git a/net/netfilter/nft_set_hash.c b/net/netfilter/nft_set_hash.c
index f9e69e8a8dc3..2c96bdeb186f 100644
--- a/net/netfilter/nft_set_hash.c
+++ b/net/netfilter/nft_set_hash.c
@@ -384,7 +384,7 @@ static bool nft_hash_estimate(const struct nft_set_desc *desc, u32 features,
est->size = esize + 2 * sizeof(struct nft_hash_elem *);
}
- est->class = NFT_SET_CLASS_O_1;
+ est->lookup = NFT_SET_CLASS_O_1;
return true;
}
diff --git a/net/netfilter/nft_set_rbtree.c b/net/netfilter/nft_set_rbtree.c
index 46504c2c65d7..9b3ec6a3be2c 100644
--- a/net/netfilter/nft_set_rbtree.c
+++ b/net/netfilter/nft_set_rbtree.c
@@ -291,7 +291,7 @@ static bool nft_rbtree_estimate(const struct nft_set_desc *desc, u32 features,
else
est->size = nsize;
- est->class = NFT_SET_CLASS_O_LOG_N;
+ est->lookup = NFT_SET_CLASS_O_LOG_N;
return true;
}
--
2.1.4
^ permalink raw reply related [flat|nested] 8+ messages in thread
* [PATCH nf-next 6/7] netfilter: nf_tables: add space notation to sets
2017-01-18 17:30 [PATCH nf-next 0/7] nf_tables set enhancements Pablo Neira Ayuso
` (4 preceding siblings ...)
2017-01-18 17:30 ` [PATCH nf-next 5/7] netfilter: nf_tables: rename struct nft_set_estimate class field Pablo Neira Ayuso
@ 2017-01-18 17:30 ` Pablo Neira Ayuso
2017-01-18 17:30 ` [PATCH nf-next 7/7] netfilter: nf_tables: add bitmap set type Pablo Neira Ayuso
6 siblings, 0 replies; 8+ messages in thread
From: Pablo Neira Ayuso @ 2017-01-18 17:30 UTC (permalink / raw)
To: netfilter-devel
The space notation allows us to classify the set backend implementation
based on the amount of required memory. This provides an order of the
set representation scalability in terms of memory. The size field is
still left in place so use this if the userspace provides no explicit
number of elements, so we cannot calculate the real memory that this set
needs. This also helps us break ties in the set backend selection
routine, eg. two backend implementations provide the same performance.
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
include/net/netfilter/nf_tables.h | 2 ++
net/netfilter/nf_tables_api.c | 22 +++++++++++++++++-----
net/netfilter/nft_set_hash.c | 1 +
net/netfilter/nft_set_rbtree.c | 1 +
4 files changed, 21 insertions(+), 5 deletions(-)
diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h
index 9256a3cba5fe..6f0d087a8c67 100644
--- a/include/net/netfilter/nf_tables.h
+++ b/include/net/netfilter/nf_tables.h
@@ -245,10 +245,12 @@ enum nft_set_class {
*
* @size: required memory
* @lookup: lookup performance class
+ * @space: memory class
*/
struct nft_set_estimate {
unsigned int size;
enum nft_set_class lookup;
+ enum nft_set_class space;
};
struct nft_set_ext;
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 4f18c027163e..e5c35a222280 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -2403,6 +2403,7 @@ nft_select_set_ops(const struct nlattr * const nla[],
bops = NULL;
best.size = ~0;
best.lookup = ~0;
+ best.space = ~0;
list_for_each_entry(ops, &nf_tables_set_ops, list) {
if ((ops->features & features) != features)
@@ -2414,14 +2415,25 @@ nft_select_set_ops(const struct nlattr * const nla[],
case NFT_SET_POL_PERFORMANCE:
if (est.lookup < best.lookup)
break;
- if (est.lookup == best.lookup && est.size < best.size)
- break;
+ if (est.lookup == best.lookup) {
+ if (!desc->size) {
+ if (est.space < best.space)
+ break;
+ } else if (est.size < best.size) {
+ break;
+ }
+ }
continue;
case NFT_SET_POL_MEMORY:
- if (est.size < best.size)
- break;
- if (est.size == best.size && est.lookup < best.lookup)
+ if (!desc->size) {
+ if (est.space < best.space)
+ break;
+ if (est.space == best.space &&
+ est.lookup < best.lookup)
+ break;
+ } else if (est.size < best.size) {
break;
+ }
continue;
default:
break;
diff --git a/net/netfilter/nft_set_hash.c b/net/netfilter/nft_set_hash.c
index 2c96bdeb186f..0520b1b9e06f 100644
--- a/net/netfilter/nft_set_hash.c
+++ b/net/netfilter/nft_set_hash.c
@@ -385,6 +385,7 @@ static bool nft_hash_estimate(const struct nft_set_desc *desc, u32 features,
}
est->lookup = NFT_SET_CLASS_O_1;
+ est->space = NFT_SET_CLASS_O_N;
return true;
}
diff --git a/net/netfilter/nft_set_rbtree.c b/net/netfilter/nft_set_rbtree.c
index 9b3ec6a3be2c..47f7cd2b764f 100644
--- a/net/netfilter/nft_set_rbtree.c
+++ b/net/netfilter/nft_set_rbtree.c
@@ -292,6 +292,7 @@ static bool nft_rbtree_estimate(const struct nft_set_desc *desc, u32 features,
est->size = nsize;
est->lookup = NFT_SET_CLASS_O_LOG_N;
+ est->space = NFT_SET_CLASS_O_N;
return true;
}
--
2.1.4
^ permalink raw reply related [flat|nested] 8+ messages in thread
* [PATCH nf-next 7/7] netfilter: nf_tables: add bitmap set type
2017-01-18 17:30 [PATCH nf-next 0/7] nf_tables set enhancements Pablo Neira Ayuso
` (5 preceding siblings ...)
2017-01-18 17:30 ` [PATCH nf-next 6/7] netfilter: nf_tables: add space notation to sets Pablo Neira Ayuso
@ 2017-01-18 17:30 ` Pablo Neira Ayuso
6 siblings, 0 replies; 8+ messages in thread
From: Pablo Neira Ayuso @ 2017-01-18 17:30 UTC (permalink / raw)
To: netfilter-devel
This patch adds a new bitmap set type. This bitmap uses two bits to
represent one element. These two bits determine the element state in the
current and the future generation that fits into the nf_tables commit
protocol. When dumping elements back to userspace, the two bits are
expanded into a struct nft_set_ext object.
If no NFTA_SET_DESC_SIZE is specified, the existing automatic set
backend selection prefers bitmap over hash in case of keys whose size is
<= 16 bit. If the set size is know, the bitmap set type is selected if
with 16 bit kets and more than 390 elements in the set, otherwise the
hash table set implementation is used.
For 8 bit keys, the bitmap consumes 66 bytes. For 16 bit keys, the
bitmap takes 16388 bytes.
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
net/netfilter/Kconfig | 6 +
net/netfilter/Makefile | 1 +
net/netfilter/nft_set_bitmap.c | 314 +++++++++++++++++++++++++++++++++++++++++
3 files changed, 321 insertions(+)
create mode 100644 net/netfilter/nft_set_bitmap.c
diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
index bbc45f8a7b2d..d522fd11e8fd 100644
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -509,6 +509,12 @@ config NFT_SET_HASH
This option adds the "hash" set type that is used to build one-way
mappings between matchings and actions.
+config NFT_SET_BITMAP
+ tristate "Netfilter nf_tables bitmap set module"
+ help
+ This option adds the "bitmap" set type that is used to build sets
+ whose keys are smaller or equal to 16 bits.
+
config NFT_COUNTER
tristate "Netfilter nf_tables counter module"
help
diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile
index ca30d1960f1d..2f22d55d06fa 100644
--- a/net/netfilter/Makefile
+++ b/net/netfilter/Makefile
@@ -95,6 +95,7 @@ obj-$(CONFIG_NFT_REJECT) += nft_reject.o
obj-$(CONFIG_NFT_REJECT_INET) += nft_reject_inet.o
obj-$(CONFIG_NFT_SET_RBTREE) += nft_set_rbtree.o
obj-$(CONFIG_NFT_SET_HASH) += nft_set_hash.o
+obj-$(CONFIG_NFT_SET_BITMAP) += nft_set_bitmap.o
obj-$(CONFIG_NFT_COUNTER) += nft_counter.o
obj-$(CONFIG_NFT_LOG) += nft_log.o
obj-$(CONFIG_NFT_MASQ) += nft_masq.o
diff --git a/net/netfilter/nft_set_bitmap.c b/net/netfilter/nft_set_bitmap.c
new file mode 100644
index 000000000000..697d407ab59c
--- /dev/null
+++ b/net/netfilter/nft_set_bitmap.c
@@ -0,0 +1,314 @@
+/*
+ * Copyright (c) 2017 Pablo Neira Ayuso <pablo@netfilter.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/list.h>
+#include <linux/netlink.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables.h>
+
+/* This bitmap uses two bits to represent one element. These two bits determine
+ * the element state in the current and the future generation.
+ *
+ * An element can be in three states. The generation cursor is represented using
+ * the ^ character, note that this cursor shifts on every succesful transaction.
+ * If no transaction is going on, we observe all elements are in the following
+ * state:
+ *
+ * 11 = this element is active in the current generation. In case of no updates,
+ * ^ it stays active in the next generation.
+ * 00 = this element is inactive in the current generation. In case of no
+ * ^ updates, it stays inactive in the next generation.
+ *
+ * On transaction handling, we observe these two temporary states:
+ *
+ * 01 = this element is inactive in the current generation and it becomes active
+ * ^ in the next one. This happens when the element is inserted but commit
+ * path has not yet been executed yet, so activation is still pending. On
+ * transaction abortion, the element is removed.
+ * 10 = this element is active in the current generation and it becomes inactive
+ * ^ in the next one. This happens when the element is deactivated but commit
+ * path has not yet been executed yet, so removal is still pending. On
+ * transation abortion, the next generation bit is reset to go back to
+ * restore its previous state.
+ */
+struct nft_bitmap {
+ u16 bitmap_size;
+ u8 bitmap[];
+};
+
+static inline void nft_bitmap_location(u32 key, u32 *idx, u32 *off)
+{
+ u32 k = (key << 1);
+
+ *idx = k / BITS_PER_BYTE;
+ *off = k % BITS_PER_BYTE;
+}
+
+/* Fetch the two bits that represent the element and check if it is active based
+ * on the generation mask.
+ */
+static inline bool
+nft_bitmap_active(const u8 *bitmap, u32 idx, u32 off, u8 genmask)
+{
+ return (bitmap[idx] & (0x3 << off)) & (genmask << off);
+}
+
+static bool nft_bitmap_lookup(const struct net *net, const struct nft_set *set,
+ const u32 *key, const struct nft_set_ext **ext)
+{
+ const struct nft_bitmap *priv = nft_set_priv(set);
+ u8 genmask = nft_genmask_cur(net);
+ u32 idx, off;
+
+ nft_bitmap_location(*key, &idx, &off);
+
+ return nft_bitmap_active(priv->bitmap, idx, off, genmask);
+}
+
+static int nft_bitmap_insert(const struct net *net, const struct nft_set *set,
+ const struct nft_set_elem *elem,
+ struct nft_set_ext **_ext)
+{
+ struct nft_bitmap *priv = nft_set_priv(set);
+ struct nft_set_ext *ext = elem->priv;
+ u8 genmask = nft_genmask_next(net);
+ u32 idx, off;
+
+ nft_bitmap_location(nft_set_ext_key(ext)->data[0], &idx, &off);
+ if (nft_bitmap_active(priv->bitmap, idx, off, genmask))
+ return -EEXIST;
+
+ /* Enter 01 state. */
+ priv->bitmap[idx] |= (genmask << off);
+
+ return 0;
+}
+
+static void nft_bitmap_remove(const struct net *net,
+ const struct nft_set *set,
+ const struct nft_set_elem *elem)
+{
+ struct nft_bitmap *priv = nft_set_priv(set);
+ struct nft_set_ext *ext = elem->priv;
+ u8 genmask = nft_genmask_next(net);
+ u32 idx, off;
+
+ nft_bitmap_location(nft_set_ext_key(ext)->data[0], &idx, &off);
+ /* Enter 00 state. */
+ priv->bitmap[idx] &= ~(genmask << off);
+}
+
+static void nft_bitmap_activate(const struct net *net,
+ const struct nft_set *set,
+ const struct nft_set_elem *elem)
+{
+ struct nft_bitmap *priv = nft_set_priv(set);
+ struct nft_set_ext *ext = elem->priv;
+ u8 genmask = nft_genmask_next(net);
+ u32 idx, off;
+
+ nft_bitmap_location(nft_set_ext_key(ext)->data[0], &idx, &off);
+ /* Enter 11 state. */
+ priv->bitmap[idx] |= (genmask << off);
+}
+
+static bool nft_bitmap_flush(const struct net *net,
+ const struct nft_set *set, void *ext)
+{
+ struct nft_bitmap *priv = nft_set_priv(set);
+ u8 genmask = nft_genmask_next(net);
+ u32 idx, off;
+
+ nft_bitmap_location(nft_set_ext_key(ext)->data[0], &idx, &off);
+ /* Enter 10 state, similar to deactivation. */
+ priv->bitmap[idx] &= ~(genmask << off);
+
+ return true;
+}
+
+static struct nft_set_ext *nft_bitmap_ext_alloc(const struct nft_set *set,
+ const struct nft_set_elem *elem)
+{
+ struct nft_set_ext_tmpl tmpl;
+ struct nft_set_ext *ext;
+
+ nft_set_ext_prepare(&tmpl);
+ nft_set_ext_add_length(&tmpl, NFT_SET_EXT_KEY, set->klen);
+
+ ext = kzalloc(tmpl.len, GFP_KERNEL);
+ if (!ext)
+ return NULL;
+
+ nft_set_ext_init(ext, &tmpl);
+ memcpy(nft_set_ext_key(ext), elem->key.val.data, set->klen);
+
+ return ext;
+}
+
+static void *nft_bitmap_deactivate(const struct net *net,
+ const struct nft_set *set,
+ const struct nft_set_elem *elem)
+{
+ struct nft_bitmap *priv = nft_set_priv(set);
+ u8 genmask = nft_genmask_next(net);
+ struct nft_set_ext *ext;
+ u32 idx, off, key = 0;
+
+ memcpy(&key, elem->key.val.data, set->klen);
+ nft_bitmap_location(key, &idx, &off);
+
+ if (!nft_bitmap_active(priv->bitmap, idx, off, genmask))
+ return NULL;
+
+ /* We have no real set extension since this is a bitmap, allocate this
+ * dummy object that is released from the commit/abort path.
+ */
+ ext = nft_bitmap_ext_alloc(set, elem);
+ if (!ext)
+ return NULL;
+
+ /* Enter 10 state. */
+ priv->bitmap[idx] &= ~(genmask << off);
+
+ return ext;
+}
+
+static void nft_bitmap_walk(const struct nft_ctx *ctx,
+ const struct nft_set *set,
+ struct nft_set_iter *iter)
+{
+ const struct nft_bitmap *priv = nft_set_priv(set);
+ struct nft_set_ext_tmpl tmpl;
+ struct nft_set_elem elem;
+ struct nft_set_ext *ext;
+ int idx, off;
+ u16 key;
+
+ nft_set_ext_prepare(&tmpl);
+ nft_set_ext_add_length(&tmpl, NFT_SET_EXT_KEY, set->klen);
+
+ for (idx = 0; idx < priv->bitmap_size; idx++) {
+ for (off = 0; off < BITS_PER_BYTE; off += 2) {
+ if (iter->count < iter->skip)
+ goto cont;
+
+ if (!nft_bitmap_active(priv->bitmap, idx, off,
+ iter->genmask))
+ goto cont;
+
+ ext = kzalloc(tmpl.len, GFP_KERNEL);
+ if (!ext) {
+ iter->err = -ENOMEM;
+ return;
+ }
+ nft_set_ext_init(ext, &tmpl);
+ key = ((idx * BITS_PER_BYTE) + off) >> 1;
+ memcpy(nft_set_ext_key(ext), &key, set->klen);
+
+ elem.priv = ext;
+ iter->err = iter->fn(ctx, set, iter, &elem);
+
+ /* On set flush, this dummy extension object is released
+ * from the commit/abort path.
+ */
+ if (!iter->flush)
+ kfree(ext);
+
+ if (iter->err < 0)
+ return;
+cont:
+ iter->count++;
+ }
+ }
+}
+
+/* The bitmap size is pow(2, key length in bits) / bits per byte. This is
+ * multiplied by two since each element takes two bits. For 8 bit keys, the
+ * bitmap consumes 66 bytes. For 16 bit keys, 16388 bytes.
+ */
+static inline u32 nft_bitmap_size(u32 klen)
+{
+ return ((2 << ((klen * BITS_PER_BYTE) - 1)) / BITS_PER_BYTE) << 1;
+}
+
+static inline u32 nft_bitmap_total_size(u32 klen)
+{
+ return sizeof(struct nft_bitmap) + nft_bitmap_size(klen);
+}
+
+static unsigned int nft_bitmap_privsize(const struct nlattr * const nla[])
+{
+ u32 klen = ntohl(nla_get_be32(nla[NFTA_SET_KEY_LEN]));
+
+ return nft_bitmap_total_size(klen);
+}
+
+static int nft_bitmap_init(const struct nft_set *set,
+ const struct nft_set_desc *desc,
+ const struct nlattr * const nla[])
+{
+ struct nft_bitmap *priv = nft_set_priv(set);
+
+ priv->bitmap_size = nft_bitmap_total_size(set->klen);
+
+ return 0;
+}
+
+static void nft_bitmap_destroy(const struct nft_set *set)
+{
+}
+
+static bool nft_bitmap_estimate(const struct nft_set_desc *desc, u32 features,
+ struct nft_set_estimate *est)
+{
+ /* Make sure bitmaps we don't get bitmaps larger than 16 Kbytes. */
+ if (desc->klen > 2)
+ return false;
+
+ est->size = nft_bitmap_total_size(desc->klen);
+ est->lookup = NFT_SET_CLASS_O_1;
+ est->space = NFT_SET_CLASS_O_1;
+
+ return true;
+}
+
+static struct nft_set_ops nft_bitmap_ops __read_mostly = {
+ .privsize = nft_bitmap_privsize,
+ .estimate = nft_bitmap_estimate,
+ .init = nft_bitmap_init,
+ .destroy = nft_bitmap_destroy,
+ .insert = nft_bitmap_insert,
+ .remove = nft_bitmap_remove,
+ .deactivate = nft_bitmap_deactivate,
+ .flush = nft_bitmap_flush,
+ .activate = nft_bitmap_activate,
+ .lookup = nft_bitmap_lookup,
+ .walk = nft_bitmap_walk,
+ .owner = THIS_MODULE,
+};
+
+static int __init nft_bitmap_module_init(void)
+{
+ return nft_register_set(&nft_bitmap_ops);
+}
+
+static void __exit nft_bitmap_module_exit(void)
+{
+ nft_unregister_set(&nft_bitmap_ops);
+}
+
+module_init(nft_bitmap_module_init);
+module_exit(nft_bitmap_module_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>");
+MODULE_ALIAS_NFT_SET();
--
2.1.4
^ permalink raw reply related [flat|nested] 8+ messages in thread