From: Patrick McHardy <kaber@trash.net>
To: Herbert Xu <herbert@gondor.apana.org.au>
Cc: Thomas Graf <tgraf@suug.ch>, Ying Xue <ying.xue@windriver.com>,
davem@davemloft.net, paulmck@linux.vnet.ibm.com,
netdev@vger.kernel.org, netfilter-devel@vger.kernel.org
Subject: Re: [PATCH 2/3] netlink: Mark dumps as inconsistent which have been interrupted by a resize
Date: Thu, 22 Jan 2015 08:56:44 +0000 [thread overview]
Message-ID: <20150122085643.GB4037@acer.localdomain> (raw)
In-Reply-To: <20150122084924.GA4720@gondor.apana.org.au>
On 22.01, Herbert Xu wrote:
> On Wed, Jan 21, 2015 at 12:17:48PM +0000, Thomas Graf wrote:
> >
> > Thanks for the review. We also need to avoid hitting 0 when we overflow
> > on a seq increment. The netfilter code is doing this correctly but several
> > other users are suffering from this as well.
> >
> > I'll address this in v2 together with the other discussed changes.
>
> Could you hold off for a bit? I've got some changes that touch
> this area that I'd like to push out. Basically I'm trying to
> eliminate direct access of rhashtable internals from the existing
> users.
Hope it will still be possible, I need it for GC in timeout support for
nftables sets.
Current patch attached for reference.
commit 91a334436d2f8eaa8261251d7d98cb700138f8b6
Author: Patrick McHardy <kaber@trash.net>
Date: Fri Jan 16 18:12:31 2015 +0000
netfilter: nft_hash: add support for timeouts
Signed-off-by: Patrick McHardy <kaber@trash.net>
diff --git a/net/netfilter/nft_hash.c b/net/netfilter/nft_hash.c
index cba0ad2..e7cf886 100644
--- a/net/netfilter/nft_hash.c
+++ b/net/netfilter/nft_hash.c
@@ -15,6 +15,7 @@
#include <linux/log2.h>
#include <linux/jhash.h>
#include <linux/netlink.h>
+#include <linux/workqueue.h>
#include <linux/rhashtable.h>
#include <linux/netfilter.h>
#include <linux/netfilter/nf_tables.h>
@@ -23,19 +24,47 @@
/* We target a hash table size of 4, element hint is 75% of final size */
#define NFT_HASH_ELEMENT_HINT 3
+struct nft_hash {
+ struct rhashtable ht;
+ struct delayed_work gc_work;
+};
+
struct nft_hash_elem {
struct rhash_head node;
struct nft_set_ext ext;
};
+struct nft_hash_compare_arg {
+ const struct nft_data *key;
+ unsigned int len;
+};
+
+static bool nft_hash_compare(void *ptr, void *arg)
+{
+ const struct nft_hash_elem *he = ptr;
+ struct nft_hash_compare_arg *x = arg;
+
+ if (nft_data_cmp(nft_set_ext_key(&he->ext), x->key, x->len))
+ return false;
+ if (nft_set_ext_exists(&he->ext, NFT_SET_EXT_TIMEOUT) &&
+ time_after_eq(jiffies, *nft_set_ext_timeout(&he->ext)))
+ return false;
+
+ return true;
+}
+
static bool nft_hash_lookup(const struct nft_set *set,
const struct nft_data *key,
const struct nft_set_ext **ext)
{
- struct rhashtable *priv = nft_set_priv(set);
+ struct nft_hash *priv = nft_set_priv(set);
const struct nft_hash_elem *he;
+ struct nft_hash_compare_arg arg = {
+ .key = key,
+ .len = set->klen,
+ };
- he = rhashtable_lookup(priv, key);
+ he = rhashtable_lookup_compare(&priv->ht, key, nft_hash_compare, &arg);
if (he != NULL)
*ext = &he->ext;
@@ -45,10 +74,10 @@ static bool nft_hash_lookup(const struct nft_set *set,
static int nft_hash_insert(const struct nft_set *set,
const struct nft_set_elem *elem)
{
- struct rhashtable *priv = nft_set_priv(set);
+ struct nft_hash *priv = nft_set_priv(set);
struct nft_hash_elem *he = elem->priv;
- rhashtable_insert(priv, &he->node);
+ rhashtable_insert(&priv->ht, &he->node);
return 0;
}
@@ -64,58 +93,43 @@ static void nft_hash_elem_destroy(const struct nft_set *set,
static void nft_hash_remove(const struct nft_set *set,
const struct nft_set_elem *elem)
{
- struct rhashtable *priv = nft_set_priv(set);
+ struct nft_hash *priv = nft_set_priv(set);
+ struct nft_hash_elem *he = elem->cookie;
- rhashtable_remove(priv, elem->cookie);
+ rhashtable_remove(&priv->ht, &he->node);
synchronize_rcu();
kfree(elem->cookie);
}
-struct nft_compare_arg {
- const struct nft_set *set;
- struct nft_set_elem *elem;
-};
-
-static bool nft_hash_compare(void *ptr, void *arg)
-{
- struct nft_hash_elem *he = ptr;
- struct nft_compare_arg *x = arg;
-
- if (!nft_data_cmp(nft_set_ext_key(&he->ext), &x->elem->key,
- x->set->klen)) {
- x->elem->cookie = he;
- x->elem->priv = he;
- return true;
- }
-
- return false;
-}
-
static int nft_hash_get(const struct nft_set *set, struct nft_set_elem *elem)
{
- struct rhashtable *priv = nft_set_priv(set);
- struct nft_compare_arg arg = {
- .set = set,
- .elem = elem,
+ struct nft_hash *priv = nft_set_priv(set);
+ struct nft_hash_elem *he;
+ struct nft_hash_compare_arg arg = {
+ .key = &elem->key,
+ .len = set->klen,
};
- if (rhashtable_lookup_compare(priv, &elem->key,
- &nft_hash_compare, &arg))
+ he = rhashtable_lookup_compare(&priv->ht, &elem->key,
+ nft_hash_compare, &arg);
+ if (he != NULL) {
+ elem->cookie = he;
+ elem->priv = he;
return 0;
-
+ }
return -ENOENT;
}
static void nft_hash_walk(const struct nft_ctx *ctx, const struct nft_set *set,
struct nft_set_iter *iter)
{
- struct rhashtable *priv = nft_set_priv(set);
+ struct nft_hash *priv = nft_set_priv(set);
const struct bucket_table *tbl;
struct nft_hash_elem *he;
struct nft_set_elem elem;
unsigned int i;
- tbl = rht_dereference_rcu(priv->tbl, priv);
+ tbl = rht_dereference_rcu(priv->ht.tbl, &priv->ht);
for (i = 0; i < tbl->size; i++) {
struct rhash_head *pos;
@@ -134,16 +148,48 @@ cont:
}
}
+static void nft_hash_gc(struct work_struct *work)
+{
+ const struct nft_set *set;
+ const struct bucket_table *tbl;
+ struct rhash_head *pos, *next;
+ struct nft_hash_elem *he;
+ struct nft_hash *priv;
+ unsigned long timeout;
+ unsigned int i;
+
+ priv = container_of(work, struct nft_hash, gc_work.work);
+ set = (void *)priv - offsetof(struct nft_set, data);
+
+ mutex_lock(&priv->ht.mutex);
+ tbl = rht_dereference(priv->ht.tbl, &priv->ht);
+ for (i = 0; i < tbl->size; i++) {
+ rht_for_each_entry_safe(he, pos, next, tbl, i, node) {
+ if (!nft_set_ext_exists(&he->ext, NFT_SET_EXT_TIMEOUT))
+ continue;
+ timeout = *nft_set_ext_timeout(&he->ext);
+ if (time_before(jiffies, timeout))
+ continue;
+
+ rhashtable_remove(&priv->ht, &he->node);
+ nft_hash_elem_destroy(set, he);
+ }
+ }
+ mutex_unlock(&priv->ht.mutex);
+
+ queue_delayed_work(system_power_efficient_wq, &priv->gc_work, HZ);
+}
+
static unsigned int nft_hash_privsize(const struct nlattr * const nla[])
{
- return sizeof(struct rhashtable);
+ return sizeof(struct nft_hash);
}
static int nft_hash_init(const struct nft_set *set,
const struct nft_set_desc *desc,
const struct nlattr * const tb[])
{
- struct rhashtable *priv = nft_set_priv(set);
+ struct nft_hash *priv = nft_set_priv(set);
struct rhashtable_params params = {
.nelem_hint = desc->size ? : NFT_HASH_ELEMENT_HINT,
.head_offset = offsetof(struct nft_hash_elem, node),
@@ -153,30 +199,42 @@ static int nft_hash_init(const struct nft_set *set,
.grow_decision = rht_grow_above_75,
.shrink_decision = rht_shrink_below_30,
};
+ int err;
- return rhashtable_init(priv, ¶ms);
+ err = rhashtable_init(&priv->ht, ¶ms);
+ if (err < 0)
+ return err;
+
+ INIT_DEFERRABLE_WORK(&priv->gc_work, nft_hash_gc);
+ if (set->flags & NFT_SET_TIMEOUT)
+ queue_delayed_work(system_power_efficient_wq,
+ &priv->gc_work, HZ);
+
+ return 0;
}
static void nft_hash_destroy(const struct nft_set *set)
{
- struct rhashtable *priv = nft_set_priv(set);
+ struct nft_hash *priv = nft_set_priv(set);
const struct bucket_table *tbl;
struct nft_hash_elem *he;
struct rhash_head *pos, *next;
unsigned int i;
+ cancel_delayed_work_sync(&priv->gc_work);
+
/* Stop an eventual async resizing */
- priv->being_destroyed = true;
- mutex_lock(&priv->mutex);
+ priv->ht.being_destroyed = true;
+ mutex_lock(&priv->ht.mutex);
- tbl = rht_dereference(priv->tbl, priv);
+ tbl = rht_dereference(priv->ht.tbl, &priv->ht);
for (i = 0; i < tbl->size; i++) {
rht_for_each_entry_safe(he, pos, next, tbl, i, node)
nft_hash_elem_destroy(set, he);
}
- mutex_unlock(&priv->mutex);
+ mutex_unlock(&priv->ht.mutex);
- rhashtable_destroy(priv);
+ rhashtable_destroy(&priv->ht);
}
static bool nft_hash_estimate(const struct nft_set_desc *desc, u32 features,
@@ -187,9 +245,11 @@ static bool nft_hash_estimate(const struct nft_set_desc *desc, u32 features,
esize = sizeof(struct nft_hash_elem);
if (features & NFT_SET_MAP)
esize += sizeof(struct nft_data);
+ if (features & NFT_SET_TIMEOUT)
+ esize += sizeof(unsigned long);
if (desc->size) {
- est->size = sizeof(struct rhashtable) +
+ est->size = sizeof(struct nft_hash) +
roundup_pow_of_two(desc->size * 4 / 3) *
sizeof(struct nft_hash_elem *) +
desc->size * esize;
@@ -218,7 +278,7 @@ static struct nft_set_ops nft_hash_ops __read_mostly = {
.remove = nft_hash_remove,
.lookup = nft_hash_lookup,
.walk = nft_hash_walk,
- .features = NFT_SET_MAP,
+ .features = NFT_SET_MAP | NFT_SET_TIMEOUT,
.owner = THIS_MODULE,
};
@@ -238,3 +298,4 @@ module_exit(nft_hash_module_exit);
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
MODULE_ALIAS_NFT_SET();
+
next prev parent reply other threads:[~2015-01-22 8:56 UTC|newest]
Thread overview: 79+ messages / expand[flat|nested] mbox.gz Atom feed top
2015-01-20 13:20 [PATCH 0/3 net-next] rhashtable: Notify on resize to allow signaling interrupted dumps Thomas Graf
2015-01-20 13:20 ` [PATCH 1/3] rhashtable: Provide notifier for deferred resizes Thomas Graf
2015-01-20 13:20 ` [PATCH 2/3] netlink: Mark dumps as inconsistent which have been interrupted by a resize Thomas Graf
2015-01-21 8:13 ` Ying Xue
2015-01-21 12:17 ` Thomas Graf
2015-01-22 8:49 ` Herbert Xu
2015-01-22 8:56 ` Patrick McHardy [this message]
2015-01-22 9:22 ` Herbert Xu
2015-01-22 10:07 ` Patrick McHardy
2015-01-25 23:20 ` [PATCH 0/2] rhashtable: Add walk iterator primitives and use them in netlink Herbert Xu
2015-01-25 23:21 ` [PATCH 1/2] rhashtable: Introduce rhashtable_walk_* Herbert Xu
2015-01-26 8:20 ` Thomas Graf
2015-01-26 22:21 ` Herbert Xu
2015-01-26 10:09 ` David Laight
2015-01-26 22:23 ` Herbert Xu
2015-01-26 22:36 ` David Miller
2015-01-26 22:42 ` Herbert Xu
2015-01-26 23:31 ` Herbert Xu
2015-01-27 9:45 ` Thomas Graf
2015-01-27 9:54 ` Herbert Xu
2015-01-27 10:15 ` Thomas Graf
2015-01-27 10:24 ` Herbert Xu
2015-01-27 11:16 ` Thomas Graf
2015-01-27 11:23 ` Herbert Xu
2015-01-27 11:40 ` Thomas Graf
2015-01-27 20:39 ` Herbert Xu
2015-01-27 22:10 ` David Miller
2015-01-27 23:16 ` Herbert Xu
2015-01-27 13:09 ` Patrick McHardy
2015-01-27 20:36 ` Herbert Xu
2015-01-28 19:07 ` Patrick McHardy
2015-01-30 5:58 ` Herbert Xu
2015-01-30 8:10 ` Patrick McHardy
2015-01-27 10:09 ` David Laight
2015-01-27 10:12 ` Herbert Xu
2015-01-25 23:21 ` [PATCH 2/2] netlink: Use rhashtable walk iterator Herbert Xu
2015-01-27 23:19 ` [PATCH 0/2] rhashtable: Add walk iterator primitives and use them in netlink Herbert Xu
2015-01-27 23:20 ` [PATCH 1/2] rhashtable: Introduce rhashtable_walk_* Herbert Xu
2015-01-29 22:26 ` Thomas Graf
2015-01-27 23:20 ` [PATCH 2/2] netlink: Use rhashtable walk iterator Herbert Xu
2015-01-29 22:27 ` Thomas Graf
2015-01-29 22:42 ` [PATCH 0/2] rhashtable: Add walk iterator primitives and use them in netlink David Miller
2015-01-31 3:13 ` Herbert Xu
2015-01-31 3:14 ` [PATCH 1/2] rhashtable: Introduce rhashtable_walk_* Herbert Xu
2015-01-31 3:14 ` [PATCH 2/2] netlink: Use rhashtable walk iterator Herbert Xu
2015-01-31 4:31 ` netfilter: " Herbert Xu
2015-02-01 7:45 ` Patrick McHardy
2015-02-03 3:19 ` David Miller
2015-02-03 3:19 ` [PATCH 0/2] rhashtable: Add walk iterator primitives and use them in netlink David Miller
2015-01-20 13:20 ` [PATCH 3/3] netlink: Lock out table resizes while dumping Netlink sockets Thomas Graf
2015-01-20 14:31 ` Patrick McHardy
2015-01-20 14:55 ` Thomas Graf
2015-01-20 15:21 ` Patrick McHardy
2015-01-20 15:35 ` Thomas Graf
2015-01-21 5:08 ` Herbert Xu
2015-01-21 5:15 ` Herbert Xu
2015-01-21 9:14 ` Herbert Xu
2015-01-21 9:56 ` Thomas Graf
2015-01-21 9:59 ` Herbert Xu
2015-01-21 10:00 ` Patrick McHardy
2015-01-21 9:37 ` Thomas Graf
2015-01-21 9:38 ` Herbert Xu
2015-01-21 9:49 ` Thomas Graf
2015-01-21 9:58 ` Herbert Xu
2015-01-21 10:23 ` Thomas Graf
2015-01-22 6:35 ` Herbert Xu
2015-01-22 7:20 ` Herbert Xu
2015-01-22 9:05 ` Thomas Graf
2015-01-22 9:50 ` Herbert Xu
2015-01-21 10:34 ` Thomas Graf
2015-01-21 10:40 ` Patrick McHardy
2015-01-21 11:37 ` Thomas Graf
2015-01-21 11:59 ` Patrick McHardy
2015-01-21 12:07 ` Thomas Graf
2015-01-21 12:09 ` Patrick McHardy
2015-01-21 10:36 ` David Laight
2015-01-20 15:00 ` David Laight
2015-01-20 15:05 ` Thomas Graf
2015-01-21 5:11 ` Herbert Xu
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20150122085643.GB4037@acer.localdomain \
--to=kaber@trash.net \
--cc=davem@davemloft.net \
--cc=herbert@gondor.apana.org.au \
--cc=netdev@vger.kernel.org \
--cc=netfilter-devel@vger.kernel.org \
--cc=paulmck@linux.vnet.ibm.com \
--cc=tgraf@suug.ch \
--cc=ying.xue@windriver.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).