netfilter-devel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Patrick McHardy <kaber@trash.net>
To: Herbert Xu <herbert@gondor.apana.org.au>
Cc: Thomas Graf <tgraf@suug.ch>, Ying Xue <ying.xue@windriver.com>,
	davem@davemloft.net, paulmck@linux.vnet.ibm.com,
	netdev@vger.kernel.org, netfilter-devel@vger.kernel.org
Subject: Re: [PATCH 2/3] netlink: Mark dumps as inconsistent which have been interrupted by a resize
Date: Thu, 22 Jan 2015 08:56:44 +0000	[thread overview]
Message-ID: <20150122085643.GB4037@acer.localdomain> (raw)
In-Reply-To: <20150122084924.GA4720@gondor.apana.org.au>

On 22.01, Herbert Xu wrote:
> On Wed, Jan 21, 2015 at 12:17:48PM +0000, Thomas Graf wrote:
> >
> > Thanks for the review. We also need to avoid hitting 0 when we overflow
> > on a seq increment.  The netfilter code is doing this correctly but several
> > other users are suffering from this as well.
> > 
> > I'll address this in v2 together with the other discussed changes.
> 
> Could you hold off for a bit? I've got some changes that touch
> this area that I'd like to push out.  Basically I'm trying to
> eliminate direct access of rhashtable internals from the existing
> users.

Hope it will still be possible, I need it for GC in timeout support for
nftables sets.

Current patch attached for reference.


commit 91a334436d2f8eaa8261251d7d98cb700138f8b6
Author: Patrick McHardy <kaber@trash.net>
Date:   Fri Jan 16 18:12:31 2015 +0000

    netfilter: nft_hash: add support for timeouts
    
    Signed-off-by: Patrick McHardy <kaber@trash.net>

diff --git a/net/netfilter/nft_hash.c b/net/netfilter/nft_hash.c
index cba0ad2..e7cf886 100644
--- a/net/netfilter/nft_hash.c
+++ b/net/netfilter/nft_hash.c
@@ -15,6 +15,7 @@
 #include <linux/log2.h>
 #include <linux/jhash.h>
 #include <linux/netlink.h>
+#include <linux/workqueue.h>
 #include <linux/rhashtable.h>
 #include <linux/netfilter.h>
 #include <linux/netfilter/nf_tables.h>
@@ -23,19 +24,47 @@
 /* We target a hash table size of 4, element hint is 75% of final size */
 #define NFT_HASH_ELEMENT_HINT 3
 
+struct nft_hash {
+	struct rhashtable		ht;
+	struct delayed_work		gc_work;
+};
+
 struct nft_hash_elem {
 	struct rhash_head		node;
 	struct nft_set_ext		ext;
 };
 
+struct nft_hash_compare_arg {
+	const struct nft_data		*key;
+	unsigned int			len;
+};
+
+static bool nft_hash_compare(void *ptr, void *arg)
+{
+	const struct nft_hash_elem *he = ptr;
+	struct nft_hash_compare_arg *x = arg;
+
+	if (nft_data_cmp(nft_set_ext_key(&he->ext), x->key, x->len))
+		return false;
+	if (nft_set_ext_exists(&he->ext, NFT_SET_EXT_TIMEOUT) &&
+	    time_after_eq(jiffies, *nft_set_ext_timeout(&he->ext)))
+		return false;
+
+	return true;
+}
+
 static bool nft_hash_lookup(const struct nft_set *set,
 			    const struct nft_data *key,
 			    const struct nft_set_ext **ext)
 {
-	struct rhashtable *priv = nft_set_priv(set);
+	struct nft_hash *priv = nft_set_priv(set);
 	const struct nft_hash_elem *he;
+	struct nft_hash_compare_arg arg = {
+		.key	= key,
+		.len	= set->klen,
+	};
 
-	he = rhashtable_lookup(priv, key);
+	he = rhashtable_lookup_compare(&priv->ht, key, nft_hash_compare, &arg);
 	if (he != NULL)
 		*ext = &he->ext;
 
@@ -45,10 +74,10 @@ static bool nft_hash_lookup(const struct nft_set *set,
 static int nft_hash_insert(const struct nft_set *set,
 			   const struct nft_set_elem *elem)
 {
-	struct rhashtable *priv = nft_set_priv(set);
+	struct nft_hash *priv = nft_set_priv(set);
 	struct nft_hash_elem *he = elem->priv;
 
-	rhashtable_insert(priv, &he->node);
+	rhashtable_insert(&priv->ht, &he->node);
 	return 0;
 }
 
@@ -64,58 +93,43 @@ static void nft_hash_elem_destroy(const struct nft_set *set,
 static void nft_hash_remove(const struct nft_set *set,
 			    const struct nft_set_elem *elem)
 {
-	struct rhashtable *priv = nft_set_priv(set);
+	struct nft_hash *priv = nft_set_priv(set);
+	struct nft_hash_elem *he = elem->cookie;
 
-	rhashtable_remove(priv, elem->cookie);
+	rhashtable_remove(&priv->ht, &he->node);
 	synchronize_rcu();
 	kfree(elem->cookie);
 }
 
-struct nft_compare_arg {
-	const struct nft_set *set;
-	struct nft_set_elem *elem;
-};
-
-static bool nft_hash_compare(void *ptr, void *arg)
-{
-	struct nft_hash_elem *he = ptr;
-	struct nft_compare_arg *x = arg;
-
-	if (!nft_data_cmp(nft_set_ext_key(&he->ext), &x->elem->key,
-			  x->set->klen)) {
-		x->elem->cookie = he;
-		x->elem->priv  = he;
-		return true;
-	}
-
-	return false;
-}
-
 static int nft_hash_get(const struct nft_set *set, struct nft_set_elem *elem)
 {
-	struct rhashtable *priv = nft_set_priv(set);
-	struct nft_compare_arg arg = {
-		.set = set,
-		.elem = elem,
+	struct nft_hash *priv = nft_set_priv(set);
+	struct nft_hash_elem *he;
+	struct nft_hash_compare_arg arg = {
+		.key	= &elem->key,
+		.len	= set->klen,
 	};
 
-	if (rhashtable_lookup_compare(priv, &elem->key,
-				      &nft_hash_compare, &arg))
+	he = rhashtable_lookup_compare(&priv->ht, &elem->key,
+				       nft_hash_compare, &arg);
+	if (he != NULL) {
+		elem->cookie = he;
+		elem->priv   = he;
 		return 0;
-
+	}
 	return -ENOENT;
 }
 
 static void nft_hash_walk(const struct nft_ctx *ctx, const struct nft_set *set,
 			  struct nft_set_iter *iter)
 {
-	struct rhashtable *priv = nft_set_priv(set);
+	struct nft_hash *priv = nft_set_priv(set);
 	const struct bucket_table *tbl;
 	struct nft_hash_elem *he;
 	struct nft_set_elem elem;
 	unsigned int i;
 
-	tbl = rht_dereference_rcu(priv->tbl, priv);
+	tbl = rht_dereference_rcu(priv->ht.tbl, &priv->ht);
 	for (i = 0; i < tbl->size; i++) {
 		struct rhash_head *pos;
 
@@ -134,16 +148,48 @@ cont:
 	}
 }
 
+static void nft_hash_gc(struct work_struct *work)
+{
+	const struct nft_set *set;
+	const struct bucket_table *tbl;
+	struct rhash_head *pos, *next;
+	struct nft_hash_elem *he;
+	struct nft_hash *priv;
+	unsigned long timeout;
+	unsigned int i;
+
+	priv = container_of(work, struct nft_hash, gc_work.work);
+	set  = (void *)priv - offsetof(struct nft_set, data);
+
+	mutex_lock(&priv->ht.mutex);
+	tbl = rht_dereference(priv->ht.tbl, &priv->ht);
+	for (i = 0; i < tbl->size; i++) {
+		rht_for_each_entry_safe(he, pos, next, tbl, i, node) {
+			if (!nft_set_ext_exists(&he->ext, NFT_SET_EXT_TIMEOUT))
+				continue;
+			timeout = *nft_set_ext_timeout(&he->ext);
+			if (time_before(jiffies, timeout))
+				continue;
+
+			rhashtable_remove(&priv->ht, &he->node);
+			nft_hash_elem_destroy(set, he);
+		}
+	}
+	mutex_unlock(&priv->ht.mutex);
+
+	queue_delayed_work(system_power_efficient_wq, &priv->gc_work, HZ);
+}
+
 static unsigned int nft_hash_privsize(const struct nlattr * const nla[])
 {
-	return sizeof(struct rhashtable);
+	return sizeof(struct nft_hash);
 }
 
 static int nft_hash_init(const struct nft_set *set,
 			 const struct nft_set_desc *desc,
 			 const struct nlattr * const tb[])
 {
-	struct rhashtable *priv = nft_set_priv(set);
+	struct nft_hash *priv = nft_set_priv(set);
 	struct rhashtable_params params = {
 		.nelem_hint = desc->size ? : NFT_HASH_ELEMENT_HINT,
 		.head_offset = offsetof(struct nft_hash_elem, node),
@@ -153,30 +199,42 @@ static int nft_hash_init(const struct nft_set *set,
 		.grow_decision = rht_grow_above_75,
 		.shrink_decision = rht_shrink_below_30,
 	};
+	int err;
 
-	return rhashtable_init(priv, &params);
+	err = rhashtable_init(&priv->ht, &params);
+	if (err < 0)
+		return err;
+
+	INIT_DEFERRABLE_WORK(&priv->gc_work, nft_hash_gc);
+	if (set->flags & NFT_SET_TIMEOUT)
+		queue_delayed_work(system_power_efficient_wq,
+				   &priv->gc_work, HZ);
+
+	return 0;
 }
 
 static void nft_hash_destroy(const struct nft_set *set)
 {
-	struct rhashtable *priv = nft_set_priv(set);
+	struct nft_hash *priv = nft_set_priv(set);
 	const struct bucket_table *tbl;
 	struct nft_hash_elem *he;
 	struct rhash_head *pos, *next;
 	unsigned int i;
 
+	cancel_delayed_work_sync(&priv->gc_work);
+
 	/* Stop an eventual async resizing */
-	priv->being_destroyed = true;
-	mutex_lock(&priv->mutex);
+	priv->ht.being_destroyed = true;
+	mutex_lock(&priv->ht.mutex);
 
-	tbl = rht_dereference(priv->tbl, priv);
+	tbl = rht_dereference(priv->ht.tbl, &priv->ht);
 	for (i = 0; i < tbl->size; i++) {
 		rht_for_each_entry_safe(he, pos, next, tbl, i, node)
 			nft_hash_elem_destroy(set, he);
 	}
-	mutex_unlock(&priv->mutex);
+	mutex_unlock(&priv->ht.mutex);
 
-	rhashtable_destroy(priv);
+	rhashtable_destroy(&priv->ht);
 }
 
 static bool nft_hash_estimate(const struct nft_set_desc *desc, u32 features,
@@ -187,9 +245,11 @@ static bool nft_hash_estimate(const struct nft_set_desc *desc, u32 features,
 	esize = sizeof(struct nft_hash_elem);
 	if (features & NFT_SET_MAP)
 		esize += sizeof(struct nft_data);
+	if (features & NFT_SET_TIMEOUT)
+		esize += sizeof(unsigned long);
 
 	if (desc->size) {
-		est->size = sizeof(struct rhashtable) +
+		est->size = sizeof(struct nft_hash) +
 			    roundup_pow_of_two(desc->size * 4 / 3) *
 			    sizeof(struct nft_hash_elem *) +
 			    desc->size * esize;
@@ -218,7 +278,7 @@ static struct nft_set_ops nft_hash_ops __read_mostly = {
 	.remove		= nft_hash_remove,
 	.lookup		= nft_hash_lookup,
 	.walk		= nft_hash_walk,
-	.features	= NFT_SET_MAP,
+	.features	= NFT_SET_MAP | NFT_SET_TIMEOUT,
 	.owner		= THIS_MODULE,
 };
 
@@ -238,3 +298,4 @@ module_exit(nft_hash_module_exit);
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
 MODULE_ALIAS_NFT_SET();
+

  reply	other threads:[~2015-01-22  8:56 UTC|newest]

Thread overview: 79+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2015-01-20 13:20 [PATCH 0/3 net-next] rhashtable: Notify on resize to allow signaling interrupted dumps Thomas Graf
2015-01-20 13:20 ` [PATCH 1/3] rhashtable: Provide notifier for deferred resizes Thomas Graf
2015-01-20 13:20 ` [PATCH 2/3] netlink: Mark dumps as inconsistent which have been interrupted by a resize Thomas Graf
2015-01-21  8:13   ` Ying Xue
2015-01-21 12:17     ` Thomas Graf
2015-01-22  8:49       ` Herbert Xu
2015-01-22  8:56         ` Patrick McHardy [this message]
2015-01-22  9:22           ` Herbert Xu
2015-01-22 10:07             ` Patrick McHardy
2015-01-25 23:20         ` [PATCH 0/2] rhashtable: Add walk iterator primitives and use them in netlink Herbert Xu
2015-01-25 23:21           ` [PATCH 1/2] rhashtable: Introduce rhashtable_walk_* Herbert Xu
2015-01-26  8:20             ` Thomas Graf
2015-01-26 22:21               ` Herbert Xu
2015-01-26 10:09             ` David Laight
2015-01-26 22:23               ` Herbert Xu
2015-01-26 22:36                 ` David Miller
2015-01-26 22:42                   ` Herbert Xu
2015-01-26 23:31                     ` Herbert Xu
2015-01-27  9:45                       ` Thomas Graf
2015-01-27  9:54                         ` Herbert Xu
2015-01-27 10:15                           ` Thomas Graf
2015-01-27 10:24                             ` Herbert Xu
2015-01-27 11:16                               ` Thomas Graf
2015-01-27 11:23                                 ` Herbert Xu
2015-01-27 11:40                                   ` Thomas Graf
2015-01-27 20:39                                     ` Herbert Xu
2015-01-27 22:10                                       ` David Miller
2015-01-27 23:16                                         ` Herbert Xu
2015-01-27 13:09                                   ` Patrick McHardy
2015-01-27 20:36                                     ` Herbert Xu
2015-01-28 19:07                                       ` Patrick McHardy
2015-01-30  5:58                                         ` Herbert Xu
2015-01-30  8:10                                           ` Patrick McHardy
2015-01-27 10:09                 ` David Laight
2015-01-27 10:12                   ` Herbert Xu
2015-01-25 23:21           ` [PATCH 2/2] netlink: Use rhashtable walk iterator Herbert Xu
2015-01-27 23:19           ` [PATCH 0/2] rhashtable: Add walk iterator primitives and use them in netlink Herbert Xu
2015-01-27 23:20             ` [PATCH 1/2] rhashtable: Introduce rhashtable_walk_* Herbert Xu
2015-01-29 22:26               ` Thomas Graf
2015-01-27 23:20             ` [PATCH 2/2] netlink: Use rhashtable walk iterator Herbert Xu
2015-01-29 22:27               ` Thomas Graf
2015-01-29 22:42             ` [PATCH 0/2] rhashtable: Add walk iterator primitives and use them in netlink David Miller
2015-01-31  3:13               ` Herbert Xu
2015-01-31  3:14                 ` [PATCH 1/2] rhashtable: Introduce rhashtable_walk_* Herbert Xu
2015-01-31  3:14                 ` [PATCH 2/2] netlink: Use rhashtable walk iterator Herbert Xu
2015-01-31  4:31                 ` netfilter: " Herbert Xu
2015-02-01  7:45                   ` Patrick McHardy
2015-02-03  3:19                   ` David Miller
2015-02-03  3:19                 ` [PATCH 0/2] rhashtable: Add walk iterator primitives and use them in netlink David Miller
2015-01-20 13:20 ` [PATCH 3/3] netlink: Lock out table resizes while dumping Netlink sockets Thomas Graf
2015-01-20 14:31   ` Patrick McHardy
2015-01-20 14:55     ` Thomas Graf
2015-01-20 15:21       ` Patrick McHardy
2015-01-20 15:35         ` Thomas Graf
2015-01-21  5:08           ` Herbert Xu
2015-01-21  5:15             ` Herbert Xu
2015-01-21  9:14               ` Herbert Xu
2015-01-21  9:56                 ` Thomas Graf
2015-01-21  9:59                   ` Herbert Xu
2015-01-21 10:00                   ` Patrick McHardy
2015-01-21  9:37             ` Thomas Graf
2015-01-21  9:38               ` Herbert Xu
2015-01-21  9:49                 ` Thomas Graf
2015-01-21  9:58                   ` Herbert Xu
2015-01-21 10:23                     ` Thomas Graf
2015-01-22  6:35                       ` Herbert Xu
2015-01-22  7:20                         ` Herbert Xu
2015-01-22  9:05                           ` Thomas Graf
2015-01-22  9:50                             ` Herbert Xu
2015-01-21 10:34                     ` Thomas Graf
2015-01-21 10:40                       ` Patrick McHardy
2015-01-21 11:37                         ` Thomas Graf
2015-01-21 11:59                           ` Patrick McHardy
2015-01-21 12:07                             ` Thomas Graf
2015-01-21 12:09                               ` Patrick McHardy
2015-01-21 10:36               ` David Laight
2015-01-20 15:00     ` David Laight
2015-01-20 15:05       ` Thomas Graf
2015-01-21  5:11     ` Herbert Xu

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20150122085643.GB4037@acer.localdomain \
    --to=kaber@trash.net \
    --cc=davem@davemloft.net \
    --cc=herbert@gondor.apana.org.au \
    --cc=netdev@vger.kernel.org \
    --cc=netfilter-devel@vger.kernel.org \
    --cc=paulmck@linux.vnet.ibm.com \
    --cc=tgraf@suug.ch \
    --cc=ying.xue@windriver.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).