From: Pablo Neira Ayuso <pablo@netfilter.org>
To: netfilter-devel@vger.kernel.org
Cc: davem@davemloft.net, netdev@vger.kernel.org
Subject: [PATCH 20/32] netfilter: ipset: Fix parallel resizing and listing of the same set
Date: Mon, 15 Jun 2015 23:26:17 +0200 [thread overview]
Message-ID: <1434403589-24796-21-git-send-email-pablo@netfilter.org> (raw)
In-Reply-To: <1434403589-24796-1-git-send-email-pablo@netfilter.org>
From: Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
When elements added to a hash:* type of set and resizing triggered,
parallel listing could start to list the original set (before resizing)
and "continue" with listing the new set. Fix it by references and
using the original hash table for listing. Therefore the destroying of
the original hash table may happen from the resizing or listing functions.
Signed-off-by: Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
---
include/linux/netfilter/ipset/ip_set.h | 13 ++++++----
net/netfilter/ipset/ip_set_core.c | 29 ++++++++++++---------
net/netfilter/ipset/ip_set_hash_gen.h | 44 +++++++++++++++++++++++++++++---
3 files changed, 66 insertions(+), 20 deletions(-)
diff --git a/include/linux/netfilter/ipset/ip_set.h b/include/linux/netfilter/ipset/ip_set.h
index a6fe1ce..5674b6a 100644
--- a/include/linux/netfilter/ipset/ip_set.h
+++ b/include/linux/netfilter/ipset/ip_set.h
@@ -176,6 +176,9 @@ struct ip_set_type_variant {
/* List elements */
int (*list)(const struct ip_set *set, struct sk_buff *skb,
struct netlink_callback *cb);
+ /* Keep listing private when resizing runs parallel */
+ void (*uref)(struct ip_set *set, struct netlink_callback *cb,
+ bool start);
/* Return true if "b" set is the same as "a"
* according to the create set parameters */
@@ -380,12 +383,12 @@ ip_set_init_counter(struct ip_set_counter *counter,
/* Netlink CB args */
enum {
- IPSET_CB_NET = 0,
- IPSET_CB_DUMP,
- IPSET_CB_INDEX,
- IPSET_CB_ARG0,
+ IPSET_CB_NET = 0, /* net namespace */
+ IPSET_CB_DUMP, /* dump single set/all sets */
+ IPSET_CB_INDEX, /* set index */
+ IPSET_CB_PRIVATE, /* set private data */
+ IPSET_CB_ARG0, /* type specific */
IPSET_CB_ARG1,
- IPSET_CB_ARG2,
};
/* register and unregister set references */
diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c
index 68ae551..777cac6 100644
--- a/net/netfilter/ipset/ip_set_core.c
+++ b/net/netfilter/ipset/ip_set_core.c
@@ -1211,12 +1211,16 @@ ip_set_swap(struct sock *ctnl, struct sk_buff *skb,
static int
ip_set_dump_done(struct netlink_callback *cb)
{
- struct ip_set_net *inst = (struct ip_set_net *)cb->args[IPSET_CB_NET];
if (cb->args[IPSET_CB_ARG0]) {
- pr_debug("release set %s\n",
- ip_set(inst, cb->args[IPSET_CB_INDEX])->name);
- __ip_set_put_byindex(inst,
- (ip_set_id_t) cb->args[IPSET_CB_INDEX]);
+ struct ip_set_net *inst =
+ (struct ip_set_net *)cb->args[IPSET_CB_NET];
+ ip_set_id_t index = (ip_set_id_t)cb->args[IPSET_CB_INDEX];
+ struct ip_set *set = ip_set(inst, index);
+
+ if (set->variant->uref)
+ set->variant->uref(set, cb, false);
+ pr_debug("release set %s\n", set->name);
+ __ip_set_put_byindex(inst, index);
}
return 0;
}
@@ -1247,12 +1251,6 @@ dump_init(struct netlink_callback *cb, struct ip_set_net *inst)
nla_parse(cda, IPSET_ATTR_CMD_MAX,
attr, nlh->nlmsg_len - min_len, ip_set_setname_policy);
- /* cb->args[IPSET_CB_NET]: net namespace
- * [IPSET_CB_DUMP]: dump single set/all sets
- * [IPSET_CB_INDEX]: set index
- * [IPSET_CB_ARG0]: type specific
- */
-
if (cda[IPSET_ATTR_SETNAME]) {
struct ip_set *set;
@@ -1359,6 +1357,8 @@ dump_last:
goto release_refcount;
if (dump_flags & IPSET_FLAG_LIST_HEADER)
goto next_set;
+ if (set->variant->uref)
+ set->variant->uref(set, cb, true);
/* Fall through and add elements */
default:
read_lock_bh(&set->lock);
@@ -1375,6 +1375,8 @@ dump_last:
dump_type = DUMP_LAST;
cb->args[IPSET_CB_DUMP] = dump_type | (dump_flags << 16);
cb->args[IPSET_CB_INDEX] = 0;
+ if (set && set->variant->uref)
+ set->variant->uref(set, cb, false);
goto dump_last;
}
goto out;
@@ -1389,7 +1391,10 @@ next_set:
release_refcount:
/* If there was an error or set is done, release set */
if (ret || !cb->args[IPSET_CB_ARG0]) {
- pr_debug("release set %s\n", ip_set(inst, index)->name);
+ set = ip_set(inst, index);
+ if (set->variant->uref)
+ set->variant->uref(set, cb, false);
+ pr_debug("release set %s\n", set->name);
__ip_set_put_byindex(inst, index);
cb->args[IPSET_CB_ARG0] = 0;
}
diff --git a/net/netfilter/ipset/ip_set_hash_gen.h b/net/netfilter/ipset/ip_set_hash_gen.h
index 2f1985e..5fcf70b 100644
--- a/net/netfilter/ipset/ip_set_hash_gen.h
+++ b/net/netfilter/ipset/ip_set_hash_gen.h
@@ -71,6 +71,8 @@ struct hbucket {
/* The hash table: the table size stored here in order to make resizing easy */
struct htable {
+ atomic_t ref; /* References for resizing */
+ atomic_t uref; /* References for dumping */
u8 htable_bits; /* size of hash table == 2^htable_bits */
struct hbucket bucket[0]; /* hashtable buckets */
};
@@ -207,6 +209,7 @@ hbucket_elem_add(struct hbucket *n, u8 ahash_max, size_t dsize)
#undef mtype_del
#undef mtype_test_cidrs
#undef mtype_test
+#undef mtype_uref
#undef mtype_expire
#undef mtype_resize
#undef mtype_head
@@ -248,6 +251,7 @@ hbucket_elem_add(struct hbucket *n, u8 ahash_max, size_t dsize)
#define mtype_del IPSET_TOKEN(MTYPE, _del)
#define mtype_test_cidrs IPSET_TOKEN(MTYPE, _test_cidrs)
#define mtype_test IPSET_TOKEN(MTYPE, _test)
+#define mtype_uref IPSET_TOKEN(MTYPE, _uref)
#define mtype_expire IPSET_TOKEN(MTYPE, _expire)
#define mtype_resize IPSET_TOKEN(MTYPE, _resize)
#define mtype_head IPSET_TOKEN(MTYPE, _head)
@@ -595,6 +599,9 @@ retry:
t->htable_bits = htable_bits;
read_lock_bh(&set->lock);
+ /* There can't be another parallel resizing, but dumping is possible */
+ atomic_set(&orig->ref, 1);
+ atomic_inc(&orig->uref);
for (i = 0; i < jhash_size(orig->htable_bits); i++) {
n = hbucket(orig, i);
for (j = 0; j < n->pos; j++) {
@@ -609,6 +616,8 @@ retry:
#ifdef IP_SET_HASH_WITH_NETS
mtype_data_reset_flags(data, &flags);
#endif
+ atomic_set(&orig->ref, 0);
+ atomic_dec(&orig->uref);
read_unlock_bh(&set->lock);
mtype_ahash_destroy(set, t, false);
if (ret == -EAGAIN)
@@ -631,7 +640,11 @@ retry:
pr_debug("set %s resized from %u (%p) to %u (%p)\n", set->name,
orig->htable_bits, orig, t->htable_bits, t);
- mtype_ahash_destroy(set, orig, false);
+ /* If there's nobody else dumping the table, destroy it */
+ if (atomic_dec_and_test(&orig->uref)) {
+ pr_debug("Table destroy by resize %p\n", orig);
+ mtype_ahash_destroy(set, orig, false);
+ }
return 0;
}
@@ -961,13 +974,36 @@ nla_put_failure:
return -EMSGSIZE;
}
+/* Make possible to run dumping parallel with resizing */
+static void
+mtype_uref(struct ip_set *set, struct netlink_callback *cb, bool start)
+{
+ struct htype *h = set->data;
+ struct htable *t;
+
+ if (start) {
+ rcu_read_lock_bh();
+ t = rcu_dereference_bh_nfnl(h->table);
+ atomic_inc(&t->uref);
+ cb->args[IPSET_CB_PRIVATE] = (unsigned long)t;
+ rcu_read_unlock_bh();
+ } else if (cb->args[IPSET_CB_PRIVATE]) {
+ t = (struct htable *)cb->args[IPSET_CB_PRIVATE];
+ if (atomic_dec_and_test(&t->uref) && atomic_read(&t->ref)) {
+ /* Resizing didn't destroy the hash table */
+ pr_debug("Table destroy by dump: %p\n", t);
+ mtype_ahash_destroy(set, t, false);
+ }
+ cb->args[IPSET_CB_PRIVATE] = 0;
+ }
+}
+
/* Reply a LIST/SAVE request: dump the elements of the specified set */
static int
mtype_list(const struct ip_set *set,
struct sk_buff *skb, struct netlink_callback *cb)
{
- const struct htype *h = set->data;
- const struct htable *t = rcu_dereference_bh_nfnl(h->table);
+ const struct htable *t;
struct nlattr *atd, *nested;
const struct hbucket *n;
const struct mtype_elem *e;
@@ -980,6 +1016,7 @@ mtype_list(const struct ip_set *set,
if (!atd)
return -EMSGSIZE;
pr_debug("list hash set %s\n", set->name);
+ t = (const struct htable *)cb->args[IPSET_CB_PRIVATE];
for (; cb->args[IPSET_CB_ARG0] < jhash_size(t->htable_bits);
cb->args[IPSET_CB_ARG0]++) {
incomplete = skb_tail_pointer(skb);
@@ -1047,6 +1084,7 @@ static const struct ip_set_type_variant mtype_variant = {
.flush = mtype_flush,
.head = mtype_head,
.list = mtype_list,
+ .uref = mtype_uref,
.resize = mtype_resize,
.same_set = mtype_same_set,
};
--
1.7.10.4
next prev parent reply other threads:[~2015-06-15 21:21 UTC|newest]
Thread overview: 36+ messages / expand[flat|nested] mbox.gz Atom feed top
2015-06-15 21:25 [PATCH 00/32] Netfilter updates for net-next Pablo Neira Ayuso
2015-06-15 21:25 ` [PATCH 01/32] netfilter: conntrack: warn the user if there is a better helper to use Pablo Neira Ayuso
2015-06-15 21:25 ` [PATCH 02/32] netfilter: bridge: refactor clearing BRNF_NF_BRIDGE_PREROUTING Pablo Neira Ayuso
2015-06-15 21:26 ` [PATCH 03/32] netfilter: bridge: re-order br_nf_pre_routing_finish_ipv6() Pablo Neira Ayuso
2015-06-15 21:26 ` [PATCH 04/32] netfilter: bridge: detect NAT66 correctly and change MAC address Pablo Neira Ayuso
2015-06-15 21:26 ` [PATCH 05/32] netfilter: bridge: refactor frag_max_size Pablo Neira Ayuso
2015-06-15 21:26 ` [PATCH 06/32] netfilter: bridge: rename br_parse_ip_options Pablo Neira Ayuso
2015-06-15 21:26 ` [PATCH 07/32] netfilter: bridge: re-order check_hbh_len() Pablo Neira Ayuso
2015-06-15 21:26 ` [PATCH 08/32] netfilter: bridge: forward IPv6 fragmented packets Pablo Neira Ayuso
2015-06-15 21:26 ` [PATCH 09/32] net: ip_fragment: remove BRIDGE_NETFILTER mtu special handling Pablo Neira Ayuso
2015-06-15 21:26 ` [PATCH 10/32] netfilter: bridge: restore vlan tag when refragmenting Pablo Neira Ayuso
2015-06-15 21:26 ` [PATCH 11/32] netfilter: xtables: use percpu rule counters Pablo Neira Ayuso
2015-06-15 21:26 ` [PATCH 12/32] netfilter: xtables: avoid percpu ruleset duplication Pablo Neira Ayuso
2015-06-15 21:26 ` [PATCH 13/32] netfilter: ipset: Use MSEC_PER_SEC consistently Pablo Neira Ayuso
2015-06-15 21:26 ` [PATCH 14/32] netfilter: ipset: Use SET_WITH_*() helpers to test set extensions Pablo Neira Ayuso
2015-06-15 21:26 ` [PATCH 15/32] netfilter: ipset: Check extensions attributes before getting extensions Pablo Neira Ayuso
2015-06-15 21:26 ` [PATCH 16/32] netfilter: ipset: Permit CIDR equal to the host address CIDR in IPv6 Pablo Neira Ayuso
2015-06-15 21:26 ` [PATCH 17/32] netfilter: ipset: Make sure we always return line number on batch Pablo Neira Ayuso
2015-06-15 21:26 ` [PATCH 18/32] netfilter: ipset: Check CIDR value only when attribute is given Pablo Neira Ayuso
2015-06-15 21:26 ` [PATCH 19/32] netfilter: ipset: Fix cidr handling for hash:*net* types Pablo Neira Ayuso
2015-06-15 21:26 ` Pablo Neira Ayuso [this message]
2015-06-15 21:26 ` [PATCH 21/32] netfilter: ipset: Make sure listing doesn't grab a set which is just being destroyed Pablo Neira Ayuso
2015-06-15 21:26 ` [PATCH 22/32] netfilter:ipset Remove rbtree from hash:net,iface Pablo Neira Ayuso
2015-06-15 21:26 ` [PATCH 23/32] netfilter: ipset: Prepare the ipset core to use RCU at set level Pablo Neira Ayuso
2015-06-15 21:26 ` [PATCH 24/32] netfilter: ipset: Introduce RCU locking in bitmap:* types Pablo Neira Ayuso
2015-06-15 21:26 ` [PATCH 25/32] netfilter: ipset: Introduce RCU locking in hash:* types Pablo Neira Ayuso
2015-06-15 21:26 ` [PATCH 26/32] netfilter: ipset: Introduce RCU locking in list type Pablo Neira Ayuso
2015-06-15 21:26 ` [PATCH 27/32] netfilter: ipset: Fix coding styles reported by checkpatch.pl Pablo Neira Ayuso
2015-06-15 21:26 ` [PATCH 28/32] netfilter: Kconfig: get rid of parens around depends on Pablo Neira Ayuso
2015-06-15 21:26 ` [PATCH 29/32] netfilter: x_tables: remove XT_TABLE_INFO_SZ and a dereference Pablo Neira Ayuso
2015-06-15 21:26 ` [PATCH 30/32] netfilter: nf_tables: attach net_device to basechain Pablo Neira Ayuso
2015-06-15 21:26 ` [PATCH 31/32] netfilter: nf_tables: add nft_register_basechain() and nft_unregister_basechain() Pablo Neira Ayuso
2015-06-15 21:26 ` [PATCH 32/32] netfilter: nf_tables_netdev: unregister hooks on net_device removal Pablo Neira Ayuso
2015-06-15 21:41 ` [PATCH 00/32] Netfilter updates for net-next David Miller
2015-06-20 13:11 ` Jakub Kiciński
2015-06-20 18:30 ` Pablo Neira Ayuso
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1434403589-24796-21-git-send-email-pablo@netfilter.org \
--to=pablo@netfilter.org \
--cc=davem@davemloft.net \
--cc=netdev@vger.kernel.org \
--cc=netfilter-devel@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).