netfilter-devel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH 0/2] netfilter:ipset fixes: list:set and refcounting
@ 2011-03-25 10:42 Jozsef Kadlecsik
  2011-03-25 10:42 ` [PATCH 1/2] netfilter:ipset: list:set timeout variant fixes Jozsef Kadlecsik
  0 siblings, 1 reply; 7+ messages in thread
From: Jozsef Kadlecsik @ 2011-03-25 10:42 UTC (permalink / raw)
  To: netfilter-devel; +Cc: Patrick McHardy, Jozsef Kadlecsik

Hi Patrick,

Please apply the next two patches for ipset:

- list:set timeout variant fixes
- References are protected by rwlock instead of mutex

Best regards,
Jozsef

Jozsef Kadlecsik (2):
  netfilter:ipset: list:set timeout variant fixes
  netfilter:ipset: References are protected by rwlock instead of mutex

 include/linux/netfilter/ipset/ip_set.h       |    2 +-
 include/linux/netfilter/ipset/ip_set_ahash.h |    3 +-
 net/netfilter/ipset/ip_set_bitmap_ip.c       |    3 +-
 net/netfilter/ipset/ip_set_bitmap_ipmac.c    |    3 +-
 net/netfilter/ipset/ip_set_bitmap_port.c     |    3 +-
 net/netfilter/ipset/ip_set_core.c            |  109 ++++++++++++++++----------
 net/netfilter/ipset/ip_set_list_set.c        |   53 ++++++-------
 7 files changed, 94 insertions(+), 82 deletions(-)


^ permalink raw reply	[flat|nested] 7+ messages in thread

* [PATCH 1/2] netfilter:ipset: list:set timeout variant fixes
  2011-03-25 10:42 [PATCH 0/2] netfilter:ipset fixes: list:set and refcounting Jozsef Kadlecsik
@ 2011-03-25 10:42 ` Jozsef Kadlecsik
  2011-03-25 10:42   ` [PATCH 2/2] netfilter:ipset: References are protected by rwlock instead of mutex Jozsef Kadlecsik
  2011-03-28 12:25   ` [PATCH 1/2] netfilter:ipset: list:set timeout variant fixes Patrick McHardy
  0 siblings, 2 replies; 7+ messages in thread
From: Jozsef Kadlecsik @ 2011-03-25 10:42 UTC (permalink / raw)
  To: netfilter-devel; +Cc: Patrick McHardy, Jozsef Kadlecsik

- the timeout value was actually not set
- the garbage collector was broken

The variant is fixed, the tests to the ipset testsuite are added.

Signed-off-by: Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
---
 net/netfilter/ipset/ip_set_list_set.c |   53 +++++++++++++++------------------
 1 files changed, 24 insertions(+), 29 deletions(-)

diff --git a/net/netfilter/ipset/ip_set_list_set.c b/net/netfilter/ipset/ip_set_list_set.c
index a47c329..f4a46c0 100644
--- a/net/netfilter/ipset/ip_set_list_set.c
+++ b/net/netfilter/ipset/ip_set_list_set.c
@@ -43,14 +43,19 @@ struct list_set {
 static inline struct set_elem *
 list_set_elem(const struct list_set *map, u32 id)
 {
-	return (struct set_elem *)((char *)map->members + id * map->dsize);
+	return (struct set_elem *)((void *)map->members + id * map->dsize);
+}
+
+static inline struct set_telem *
+list_set_telem(const struct list_set *map, u32 id)
+{
+	return (struct set_telem *)((void *)map->members + id * map->dsize);
 }
 
 static inline bool
 list_set_timeout(const struct list_set *map, u32 id)
 {
-	const struct set_telem *elem =
-		(const struct set_telem *) list_set_elem(map, id);
+	const struct set_telem *elem = list_set_telem(map, id);
 
 	return ip_set_timeout_test(elem->timeout);
 }
@@ -58,19 +63,11 @@ list_set_timeout(const struct list_set *map, u32 id)
 static inline bool
 list_set_expired(const struct list_set *map, u32 id)
 {
-	const struct set_telem *elem =
-		(const struct set_telem *) list_set_elem(map, id);
+	const struct set_telem *elem = list_set_telem(map, id);
 
 	return ip_set_timeout_expired(elem->timeout);
 }
 
-static inline int
-list_set_exist(const struct set_telem *elem)
-{
-	return elem->id != IPSET_INVALID_ID &&
-	       !ip_set_timeout_expired(elem->timeout);
-}
-
 /* Set list without and with timeout */
 
 static int
@@ -146,11 +143,11 @@ list_elem_tadd(struct list_set *map, u32 i, ip_set_id_t id,
 	struct set_telem *e;
 
 	for (; i < map->size; i++) {
-		e = (struct set_telem *)list_set_elem(map, i);
+		e = list_set_telem(map, i);
 		swap(e->id, id);
+		swap(e->timeout, timeout);
 		if (e->id == IPSET_INVALID_ID)
 			break;
-		swap(e->timeout, timeout);
 	}
 }
 
@@ -164,7 +161,7 @@ list_set_add(struct list_set *map, u32 i, ip_set_id_t id,
 		/* Last element replaced: e.g. add new,before,last */
 		ip_set_put_byindex(e->id);
 	if (with_timeout(map->timeout))
-		list_elem_tadd(map, i, id, timeout);
+		list_elem_tadd(map, i, id, ip_set_timeout_set(timeout));
 	else
 		list_elem_add(map, i, id);
 
@@ -172,11 +169,11 @@ list_set_add(struct list_set *map, u32 i, ip_set_id_t id,
 }
 
 static int
-list_set_del(struct list_set *map, ip_set_id_t id, u32 i)
+list_set_del(struct list_set *map, u32 i)
 {
 	struct set_elem *a = list_set_elem(map, i), *b;
 
-	ip_set_put_byindex(id);
+	ip_set_put_byindex(a->id);
 
 	for (; i < map->size - 1; i++) {
 		b = list_set_elem(map, i + 1);
@@ -308,11 +305,11 @@ list_set_uadt(struct ip_set *set, struct nlattr *tb[],
 				 (before == 0 ||
 				  (before > 0 &&
 				   next_id_eq(map, i, refid))))
-				ret = list_set_del(map, id, i);
+				ret = list_set_del(map, i);
 			else if (before < 0 &&
 				 elem->id == refid &&
 				 next_id_eq(map, i, id))
-				ret = list_set_del(map, id, i + 1);
+				ret = list_set_del(map, i + 1);
 		}
 		break;
 	default:
@@ -460,17 +457,15 @@ list_set_gc(unsigned long ul_set)
 	struct list_set *map = set->data;
 	struct set_telem *e;
 	u32 i;
-
-	/* We run parallel with other readers (test element)
-	 * but adding/deleting new entries is locked out */
-	read_lock_bh(&set->lock);
-	for (i = map->size - 1; i >= 0; i--) {
-		e = (struct set_telem *) list_set_elem(map, i);
-		if (e->id != IPSET_INVALID_ID &&
-		    list_set_expired(map, i))
-			list_set_del(map, e->id, i);
+	
+	/* nfnl_lock should be called */
+	write_lock_bh(&set->lock);
+	for (i = 0; i < map->size; i++) {
+		e = list_set_telem(map, i);
+		if (e->id != IPSET_INVALID_ID && list_set_expired(map, i))
+			list_set_del(map, i);
 	}
-	read_unlock_bh(&set->lock);
+	write_unlock_bh(&set->lock);
 
 	map->gc.expires = jiffies + IPSET_GC_PERIOD(map->timeout) * HZ;
 	add_timer(&map->gc);
-- 
1.7.0.4


^ permalink raw reply related	[flat|nested] 7+ messages in thread

* [PATCH 2/2] netfilter:ipset: References are protected by rwlock instead of mutex
  2011-03-25 10:42 ` [PATCH 1/2] netfilter:ipset: list:set timeout variant fixes Jozsef Kadlecsik
@ 2011-03-25 10:42   ` Jozsef Kadlecsik
  2011-03-28 12:35     ` Patrick McHardy
  2011-03-28 12:25   ` [PATCH 1/2] netfilter:ipset: list:set timeout variant fixes Patrick McHardy
  1 sibling, 1 reply; 7+ messages in thread
From: Jozsef Kadlecsik @ 2011-03-25 10:42 UTC (permalink / raw)
  To: netfilter-devel; +Cc: Patrick McHardy, Jozsef Kadlecsik

The timeout variant of the list:set type must reference the member sets.
However, its garbage collector runs at timer interrupt so the mutex protection
of the references is a no go. Therefore the reference protection
is converted to rwlock.

Signed-off-by: Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
---
 include/linux/netfilter/ipset/ip_set.h       |    2 +-
 include/linux/netfilter/ipset/ip_set_ahash.h |    3 +-
 net/netfilter/ipset/ip_set_bitmap_ip.c       |    3 +-
 net/netfilter/ipset/ip_set_bitmap_ipmac.c    |    3 +-
 net/netfilter/ipset/ip_set_bitmap_port.c     |    3 +-
 net/netfilter/ipset/ip_set_core.c            |  109 ++++++++++++++++----------
 net/netfilter/ipset/ip_set_list_set.c        |    6 +-
 7 files changed, 73 insertions(+), 56 deletions(-)

diff --git a/include/linux/netfilter/ipset/ip_set.h b/include/linux/netfilter/ipset/ip_set.h
index ec333d8..5a262e3 100644
--- a/include/linux/netfilter/ipset/ip_set.h
+++ b/include/linux/netfilter/ipset/ip_set.h
@@ -293,7 +293,7 @@ struct ip_set {
 	/* Lock protecting the set data */
 	rwlock_t lock;
 	/* References to the set */
-	atomic_t ref;
+	u32 ref;
 	/* The core set type */
 	struct ip_set_type *type;
 	/* The type variant doing the real job */
diff --git a/include/linux/netfilter/ipset/ip_set_ahash.h b/include/linux/netfilter/ipset/ip_set_ahash.h
index ec9d9be..a0196ac 100644
--- a/include/linux/netfilter/ipset/ip_set_ahash.h
+++ b/include/linux/netfilter/ipset/ip_set_ahash.h
@@ -515,8 +515,7 @@ type_pf_head(struct ip_set *set, struct sk_buff *skb)
 	if (h->netmask != HOST_MASK)
 		NLA_PUT_U8(skb, IPSET_ATTR_NETMASK, h->netmask);
 #endif
-	NLA_PUT_NET32(skb, IPSET_ATTR_REFERENCES,
-		      htonl(atomic_read(&set->ref) - 1));
+	NLA_PUT_NET32(skb, IPSET_ATTR_REFERENCES, htonl(set->ref - 1));
 	NLA_PUT_NET32(skb, IPSET_ATTR_MEMSIZE, htonl(memsize));
 	if (with_timeout(h->timeout))
 		NLA_PUT_NET32(skb, IPSET_ATTR_TIMEOUT, htonl(h->timeout));
diff --git a/net/netfilter/ipset/ip_set_bitmap_ip.c b/net/netfilter/ipset/ip_set_bitmap_ip.c
index bca9699..a113ff0 100644
--- a/net/netfilter/ipset/ip_set_bitmap_ip.c
+++ b/net/netfilter/ipset/ip_set_bitmap_ip.c
@@ -338,8 +338,7 @@ bitmap_ip_head(struct ip_set *set, struct sk_buff *skb)
 	NLA_PUT_IPADDR4(skb, IPSET_ATTR_IP_TO, htonl(map->last_ip));
 	if (map->netmask != 32)
 		NLA_PUT_U8(skb, IPSET_ATTR_NETMASK, map->netmask);
-	NLA_PUT_NET32(skb, IPSET_ATTR_REFERENCES,
-		      htonl(atomic_read(&set->ref) - 1));
+	NLA_PUT_NET32(skb, IPSET_ATTR_REFERENCES, htonl(set->ref - 1));
 	NLA_PUT_NET32(skb, IPSET_ATTR_MEMSIZE,
 		      htonl(sizeof(*map) + map->memsize));
 	if (with_timeout(map->timeout))
diff --git a/net/netfilter/ipset/ip_set_bitmap_ipmac.c b/net/netfilter/ipset/ip_set_bitmap_ipmac.c
index 5e79017..00a3324 100644
--- a/net/netfilter/ipset/ip_set_bitmap_ipmac.c
+++ b/net/netfilter/ipset/ip_set_bitmap_ipmac.c
@@ -434,8 +434,7 @@ bitmap_ipmac_head(struct ip_set *set, struct sk_buff *skb)
 		goto nla_put_failure;
 	NLA_PUT_IPADDR4(skb, IPSET_ATTR_IP, htonl(map->first_ip));
 	NLA_PUT_IPADDR4(skb, IPSET_ATTR_IP_TO, htonl(map->last_ip));
-	NLA_PUT_NET32(skb, IPSET_ATTR_REFERENCES,
-		      htonl(atomic_read(&set->ref) - 1));
+	NLA_PUT_NET32(skb, IPSET_ATTR_REFERENCES, htonl(set->ref - 1));
 	NLA_PUT_NET32(skb, IPSET_ATTR_MEMSIZE,
 		      htonl(sizeof(*map)
 			    + (map->last_ip - map->first_ip + 1) * map->dsize));
diff --git a/net/netfilter/ipset/ip_set_bitmap_port.c b/net/netfilter/ipset/ip_set_bitmap_port.c
index 165f09b..6b38eb8 100644
--- a/net/netfilter/ipset/ip_set_bitmap_port.c
+++ b/net/netfilter/ipset/ip_set_bitmap_port.c
@@ -320,8 +320,7 @@ bitmap_port_head(struct ip_set *set, struct sk_buff *skb)
 		goto nla_put_failure;
 	NLA_PUT_NET16(skb, IPSET_ATTR_PORT, htons(map->first_port));
 	NLA_PUT_NET16(skb, IPSET_ATTR_PORT_TO, htons(map->last_port));
-	NLA_PUT_NET32(skb, IPSET_ATTR_REFERENCES,
-		      htonl(atomic_read(&set->ref) - 1));
+	NLA_PUT_NET32(skb, IPSET_ATTR_REFERENCES, htonl(set->ref - 1));
 	NLA_PUT_NET32(skb, IPSET_ATTR_MEMSIZE,
 		      htonl(sizeof(*map) + map->memsize));
 	if (with_timeout(map->timeout))
diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c
index d6b4823..e88ac3c 100644
--- a/net/netfilter/ipset/ip_set_core.c
+++ b/net/netfilter/ipset/ip_set_core.c
@@ -26,6 +26,7 @@
 
 static LIST_HEAD(ip_set_type_list);		/* all registered set types */
 static DEFINE_MUTEX(ip_set_type_mutex);		/* protects ip_set_type_list */
+static DEFINE_RWLOCK(ip_set_ref_lock);		/* protects the set refs */
 
 static struct ip_set **ip_set_list;		/* all individual sets */
 static ip_set_id_t ip_set_max = CONFIG_IP_SET_MAX; /* max number of sets */
@@ -301,13 +302,18 @@ EXPORT_SYMBOL_GPL(ip_set_get_ipaddr6);
 static inline void
 __ip_set_get(ip_set_id_t index)
 {
-	atomic_inc(&ip_set_list[index]->ref);
+	write_lock_bh(&ip_set_ref_lock);
+	ip_set_list[index]->ref++;
+	write_unlock_bh(&ip_set_ref_lock);
 }
 
 static inline void
 __ip_set_put(ip_set_id_t index)
 {
-	atomic_dec(&ip_set_list[index]->ref);
+	write_lock_bh(&ip_set_ref_lock);
+	BUG_ON(ip_set_list[index]->ref == 0);
+	ip_set_list[index]->ref--;
+	write_unlock_bh(&ip_set_ref_lock);
 }
 
 /*
@@ -324,7 +330,7 @@ ip_set_test(ip_set_id_t index, const struct sk_buff *skb,
 	struct ip_set *set = ip_set_list[index];
 	int ret = 0;
 
-	BUG_ON(set == NULL || atomic_read(&set->ref) == 0);
+	BUG_ON(set == NULL);
 	pr_debug("set %s, index %u\n", set->name, index);
 
 	if (dim < set->type->dimension ||
@@ -356,7 +362,7 @@ ip_set_add(ip_set_id_t index, const struct sk_buff *skb,
 	struct ip_set *set = ip_set_list[index];
 	int ret;
 
-	BUG_ON(set == NULL || atomic_read(&set->ref) == 0);
+	BUG_ON(set == NULL);
 	pr_debug("set %s, index %u\n", set->name, index);
 
 	if (dim < set->type->dimension ||
@@ -378,7 +384,7 @@ ip_set_del(ip_set_id_t index, const struct sk_buff *skb,
 	struct ip_set *set = ip_set_list[index];
 	int ret = 0;
 
-	BUG_ON(set == NULL || atomic_read(&set->ref) == 0);
+	BUG_ON(set == NULL);
 	pr_debug("set %s, index %u\n", set->name, index);
 
 	if (dim < set->type->dimension ||
@@ -397,7 +403,6 @@ EXPORT_SYMBOL_GPL(ip_set_del);
  * Find set by name, reference it once. The reference makes sure the
  * thing pointed to, does not go away under our feet.
  *
- * The nfnl mutex must already be activated.
  */
 ip_set_id_t
 ip_set_get_byname(const char *name, struct ip_set **set)
@@ -423,15 +428,12 @@ EXPORT_SYMBOL_GPL(ip_set_get_byname);
  * reference count by 1. The caller shall not assume the index
  * to be valid, after calling this function.
  *
- * The nfnl mutex must already be activated.
  */
 void
 ip_set_put_byindex(ip_set_id_t index)
 {
-	if (ip_set_list[index] != NULL) {
-		BUG_ON(atomic_read(&ip_set_list[index]->ref) == 0);
+	if (ip_set_list[index] != NULL)
 		__ip_set_put(index);
-	}
 }
 EXPORT_SYMBOL_GPL(ip_set_put_byindex);
 
@@ -441,7 +443,6 @@ EXPORT_SYMBOL_GPL(ip_set_put_byindex);
  * can't be destroyed. The set cannot be renamed due to
  * the referencing either.
  *
- * The nfnl mutex must already be activated.
  */
 const char *
 ip_set_name_byindex(ip_set_id_t index)
@@ -449,7 +450,7 @@ ip_set_name_byindex(ip_set_id_t index)
 	const struct ip_set *set = ip_set_list[index];
 
 	BUG_ON(set == NULL);
-	BUG_ON(atomic_read(&set->ref) == 0);
+	BUG_ON(set->ref == 0);
 
 	/* Referenced, so it's safe */
 	return set->name;
@@ -515,10 +516,7 @@ void
 ip_set_nfnl_put(ip_set_id_t index)
 {
 	nfnl_lock();
-	if (ip_set_list[index] != NULL) {
-		BUG_ON(atomic_read(&ip_set_list[index]->ref) == 0);
-		__ip_set_put(index);
-	}
+	ip_set_put_byindex(index);
 	nfnl_unlock();
 }
 EXPORT_SYMBOL_GPL(ip_set_nfnl_put);
@@ -526,7 +524,7 @@ EXPORT_SYMBOL_GPL(ip_set_nfnl_put);
 /*
  * Communication protocol with userspace over netlink.
  *
- * We already locked by nfnl_lock.
+ * The commands are serialized by the nfnl mutex.
  */
 
 static inline bool
@@ -657,7 +655,6 @@ ip_set_create(struct sock *ctnl, struct sk_buff *skb,
 		return -ENOMEM;
 	rwlock_init(&set->lock);
 	strlcpy(set->name, name, IPSET_MAXNAMELEN);
-	atomic_set(&set->ref, 0);
 	set->family = family;
 
 	/*
@@ -690,8 +687,8 @@ ip_set_create(struct sock *ctnl, struct sk_buff *skb,
 
 	/*
 	 * Here, we have a valid, constructed set and we are protected
-	 * by nfnl_lock. Find the first free index in ip_set_list and
-	 * check clashing.
+	 * by the nfnl mutex. Find the first free index in ip_set_list
+	 * and check clashing.
 	 */
 	if ((ret = find_free_id(set->name, &index, &clash)) != 0) {
 		/* If this is the same set and requested, ignore error */
@@ -751,31 +748,51 @@ ip_set_destroy(struct sock *ctnl, struct sk_buff *skb,
 	       const struct nlattr * const attr[])
 {
 	ip_set_id_t i;
+	int ret = 0;
 
 	if (unlikely(protocol_failed(attr)))
 		return -IPSET_ERR_PROTOCOL;
 
-	/* References are protected by the nfnl mutex */
+	/* Commands are serialized and references are
+	 * protected by the ip_set_ref_lock.
+	 * External systems (i.e. xt_set) must call
+	 * ip_set_put|get_nfnl_* functions, that way we
+	 * can safely check references here.
+	 *
+	 * list:set timer can only decrement the reference
+	 * counter, so if it's already zero, we can proceed
+	 * without holding the lock.
+	 */
+	read_lock_bh(&ip_set_ref_lock);
 	if (!attr[IPSET_ATTR_SETNAME]) {
 		for (i = 0; i < ip_set_max; i++) {
-			if (ip_set_list[i] != NULL &&
-			    (atomic_read(&ip_set_list[i]->ref)))
-				return -IPSET_ERR_BUSY;
+			if (ip_set_list[i] != NULL && ip_set_list[i]->ref) {
+				ret = IPSET_ERR_BUSY;
+				goto out;
+			}
 		}
+		read_unlock_bh(&ip_set_ref_lock);
 		for (i = 0; i < ip_set_max; i++) {
 			if (ip_set_list[i] != NULL)
 				ip_set_destroy_set(i);
 		}
 	} else {
 		i = find_set_id(nla_data(attr[IPSET_ATTR_SETNAME]));
-		if (i == IPSET_INVALID_ID)
-			return -ENOENT;
-		else if (atomic_read(&ip_set_list[i]->ref))
-			return -IPSET_ERR_BUSY;
+		if (i == IPSET_INVALID_ID) {
+			ret = -ENOENT;
+			goto out;
+		} else if (ip_set_list[i]->ref) {
+			ret = -IPSET_ERR_BUSY;
+			goto out;
+		}
+		read_unlock_bh(&ip_set_ref_lock);
 
 		ip_set_destroy_set(i);
 	}
 	return 0;
+out:
+	read_unlock_bh(&ip_set_ref_lock);
+	return ret;
 }
 
 /* Flush sets */
@@ -834,6 +851,7 @@ ip_set_rename(struct sock *ctnl, struct sk_buff *skb,
 	struct ip_set *set;
 	const char *name2;
 	ip_set_id_t i;
+	int ret = 0;
 
 	if (unlikely(protocol_failed(attr) ||
 		     attr[IPSET_ATTR_SETNAME] == NULL ||
@@ -843,25 +861,33 @@ ip_set_rename(struct sock *ctnl, struct sk_buff *skb,
 	set = find_set(nla_data(attr[IPSET_ATTR_SETNAME]));
 	if (set == NULL)
 		return -ENOENT;
-	if (atomic_read(&set->ref) != 0)
-		return -IPSET_ERR_REFERENCED;
+
+	read_lock_bh(&ip_set_ref_lock);
+	if (set->ref != 0) {
+		ret = -IPSET_ERR_REFERENCED;
+		goto out;
+	}
 
 	name2 = nla_data(attr[IPSET_ATTR_SETNAME2]);
 	for (i = 0; i < ip_set_max; i++) {
 		if (ip_set_list[i] != NULL &&
-		    STREQ(ip_set_list[i]->name, name2))
-			return -IPSET_ERR_EXIST_SETNAME2;
+		    STREQ(ip_set_list[i]->name, name2)) {
+			ret = -IPSET_ERR_EXIST_SETNAME2;
+			goto out;
+		}
 	}
 	strncpy(set->name, name2, IPSET_MAXNAMELEN);
 
-	return 0;
+out:
+	read_unlock_bh(&ip_set_ref_lock);
+	return ret;
 }
 
 /* Swap two sets so that name/index points to the other.
  * References and set names are also swapped.
  *
- * We are protected by the nfnl mutex and references are
- * manipulated only by holding the mutex. The kernel interfaces
+ * The commands are serialized by the nfnl mutex and references are
+ * protected by the ip_set_ref_lock. The kernel interfaces
  * do not hold the mutex but the pointer settings are atomic
  * so the ip_set_list always contains valid pointers to the sets.
  */
@@ -874,7 +900,6 @@ ip_set_swap(struct sock *ctnl, struct sk_buff *skb,
 	struct ip_set *from, *to;
 	ip_set_id_t from_id, to_id;
 	char from_name[IPSET_MAXNAMELEN];
-	u32 from_ref;
 
 	if (unlikely(protocol_failed(attr) ||
 		     attr[IPSET_ATTR_SETNAME] == NULL ||
@@ -899,17 +924,15 @@ ip_set_swap(struct sock *ctnl, struct sk_buff *skb,
 	      from->type->family == to->type->family))
 		return -IPSET_ERR_TYPE_MISMATCH;
 
-	/* No magic here: ref munging protected by the nfnl_lock */
 	strncpy(from_name, from->name, IPSET_MAXNAMELEN);
-	from_ref = atomic_read(&from->ref);
-
 	strncpy(from->name, to->name, IPSET_MAXNAMELEN);
-	atomic_set(&from->ref, atomic_read(&to->ref));
 	strncpy(to->name, from_name, IPSET_MAXNAMELEN);
-	atomic_set(&to->ref, from_ref);
 
+	write_lock_bh(&ip_set_ref_lock);
+	swap(from->ref, to->ref);
 	ip_set_list[from_id] = to;
 	ip_set_list[to_id] = from;
+	write_unlock_bh(&ip_set_ref_lock);
 
 	return 0;
 }
@@ -926,7 +949,7 @@ ip_set_dump_done(struct netlink_callback *cb)
 {
 	if (cb->args[2]) {
 		pr_debug("release set %s\n", ip_set_list[cb->args[1]]->name);
-		__ip_set_put((ip_set_id_t) cb->args[1]);
+		ip_set_put_byindex((ip_set_id_t) cb->args[1]);
 	}
 	return 0;
 }
@@ -1068,7 +1091,7 @@ release_refcount:
 	/* If there was an error or set is done, release set */
 	if (ret || !cb->args[2]) {
 		pr_debug("release set %s\n", ip_set_list[index]->name);
-		__ip_set_put(index);
+		ip_set_put_byindex(index);
 	}
 
 	/* If we dump all sets, continue with dumping last ones */
diff --git a/net/netfilter/ipset/ip_set_list_set.c b/net/netfilter/ipset/ip_set_list_set.c
index f4a46c0..e9159e9 100644
--- a/net/netfilter/ipset/ip_set_list_set.c
+++ b/net/netfilter/ipset/ip_set_list_set.c
@@ -366,8 +366,7 @@ list_set_head(struct ip_set *set, struct sk_buff *skb)
 	NLA_PUT_NET32(skb, IPSET_ATTR_SIZE, htonl(map->size));
 	if (with_timeout(map->timeout))
 		NLA_PUT_NET32(skb, IPSET_ATTR_TIMEOUT, htonl(map->timeout));
-	NLA_PUT_NET32(skb, IPSET_ATTR_REFERENCES,
-		      htonl(atomic_read(&set->ref) - 1));
+	NLA_PUT_NET32(skb, IPSET_ATTR_REFERENCES, htonl(set->ref - 1));
 	NLA_PUT_NET32(skb, IPSET_ATTR_MEMSIZE,
 		      htonl(sizeof(*map) + map->size * map->dsize));
 	ipset_nest_end(skb, nested);
@@ -457,8 +456,7 @@ list_set_gc(unsigned long ul_set)
 	struct list_set *map = set->data;
 	struct set_telem *e;
 	u32 i;
-	
-	/* nfnl_lock should be called */
+
 	write_lock_bh(&set->lock);
 	for (i = 0; i < map->size; i++) {
 		e = list_set_telem(map, i);
-- 
1.7.0.4


^ permalink raw reply related	[flat|nested] 7+ messages in thread

* Re: [PATCH 1/2] netfilter:ipset: list:set timeout variant fixes
  2011-03-25 10:42 ` [PATCH 1/2] netfilter:ipset: list:set timeout variant fixes Jozsef Kadlecsik
  2011-03-25 10:42   ` [PATCH 2/2] netfilter:ipset: References are protected by rwlock instead of mutex Jozsef Kadlecsik
@ 2011-03-28 12:25   ` Patrick McHardy
  1 sibling, 0 replies; 7+ messages in thread
From: Patrick McHardy @ 2011-03-28 12:25 UTC (permalink / raw)
  To: Jozsef Kadlecsik; +Cc: netfilter-devel

Am 25.03.2011 11:42, schrieb Jozsef Kadlecsik:
> - the timeout value was actually not set
> - the garbage collector was broken
> 
> The variant is fixed, the tests to the ipset testsuite are added.

Applied, thanks Jozsef.

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH 2/2] netfilter:ipset: References are protected by rwlock instead of mutex
  2011-03-25 10:42   ` [PATCH 2/2] netfilter:ipset: References are protected by rwlock instead of mutex Jozsef Kadlecsik
@ 2011-03-28 12:35     ` Patrick McHardy
  2011-03-28 13:33       ` Jozsef Kadlecsik
  0 siblings, 1 reply; 7+ messages in thread
From: Patrick McHardy @ 2011-03-28 12:35 UTC (permalink / raw)
  To: Jozsef Kadlecsik; +Cc: netfilter-devel

Am 25.03.2011 11:42, schrieb Jozsef Kadlecsik:
> The timeout variant of the list:set type must reference the member sets.
> However, its garbage collector runs at timer interrupt so the mutex protection
> of the references is a no go. Therefore the reference protection
> is converted to rwlock.
> 

>  __ip_set_get(ip_set_id_t index)
>  {
> -	atomic_inc(&ip_set_list[index]->ref);
> +	write_lock_bh(&ip_set_ref_lock);
> +	ip_set_list[index]->ref++;
> +	write_unlock_bh(&ip_set_ref_lock);
>  }
>  

I'm not sure I get this, why aren't regular atomic ops working
here?

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH 2/2] netfilter:ipset: References are protected by rwlock instead of mutex
  2011-03-28 12:35     ` Patrick McHardy
@ 2011-03-28 13:33       ` Jozsef Kadlecsik
  2011-04-04 13:20         ` Patrick McHardy
  0 siblings, 1 reply; 7+ messages in thread
From: Jozsef Kadlecsik @ 2011-03-28 13:33 UTC (permalink / raw)
  To: Patrick McHardy; +Cc: netfilter-devel

On Mon, 28 Mar 2011, Patrick McHardy wrote:

> Am 25.03.2011 11:42, schrieb Jozsef Kadlecsik:
> > The timeout variant of the list:set type must reference the member sets.
> > However, its garbage collector runs at timer interrupt so the mutex protection
> > of the references is a no go. Therefore the reference protection
> > is converted to rwlock.
> > 
> 
> >  __ip_set_get(ip_set_id_t index)
> >  {
> > -	atomic_inc(&ip_set_list[index]->ref);
> > +	write_lock_bh(&ip_set_ref_lock);
> > +	ip_set_list[index]->ref++;
> > +	write_unlock_bh(&ip_set_ref_lock);
> >  }
> >  
> 
> I'm not sure I get this, why aren't regular atomic ops working
> here?

A set can only be destroyed if it's not referenced. The reference counter 
is incremented/decremented by the checkentry/destroy functions of the set 
match and target, and when the set is added/deleted to/from a list:set 
type of set. These operations are serialized by the nfnl mutex.

However sets get deleted from the timeout variant of a list:set types by 
the garbage collector, too, which runs at timer interrupt. The set 
destroying happens by first checking the reference counter, then 
destroying the set without holding any lock. Because the garbage collector 
only decrements the refcount, we could go without using any rwlock and 
having atomic refcounts: if the counter is zero, we are safe and can 
destroy the set.

But when swapping two sets, the operation swaps the names, reference 
counters and the pointers of the sets. The values of two atomic counters 
cannot safely be swapped without holding some kind of lock. So I could not 
avoid introducing a rwlock to protect the refcounts, but then those are
not needed to be atomic.

Best regards,
Jozsef
-
E-mail  : kadlec@blackhole.kfki.hu, kadlec@mail.kfki.hu
PGP key : http://www.kfki.hu/~kadlec/pgp_public_key.txt
Address : KFKI Research Institute for Particle and Nuclear Physics
          H-1525 Budapest 114, POB. 49, Hungary

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH 2/2] netfilter:ipset: References are protected by rwlock instead of mutex
  2011-03-28 13:33       ` Jozsef Kadlecsik
@ 2011-04-04 13:20         ` Patrick McHardy
  0 siblings, 0 replies; 7+ messages in thread
From: Patrick McHardy @ 2011-04-04 13:20 UTC (permalink / raw)
  To: Jozsef Kadlecsik; +Cc: netfilter-devel

On 28.03.2011 15:33, Jozsef Kadlecsik wrote:
> On Mon, 28 Mar 2011, Patrick McHardy wrote:
> 
>> Am 25.03.2011 11:42, schrieb Jozsef Kadlecsik:
>>> The timeout variant of the list:set type must reference the member sets.
>>> However, its garbage collector runs at timer interrupt so the mutex protection
>>> of the references is a no go. Therefore the reference protection
>>> is converted to rwlock.
>>>
>>
>>>  __ip_set_get(ip_set_id_t index)
>>>  {
>>> -	atomic_inc(&ip_set_list[index]->ref);
>>> +	write_lock_bh(&ip_set_ref_lock);
>>> +	ip_set_list[index]->ref++;
>>> +	write_unlock_bh(&ip_set_ref_lock);
>>>  }
>>>  
>>
>> I'm not sure I get this, why aren't regular atomic ops working
>> here?
> 
> A set can only be destroyed if it's not referenced. The reference counter 
> is incremented/decremented by the checkentry/destroy functions of the set 
> match and target, and when the set is added/deleted to/from a list:set 
> type of set. These operations are serialized by the nfnl mutex.
> 
> However sets get deleted from the timeout variant of a list:set types by 
> the garbage collector, too, which runs at timer interrupt. The set 
> destroying happens by first checking the reference counter, then 
> destroying the set without holding any lock. Because the garbage collector 
> only decrements the refcount, we could go without using any rwlock and 
> having atomic refcounts: if the counter is zero, we are safe and can 
> destroy the set.
> 
> But when swapping two sets, the operation swaps the names, reference 
> counters and the pointers of the sets. The values of two atomic counters 
> cannot safely be swapped without holding some kind of lock. So I could not 
> avoid introducing a rwlock to protect the refcounts, but then those are
> not needed to be atomic.

Thanks for the explanation, applied.

^ permalink raw reply	[flat|nested] 7+ messages in thread

end of thread, other threads:[~2011-04-04 13:20 UTC | newest]

Thread overview: 7+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2011-03-25 10:42 [PATCH 0/2] netfilter:ipset fixes: list:set and refcounting Jozsef Kadlecsik
2011-03-25 10:42 ` [PATCH 1/2] netfilter:ipset: list:set timeout variant fixes Jozsef Kadlecsik
2011-03-25 10:42   ` [PATCH 2/2] netfilter:ipset: References are protected by rwlock instead of mutex Jozsef Kadlecsik
2011-03-28 12:35     ` Patrick McHardy
2011-03-28 13:33       ` Jozsef Kadlecsik
2011-04-04 13:20         ` Patrick McHardy
2011-03-28 12:25   ` [PATCH 1/2] netfilter:ipset: list:set timeout variant fixes Patrick McHardy

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).