netdev.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: "Timo Teräs" <timo.teras@iki.fi>
To: Herbert Xu <herbert@gondor.apana.org.au>
Cc: David Miller <davem@davemloft.net>,
	netdev@vger.kernel.org, jamal <hadi@cyberus.ca>
Subject: Re: xfrm_state locking regression...
Date: Wed, 24 Sep 2008 16:18:37 +0300	[thread overview]
Message-ID: <48DA3E2D.4070909@iki.fi> (raw)
In-Reply-To: <20080924075441.GA7391@gondor.apana.org.au>

Herbert Xu wrote:
> On Wed, Sep 24, 2008 at 10:29:15AM +0300, Timo Teräs wrote:
>> Same warning still applies: only compile tested.
> 
> Looks good to me, I espeically like the count/seq bit.  But I'm
> sure Dave would appreciate it if we actually ran this too :)

Gave it a test spin and found one bug. The walking routine will
get called one extra time when it has successfully dumped
everything so we need to check for that in the beginning; the
earlier version would start all over from the beginning resulting
never ending dumps.

So the patch is now as follows. I have an almost ready patch
for xfrm_policy dumping too. Will finish it and send tomorrow.

- Timo

ipsec: Put xfrm_state dumpers on the dump list

Based on Herbert Xu's patch and idea. Improved the original patch
to apply cleanly and modified iteration code to be more readable
by using a common struct for entries in all list.

As it is we go to extraordinary lengths to ensure that states
don't go away while dumpers go to sleep.  It's much easier if
we just put the dumpers themselves on the list since they can't
go away while they're going.

I've also changed the order of addition on new states to prevent
a never-ending dump.

Finally the obsolete last optimisation is now gone.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: Timo Teras <timo.teras@iki.fi>
---
 include/linux/netlink.h |    2 +-
 include/net/xfrm.h      |   29 ++++++-------
 net/xfrm/xfrm_state.c   |  109 +++++++++++++++-------------------------------
 3 files changed, 50 insertions(+), 90 deletions(-)

diff --git a/include/linux/netlink.h b/include/linux/netlink.h
index cbba776..9ff1b54 100644
--- a/include/linux/netlink.h
+++ b/include/linux/netlink.h
@@ -220,7 +220,7 @@ struct netlink_callback
 	int		(*dump)(struct sk_buff * skb, struct netlink_callback *cb);
 	int		(*done)(struct netlink_callback *cb);
 	int		family;
-	long		args[7];
+	long		args[6];
 };
 
 struct netlink_notify
diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index 48630b2..becaa1e 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -117,12 +117,21 @@ extern struct mutex xfrm_cfg_mutex;
       metrics. Plus, it will be made via sk->sk_dst_cache. Solved.
  */
 
+struct xfrm_state_walk {
+	struct list_head	all;
+	u8			state;
+	union {
+		u8		dying;
+		u8		proto;
+	};
+	u32			seq;
+};
+
 /* Full description of state of transformer. */
 struct xfrm_state
 {
-	struct list_head	all;
 	union {
-		struct list_head	gclist;
+		struct hlist_node	gclist;
 		struct hlist_node	bydst;
 	};
 	struct hlist_node	bysrc;
@@ -136,12 +145,8 @@ struct xfrm_state
 
 	u32			genid;
 
-	/* Key manger bits */
-	struct {
-		u8		state;
-		u8		dying;
-		u32		seq;
-	} km;
+	/* Key manager bits */
+	struct xfrm_state_walk	km;
 
 	/* Parameters of this state. */
 	struct {
@@ -1245,14 +1250,6 @@ struct xfrm6_tunnel {
 	int priority;
 };
 
-struct xfrm_state_walk {
-	struct list_head list;
-	unsigned long genid;
-	struct xfrm_state *state;
-	int count;
-	u8 proto;
-};
-
 struct xfrm_policy_walk {
 	struct xfrm_policy *policy;
 	int count;
diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
index 053970e..747fd8c 100644
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@ -59,14 +59,6 @@ static unsigned int xfrm_state_hashmax __read_mostly = 1 * 1024 * 1024;
 static unsigned int xfrm_state_num;
 static unsigned int xfrm_state_genid;
 
-/* Counter indicating ongoing walk, protected by xfrm_state_lock. */
-static unsigned long xfrm_state_walk_ongoing;
-/* Counter indicating walk completion, protected by xfrm_cfg_mutex. */
-static unsigned long xfrm_state_walk_completed;
-
-/* List of outstanding state walks used to set the completed counter.  */
-static LIST_HEAD(xfrm_state_walks);
-
 static struct xfrm_state_afinfo *xfrm_state_get_afinfo(unsigned int family);
 static void xfrm_state_put_afinfo(struct xfrm_state_afinfo *afinfo);
 
@@ -199,8 +191,7 @@ static DEFINE_RWLOCK(xfrm_state_afinfo_lock);
 static struct xfrm_state_afinfo *xfrm_state_afinfo[NPROTO];
 
 static struct work_struct xfrm_state_gc_work;
-static LIST_HEAD(xfrm_state_gc_leftovers);
-static LIST_HEAD(xfrm_state_gc_list);
+static HLIST_HEAD(xfrm_state_gc_list);
 static DEFINE_SPINLOCK(xfrm_state_gc_lock);
 
 int __xfrm_state_delete(struct xfrm_state *x);
@@ -412,23 +403,16 @@ static void xfrm_state_gc_destroy(struct xfrm_state *x)
 
 static void xfrm_state_gc_task(struct work_struct *data)
 {
-	struct xfrm_state *x, *tmp;
-	unsigned long completed;
+	struct xfrm_state *x;
+	struct hlist_node *entry, *tmp;
+	struct hlist_head gc_list;
 
-	mutex_lock(&xfrm_cfg_mutex);
 	spin_lock_bh(&xfrm_state_gc_lock);
-	list_splice_tail_init(&xfrm_state_gc_list, &xfrm_state_gc_leftovers);
+	hlist_move_list(&xfrm_state_gc_list, &gc_list);
 	spin_unlock_bh(&xfrm_state_gc_lock);
 
-	completed = xfrm_state_walk_completed;
-	mutex_unlock(&xfrm_cfg_mutex);
-
-	list_for_each_entry_safe(x, tmp, &xfrm_state_gc_leftovers, gclist) {
-		if ((long)(x->lastused - completed) > 0)
-			break;
-		list_del(&x->gclist);
+	hlist_for_each_entry_safe(x, entry, tmp, &gc_list, gclist)
 		xfrm_state_gc_destroy(x);
-	}
 
 	wake_up(&km_waitq);
 }
@@ -529,7 +513,7 @@ struct xfrm_state *xfrm_state_alloc(void)
 	if (x) {
 		atomic_set(&x->refcnt, 1);
 		atomic_set(&x->tunnel_users, 0);
-		INIT_LIST_HEAD(&x->all);
+		INIT_LIST_HEAD(&x->km.all);
 		INIT_HLIST_NODE(&x->bydst);
 		INIT_HLIST_NODE(&x->bysrc);
 		INIT_HLIST_NODE(&x->byspi);
@@ -556,7 +540,7 @@ void __xfrm_state_destroy(struct xfrm_state *x)
 	WARN_ON(x->km.state != XFRM_STATE_DEAD);
 
 	spin_lock_bh(&xfrm_state_gc_lock);
-	list_add_tail(&x->gclist, &xfrm_state_gc_list);
+	hlist_add_head(&x->gclist, &xfrm_state_gc_list);
 	spin_unlock_bh(&xfrm_state_gc_lock);
 	schedule_work(&xfrm_state_gc_work);
 }
@@ -569,8 +553,7 @@ int __xfrm_state_delete(struct xfrm_state *x)
 	if (x->km.state != XFRM_STATE_DEAD) {
 		x->km.state = XFRM_STATE_DEAD;
 		spin_lock(&xfrm_state_lock);
-		x->lastused = xfrm_state_walk_ongoing;
-		list_del_rcu(&x->all);
+		list_del(&x->km.all);
 		hlist_del(&x->bydst);
 		hlist_del(&x->bysrc);
 		if (x->id.spi)
@@ -871,7 +854,7 @@ xfrm_state_find(xfrm_address_t *daddr, xfrm_address_t *saddr,
 
 		if (km_query(x, tmpl, pol) == 0) {
 			x->km.state = XFRM_STATE_ACQ;
-			list_add_tail(&x->all, &xfrm_state_all);
+			list_add(&x->km.all, &xfrm_state_all);
 			hlist_add_head(&x->bydst, xfrm_state_bydst+h);
 			h = xfrm_src_hash(daddr, saddr, family);
 			hlist_add_head(&x->bysrc, xfrm_state_bysrc+h);
@@ -940,7 +923,7 @@ static void __xfrm_state_insert(struct xfrm_state *x)
 
 	x->genid = ++xfrm_state_genid;
 
-	list_add_tail(&x->all, &xfrm_state_all);
+	list_add(&x->km.all, &xfrm_state_all);
 
 	h = xfrm_dst_hash(&x->id.daddr, &x->props.saddr,
 			  x->props.reqid, x->props.family);
@@ -1069,7 +1052,7 @@ static struct xfrm_state *__find_acq_core(unsigned short family, u8 mode, u32 re
 		xfrm_state_hold(x);
 		x->timer.expires = jiffies + sysctl_xfrm_acq_expires*HZ;
 		add_timer(&x->timer);
-		list_add_tail(&x->all, &xfrm_state_all);
+		list_add(&x->km.all, &xfrm_state_all);
 		hlist_add_head(&x->bydst, xfrm_state_bydst+h);
 		h = xfrm_src_hash(daddr, saddr, family);
 		hlist_add_head(&x->bysrc, xfrm_state_bysrc+h);
@@ -1566,79 +1549,59 @@ int xfrm_state_walk(struct xfrm_state_walk *walk,
 		    int (*func)(struct xfrm_state *, int, void*),
 		    void *data)
 {
-	struct xfrm_state *old, *x, *last = NULL;
+	struct xfrm_state *state;
+	struct xfrm_state_walk *x;
 	int err = 0;
 
-	if (walk->state == NULL && walk->count != 0)
+	if (walk->seq != 0 && list_empty(&walk->all))
 		return 0;
 
-	old = x = walk->state;
-	walk->state = NULL;
 	spin_lock_bh(&xfrm_state_lock);
-	if (x == NULL)
-		x = list_first_entry(&xfrm_state_all, struct xfrm_state, all);
+	if (list_empty(&walk->all))
+		x = list_first_entry(&xfrm_state_all, struct xfrm_state_walk, all);
+	else
+		x = list_entry(&walk->all, struct xfrm_state_walk, all);
 	list_for_each_entry_from(x, &xfrm_state_all, all) {
-		if (x->km.state == XFRM_STATE_DEAD)
+		if (x->state == XFRM_STATE_DEAD)
 			continue;
-		if (!xfrm_id_proto_match(x->id.proto, walk->proto))
+		state = container_of(x, struct xfrm_state, km);
+		if (!xfrm_id_proto_match(state->id.proto, walk->proto))
 			continue;
-		if (last) {
-			err = func(last, walk->count, data);
-			if (err) {
-				xfrm_state_hold(last);
-				walk->state = last;
-				goto out;
-			}
+		err = func(state, walk->seq, data);
+		if (err) {
+			list_move_tail(&walk->all, &x->all);
+			goto out;
 		}
-		last = x;
-		walk->count++;
+		walk->seq++;
 	}
-	if (walk->count == 0) {
+	if (walk->seq == 0) {
 		err = -ENOENT;
 		goto out;
 	}
-	if (last)
-		err = func(last, 0, data);
+	list_del_init(&walk->all);
 out:
 	spin_unlock_bh(&xfrm_state_lock);
-	if (old != NULL)
-		xfrm_state_put(old);
 	return err;
 }
 EXPORT_SYMBOL(xfrm_state_walk);
 
 void xfrm_state_walk_init(struct xfrm_state_walk *walk, u8 proto)
 {
+	INIT_LIST_HEAD(&walk->all);
 	walk->proto = proto;
-	walk->state = NULL;
-	walk->count = 0;
-	list_add_tail(&walk->list, &xfrm_state_walks);
-	walk->genid = ++xfrm_state_walk_ongoing;
+	walk->state = XFRM_STATE_DEAD;
+	walk->seq = 0;
 }
 EXPORT_SYMBOL(xfrm_state_walk_init);
 
 void xfrm_state_walk_done(struct xfrm_state_walk *walk)
 {
-	struct list_head *prev;
-
-	if (walk->state != NULL) {
-		xfrm_state_put(walk->state);
-		walk->state = NULL;
-	}
-
-	prev = walk->list.prev;
-	list_del(&walk->list);
-
-	if (prev != &xfrm_state_walks) {
-		list_entry(prev, struct xfrm_state_walk, list)->genid =
-			walk->genid;
+	if (list_empty(&walk->all))
 		return;
-	}
-
-	xfrm_state_walk_completed = walk->genid;
 
-	if (!list_empty(&xfrm_state_gc_leftovers))
-		schedule_work(&xfrm_state_gc_work);
+	spin_lock_bh(&xfrm_state_lock);
+	list_del(&walk->all);
+	spin_lock_bh(&xfrm_state_lock);
 }
 EXPORT_SYMBOL(xfrm_state_walk_done);
 

  reply	other threads:[~2008-09-24 13:18 UTC|newest]

Thread overview: 95+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2008-09-03  2:51 xfrm_state locking regression David Miller
2008-09-03  3:00 ` David Miller
2008-09-03  5:01 ` Herbert Xu
2008-09-03  5:07   ` Timo Teräs
2008-09-03  5:23     ` Herbert Xu
2008-09-03  5:39       ` Timo Teräs
2008-09-03  5:40         ` Herbert Xu
2008-09-09 12:25       ` David Miller
2008-09-03  5:39     ` Herbert Xu
2008-09-03  5:45       ` Timo Teräs
2008-09-03  5:50         ` Herbert Xu
2008-09-03  6:14           ` David Miller
2008-09-03  6:27             ` Timo Teräs
2008-09-03  6:35               ` David Miller
2008-09-03  6:45                 ` Timo Teräs
2008-09-03  6:47                   ` David Miller
2008-09-03  7:14                     ` Timo Teräs
2008-09-05 11:55                     ` Herbert Xu
2008-09-09  0:09                       ` David Miller
2008-09-09  0:18                         ` Herbert Xu
2008-09-09  0:20                           ` David Miller
2008-09-09  0:25                       ` David Miller
2008-09-09 14:33                         ` Herbert Xu
2008-09-09 20:20                           ` David Miller
2008-09-10  3:01                           ` David Miller
2008-09-11 21:24                           ` Paul E. McKenney
2008-09-11 22:00                             ` David Miller
2008-09-11 23:22                               ` Paul E. McKenney
2008-09-12 16:08                               ` Herbert Xu
2008-09-12 17:37                                 ` Paul E. McKenney
2008-09-21 12:29                           ` Timo Teräs
2008-09-21 15:21                             ` Timo Teräs
2008-09-22 11:42                               ` Herbert Xu
2008-09-22 13:01                                 ` Timo Teräs
2008-09-22 23:50                                   ` Herbert Xu
2008-09-23  4:53                                     ` Timo Teräs
2008-09-23  4:59                                       ` Herbert Xu
2008-09-23  5:17                                         ` Timo Teräs
2008-09-23  5:22                                           ` Herbert Xu
2008-09-23  6:25                                             ` Timo Teräs
2008-09-23  6:47                                               ` Herbert Xu
2008-09-23  6:56                                                 ` Timo Teräs
2008-09-23  9:39                                                 ` Timo Teräs
2008-09-23 11:24                                                   ` Herbert Xu
2008-09-23 12:08                                                     ` Timo Teräs
2008-09-23 12:14                                                       ` Herbert Xu
2008-09-23 12:25                                                         ` Timo Teräs
2008-09-23 12:56                                                           ` Herbert Xu
2008-09-23 13:01                                                             ` Timo Teräs
2008-09-23 13:07                                                               ` Herbert Xu
2008-09-23 13:30                                                                 ` Timo Teräs
2008-09-23 13:32                                                                   ` Herbert Xu
2008-09-23 13:46                                                                     ` Timo Teräs
2008-09-24  4:23                                                                       ` Herbert Xu
2008-09-24  5:14                                                                         ` Timo Teräs
2008-09-24  5:15                                                                           ` Herbert Xu
2008-09-24  5:46                                                                             ` Timo Teräs
2008-09-24  5:55                                                                               ` Herbert Xu
2008-09-24  6:04                                                                                 ` Timo Teräs
2008-09-24  6:13                                                                                   ` Herbert Xu
2008-09-24  6:20                                                                                     ` Timo Teräs
2008-09-24  6:21                                                                                       ` Herbert Xu
2008-09-24  7:29                                                                                         ` Timo Teräs
2008-09-24  7:54                                                                                           ` Herbert Xu
2008-09-24 13:18                                                                                             ` Timo Teräs [this message]
2008-09-24 14:08                                                                                               ` Herbert Xu
2008-09-25  6:03                                                                                                 ` Timo Teräs
2008-09-25  7:57                                                                                                   ` Herbert Xu
2008-09-25  8:42                                                                                                     ` Timo Teräs
2008-09-25  8:56                                                                                                       ` Herbert Xu
2008-09-25  9:01                                                                                                         ` Timo Teräs
2008-09-25  9:49                                                                                                           ` Herbert Xu
2008-09-25 12:12                                                                                                             ` Timo Teräs
2008-09-25 12:36                                                                                                               ` Timo Teräs
2008-09-26  2:08                                                                                                                 ` Herbert Xu
2008-10-01 10:07                                                                                                                 ` David Miller
2008-10-01 14:05                                                                                                                   ` Herbert Xu
2008-09-23  2:48                                 ` David Miller
2008-09-10  3:04           ` David Miller
2008-09-10  3:15             ` Herbert Xu
2008-09-10  3:22               ` David Miller
2008-09-10  3:23                 ` Herbert Xu
2008-09-10  3:38                   ` David Miller
2008-09-10  4:01                     ` Herbert Xu
2008-09-10  4:06                       ` David Miller
2008-09-10  4:22                         ` Herbert Xu
2008-09-10  4:24                           ` David Miller
2008-09-10  4:48                             ` David Miller
2008-09-10  4:52                               ` David Miller
2008-09-10  4:53                               ` Herbert Xu
2008-09-10  5:21                                 ` David Miller
2008-09-10  5:16                         ` Timo Teräs
2008-09-10  5:23                           ` David Miller
2008-09-10  5:46                             ` Herbert Xu
2008-09-03  6:10     ` David Miller

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=48DA3E2D.4070909@iki.fi \
    --to=timo.teras@iki.fi \
    --cc=davem@davemloft.net \
    --cc=hadi@cyberus.ca \
    --cc=herbert@gondor.apana.org.au \
    --cc=netdev@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).