netdev.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: "Timo Teräs" <timo.teras@iki.fi>
To: Herbert Xu <herbert@gondor.apana.org.au>
Cc: David Miller <davem@davemloft.net>, netdev@vger.kernel.org
Subject: Re: xfrm_state locking regression...
Date: Tue, 23 Sep 2008 09:25:16 +0300	[thread overview]
Message-ID: <48D88BCC.5030806@iki.fi> (raw)
In-Reply-To: <20080923052239.GA26233@gondor.apana.org.au>

Herbert Xu wrote:
> On Tue, Sep 23, 2008 at 08:17:14AM +0300, Timo Teräs wrote:
>> The extra step there wold be a hold call. The recursive _put on a
>> _put of some entry can happen only on dump path. As otherwise the
>> ->next entry is first held in state delete, but would be immediately
>> _put on the _put as the final step of _delete().
>>
>> So basically one additional atomic_inc() and one atomic_dec() on the
>> normal _delete() path.
> 
> Can you post a patch? If this can be done locklessly then yes
> it would probably be a good way to go.  However, I'm not sure
> whether I understand your lockless proposal yet.

Something like this. I just compile tested, so I'm not sure if it
actually works.

This basically reverts the GC changes. The interesting bits are in
xfrm_state_delete(). It will _hold() the ->next entry unless we
were at last entry. This will make sure that when an entry is
in XFRM_STATE_DEAD the all.next is valid all the way until the
entry is destroyed.

The corresponding put is in _destroy(). Added it as a final thing
to do so hopefully compiler optimizes tail recursion to iteration.
(Seemed to do that in my case.)

And finally, _walk() was right all along. It returns to dumping
and first skips all dead entries. And just before return, when
all dead entries are already skipped the originally held entry
is _put(). That _put call (or the one in _walk_done()) will result
all dead entries that were held, to be iteratively put.

- Timo

diff --git a/include/linux/netlink.h b/include/linux/netlink.h
index cbba776..9ff1b54 100644
--- a/include/linux/netlink.h
+++ b/include/linux/netlink.h
@@ -220,7 +220,7 @@ struct netlink_callback
 	int		(*dump)(struct sk_buff * skb, struct netlink_callback *cb);
 	int		(*done)(struct netlink_callback *cb);
 	int		family;
-	long		args[7];
+	long		args[6];
 };
 
 struct netlink_notify
diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index 48630b2..d5cba7b 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -122,7 +122,7 @@ struct xfrm_state
 {
 	struct list_head	all;
 	union {
-		struct list_head	gclist;
+		struct hlist_node	gclist;
 		struct hlist_node	bydst;
 	};
 	struct hlist_node	bysrc;
@@ -1246,8 +1246,6 @@ struct xfrm6_tunnel {
 };
 
 struct xfrm_state_walk {
-	struct list_head list;
-	unsigned long genid;
 	struct xfrm_state *state;
 	int count;
 	u8 proto;
diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
index 053970e..f45f006 100644
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@ -59,14 +59,6 @@ static unsigned int xfrm_state_hashmax __read_mostly = 1 * 1024 * 1024;
 static unsigned int xfrm_state_num;
 static unsigned int xfrm_state_genid;
 
-/* Counter indicating ongoing walk, protected by xfrm_state_lock. */
-static unsigned long xfrm_state_walk_ongoing;
-/* Counter indicating walk completion, protected by xfrm_cfg_mutex. */
-static unsigned long xfrm_state_walk_completed;
-
-/* List of outstanding state walks used to set the completed counter.  */
-static LIST_HEAD(xfrm_state_walks);
-
 static struct xfrm_state_afinfo *xfrm_state_get_afinfo(unsigned int family);
 static void xfrm_state_put_afinfo(struct xfrm_state_afinfo *afinfo);
 
@@ -199,8 +191,7 @@ static DEFINE_RWLOCK(xfrm_state_afinfo_lock);
 static struct xfrm_state_afinfo *xfrm_state_afinfo[NPROTO];
 
 static struct work_struct xfrm_state_gc_work;
-static LIST_HEAD(xfrm_state_gc_leftovers);
-static LIST_HEAD(xfrm_state_gc_list);
+static HLIST_HEAD(xfrm_state_gc_list);
 static DEFINE_SPINLOCK(xfrm_state_gc_lock);
 
 int __xfrm_state_delete(struct xfrm_state *x);
@@ -412,23 +403,17 @@ static void xfrm_state_gc_destroy(struct xfrm_state *x)
 
 static void xfrm_state_gc_task(struct work_struct *data)
 {
-	struct xfrm_state *x, *tmp;
-	unsigned long completed;
+	struct xfrm_state *x;
+	struct hlist_node *entry, *tmp;
+	struct hlist_head gc_list;
 
-	mutex_lock(&xfrm_cfg_mutex);
 	spin_lock_bh(&xfrm_state_gc_lock);
-	list_splice_tail_init(&xfrm_state_gc_list, &xfrm_state_gc_leftovers);
+	gc_list.first = xfrm_state_gc_list.first;
+	INIT_HLIST_HEAD(&xfrm_state_gc_list);
 	spin_unlock_bh(&xfrm_state_gc_lock);
 
-	completed = xfrm_state_walk_completed;
-	mutex_unlock(&xfrm_cfg_mutex);
-
-	list_for_each_entry_safe(x, tmp, &xfrm_state_gc_leftovers, gclist) {
-		if ((long)(x->lastused - completed) > 0)
-			break;
-		list_del(&x->gclist);
+	hlist_for_each_entry_safe(x, entry, tmp, &gc_list, gclist)
 		xfrm_state_gc_destroy(x);
-	}
 
 	wake_up(&km_waitq);
 }
@@ -553,12 +538,19 @@ EXPORT_SYMBOL(xfrm_state_alloc);
 
 void __xfrm_state_destroy(struct xfrm_state *x)
 {
+	struct xfrm_state *next = NULL;
+
 	WARN_ON(x->km.state != XFRM_STATE_DEAD);
 
+	if (x->all.next != &xfrm_state_all)
+		next = container_of(x->all.next, struct xfrm_state, all);
 	spin_lock_bh(&xfrm_state_gc_lock);
-	list_add_tail(&x->gclist, &xfrm_state_gc_list);
+	hlist_add_head(&x->gclist, &xfrm_state_gc_list);
 	spin_unlock_bh(&xfrm_state_gc_lock);
 	schedule_work(&xfrm_state_gc_work);
+
+	if (next != NULL)
+		xfrm_state_put(next);
 }
 EXPORT_SYMBOL(__xfrm_state_destroy);
 
@@ -569,8 +561,10 @@ int __xfrm_state_delete(struct xfrm_state *x)
 	if (x->km.state != XFRM_STATE_DEAD) {
 		x->km.state = XFRM_STATE_DEAD;
 		spin_lock(&xfrm_state_lock);
-		x->lastused = xfrm_state_walk_ongoing;
 		list_del_rcu(&x->all);
+		if (x->all.next != &xfrm_state_all)
+			xfrm_state_hold(container_of(x->all.next,
+						     struct xfrm_state, all));
 		hlist_del(&x->bydst);
 		hlist_del(&x->bysrc);
 		if (x->id.spi)
@@ -1612,33 +1606,15 @@ void xfrm_state_walk_init(struct xfrm_state_walk *walk, u8 proto)
 	walk->proto = proto;
 	walk->state = NULL;
 	walk->count = 0;
-	list_add_tail(&walk->list, &xfrm_state_walks);
-	walk->genid = ++xfrm_state_walk_ongoing;
 }
 EXPORT_SYMBOL(xfrm_state_walk_init);
 
 void xfrm_state_walk_done(struct xfrm_state_walk *walk)
 {
-	struct list_head *prev;
-
 	if (walk->state != NULL) {
 		xfrm_state_put(walk->state);
 		walk->state = NULL;
 	}
-
-	prev = walk->list.prev;
-	list_del(&walk->list);
-
-	if (prev != &xfrm_state_walks) {
-		list_entry(prev, struct xfrm_state_walk, list)->genid =
-			walk->genid;
-		return;
-	}
-
-	xfrm_state_walk_completed = walk->genid;
-
-	if (!list_empty(&xfrm_state_gc_leftovers))
-		schedule_work(&xfrm_state_gc_work);
 }
 EXPORT_SYMBOL(xfrm_state_walk_done);
 

  reply	other threads:[~2008-09-23  6:25 UTC|newest]

Thread overview: 95+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2008-09-03  2:51 xfrm_state locking regression David Miller
2008-09-03  3:00 ` David Miller
2008-09-03  5:01 ` Herbert Xu
2008-09-03  5:07   ` Timo Teräs
2008-09-03  5:23     ` Herbert Xu
2008-09-03  5:39       ` Timo Teräs
2008-09-03  5:40         ` Herbert Xu
2008-09-09 12:25       ` David Miller
2008-09-03  5:39     ` Herbert Xu
2008-09-03  5:45       ` Timo Teräs
2008-09-03  5:50         ` Herbert Xu
2008-09-03  6:14           ` David Miller
2008-09-03  6:27             ` Timo Teräs
2008-09-03  6:35               ` David Miller
2008-09-03  6:45                 ` Timo Teräs
2008-09-03  6:47                   ` David Miller
2008-09-03  7:14                     ` Timo Teräs
2008-09-05 11:55                     ` Herbert Xu
2008-09-09  0:09                       ` David Miller
2008-09-09  0:18                         ` Herbert Xu
2008-09-09  0:20                           ` David Miller
2008-09-09  0:25                       ` David Miller
2008-09-09 14:33                         ` Herbert Xu
2008-09-09 20:20                           ` David Miller
2008-09-10  3:01                           ` David Miller
2008-09-11 21:24                           ` Paul E. McKenney
2008-09-11 22:00                             ` David Miller
2008-09-11 23:22                               ` Paul E. McKenney
2008-09-12 16:08                               ` Herbert Xu
2008-09-12 17:37                                 ` Paul E. McKenney
2008-09-21 12:29                           ` Timo Teräs
2008-09-21 15:21                             ` Timo Teräs
2008-09-22 11:42                               ` Herbert Xu
2008-09-22 13:01                                 ` Timo Teräs
2008-09-22 23:50                                   ` Herbert Xu
2008-09-23  4:53                                     ` Timo Teräs
2008-09-23  4:59                                       ` Herbert Xu
2008-09-23  5:17                                         ` Timo Teräs
2008-09-23  5:22                                           ` Herbert Xu
2008-09-23  6:25                                             ` Timo Teräs [this message]
2008-09-23  6:47                                               ` Herbert Xu
2008-09-23  6:56                                                 ` Timo Teräs
2008-09-23  9:39                                                 ` Timo Teräs
2008-09-23 11:24                                                   ` Herbert Xu
2008-09-23 12:08                                                     ` Timo Teräs
2008-09-23 12:14                                                       ` Herbert Xu
2008-09-23 12:25                                                         ` Timo Teräs
2008-09-23 12:56                                                           ` Herbert Xu
2008-09-23 13:01                                                             ` Timo Teräs
2008-09-23 13:07                                                               ` Herbert Xu
2008-09-23 13:30                                                                 ` Timo Teräs
2008-09-23 13:32                                                                   ` Herbert Xu
2008-09-23 13:46                                                                     ` Timo Teräs
2008-09-24  4:23                                                                       ` Herbert Xu
2008-09-24  5:14                                                                         ` Timo Teräs
2008-09-24  5:15                                                                           ` Herbert Xu
2008-09-24  5:46                                                                             ` Timo Teräs
2008-09-24  5:55                                                                               ` Herbert Xu
2008-09-24  6:04                                                                                 ` Timo Teräs
2008-09-24  6:13                                                                                   ` Herbert Xu
2008-09-24  6:20                                                                                     ` Timo Teräs
2008-09-24  6:21                                                                                       ` Herbert Xu
2008-09-24  7:29                                                                                         ` Timo Teräs
2008-09-24  7:54                                                                                           ` Herbert Xu
2008-09-24 13:18                                                                                             ` Timo Teräs
2008-09-24 14:08                                                                                               ` Herbert Xu
2008-09-25  6:03                                                                                                 ` Timo Teräs
2008-09-25  7:57                                                                                                   ` Herbert Xu
2008-09-25  8:42                                                                                                     ` Timo Teräs
2008-09-25  8:56                                                                                                       ` Herbert Xu
2008-09-25  9:01                                                                                                         ` Timo Teräs
2008-09-25  9:49                                                                                                           ` Herbert Xu
2008-09-25 12:12                                                                                                             ` Timo Teräs
2008-09-25 12:36                                                                                                               ` Timo Teräs
2008-09-26  2:08                                                                                                                 ` Herbert Xu
2008-10-01 10:07                                                                                                                 ` David Miller
2008-10-01 14:05                                                                                                                   ` Herbert Xu
2008-09-23  2:48                                 ` David Miller
2008-09-10  3:04           ` David Miller
2008-09-10  3:15             ` Herbert Xu
2008-09-10  3:22               ` David Miller
2008-09-10  3:23                 ` Herbert Xu
2008-09-10  3:38                   ` David Miller
2008-09-10  4:01                     ` Herbert Xu
2008-09-10  4:06                       ` David Miller
2008-09-10  4:22                         ` Herbert Xu
2008-09-10  4:24                           ` David Miller
2008-09-10  4:48                             ` David Miller
2008-09-10  4:52                               ` David Miller
2008-09-10  4:53                               ` Herbert Xu
2008-09-10  5:21                                 ` David Miller
2008-09-10  5:16                         ` Timo Teräs
2008-09-10  5:23                           ` David Miller
2008-09-10  5:46                             ` Herbert Xu
2008-09-03  6:10     ` David Miller

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=48D88BCC.5030806@iki.fi \
    --to=timo.teras@iki.fi \
    --cc=davem@davemloft.net \
    --cc=herbert@gondor.apana.org.au \
    --cc=netdev@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).