From: "Timo Teräs" <timo.teras@iki.fi>
To: Herbert Xu <herbert@gondor.apana.org.au>
Cc: David Miller <davem@davemloft.net>, netdev@vger.kernel.org
Subject: Re: xfrm_state locking regression...
Date: Tue, 23 Sep 2008 09:25:16 +0300 [thread overview]
Message-ID: <48D88BCC.5030806@iki.fi> (raw)
In-Reply-To: <20080923052239.GA26233@gondor.apana.org.au>
Herbert Xu wrote:
> On Tue, Sep 23, 2008 at 08:17:14AM +0300, Timo Teräs wrote:
>> The extra step there wold be a hold call. The recursive _put on a
>> _put of some entry can happen only on dump path. As otherwise the
>> ->next entry is first held in state delete, but would be immediately
>> _put on the _put as the final step of _delete().
>>
>> So basically one additional atomic_inc() and one atomic_dec() on the
>> normal _delete() path.
>
> Can you post a patch? If this can be done locklessly then yes
> it would probably be a good way to go. However, I'm not sure
> whether I understand your lockless proposal yet.
Something like this. I just compile tested, so I'm not sure if it
actually works.
This basically reverts the GC changes. The interesting bits are in
xfrm_state_delete(). It will _hold() the ->next entry unless we
were at last entry. This will make sure that when an entry is
in XFRM_STATE_DEAD the all.next is valid all the way until the
entry is destroyed.
The corresponding put is in _destroy(). Added it as a final thing
to do so hopefully compiler optimizes tail recursion to iteration.
(Seemed to do that in my case.)
And finally, _walk() was right all along. It returns to dumping
and first skips all dead entries. And just before return, when
all dead entries are already skipped the originally held entry
is _put(). That _put call (or the one in _walk_done()) will result
all dead entries that were held, to be iteratively put.
- Timo
diff --git a/include/linux/netlink.h b/include/linux/netlink.h
index cbba776..9ff1b54 100644
--- a/include/linux/netlink.h
+++ b/include/linux/netlink.h
@@ -220,7 +220,7 @@ struct netlink_callback
int (*dump)(struct sk_buff * skb, struct netlink_callback *cb);
int (*done)(struct netlink_callback *cb);
int family;
- long args[7];
+ long args[6];
};
struct netlink_notify
diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index 48630b2..d5cba7b 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -122,7 +122,7 @@ struct xfrm_state
{
struct list_head all;
union {
- struct list_head gclist;
+ struct hlist_node gclist;
struct hlist_node bydst;
};
struct hlist_node bysrc;
@@ -1246,8 +1246,6 @@ struct xfrm6_tunnel {
};
struct xfrm_state_walk {
- struct list_head list;
- unsigned long genid;
struct xfrm_state *state;
int count;
u8 proto;
diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
index 053970e..f45f006 100644
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@ -59,14 +59,6 @@ static unsigned int xfrm_state_hashmax __read_mostly = 1 * 1024 * 1024;
static unsigned int xfrm_state_num;
static unsigned int xfrm_state_genid;
-/* Counter indicating ongoing walk, protected by xfrm_state_lock. */
-static unsigned long xfrm_state_walk_ongoing;
-/* Counter indicating walk completion, protected by xfrm_cfg_mutex. */
-static unsigned long xfrm_state_walk_completed;
-
-/* List of outstanding state walks used to set the completed counter. */
-static LIST_HEAD(xfrm_state_walks);
-
static struct xfrm_state_afinfo *xfrm_state_get_afinfo(unsigned int family);
static void xfrm_state_put_afinfo(struct xfrm_state_afinfo *afinfo);
@@ -199,8 +191,7 @@ static DEFINE_RWLOCK(xfrm_state_afinfo_lock);
static struct xfrm_state_afinfo *xfrm_state_afinfo[NPROTO];
static struct work_struct xfrm_state_gc_work;
-static LIST_HEAD(xfrm_state_gc_leftovers);
-static LIST_HEAD(xfrm_state_gc_list);
+static HLIST_HEAD(xfrm_state_gc_list);
static DEFINE_SPINLOCK(xfrm_state_gc_lock);
int __xfrm_state_delete(struct xfrm_state *x);
@@ -412,23 +403,17 @@ static void xfrm_state_gc_destroy(struct xfrm_state *x)
static void xfrm_state_gc_task(struct work_struct *data)
{
- struct xfrm_state *x, *tmp;
- unsigned long completed;
+ struct xfrm_state *x;
+ struct hlist_node *entry, *tmp;
+ struct hlist_head gc_list;
- mutex_lock(&xfrm_cfg_mutex);
spin_lock_bh(&xfrm_state_gc_lock);
- list_splice_tail_init(&xfrm_state_gc_list, &xfrm_state_gc_leftovers);
+ gc_list.first = xfrm_state_gc_list.first;
+ INIT_HLIST_HEAD(&xfrm_state_gc_list);
spin_unlock_bh(&xfrm_state_gc_lock);
- completed = xfrm_state_walk_completed;
- mutex_unlock(&xfrm_cfg_mutex);
-
- list_for_each_entry_safe(x, tmp, &xfrm_state_gc_leftovers, gclist) {
- if ((long)(x->lastused - completed) > 0)
- break;
- list_del(&x->gclist);
+ hlist_for_each_entry_safe(x, entry, tmp, &gc_list, gclist)
xfrm_state_gc_destroy(x);
- }
wake_up(&km_waitq);
}
@@ -553,12 +538,19 @@ EXPORT_SYMBOL(xfrm_state_alloc);
void __xfrm_state_destroy(struct xfrm_state *x)
{
+ struct xfrm_state *next = NULL;
+
WARN_ON(x->km.state != XFRM_STATE_DEAD);
+ if (x->all.next != &xfrm_state_all)
+ next = container_of(x->all.next, struct xfrm_state, all);
spin_lock_bh(&xfrm_state_gc_lock);
- list_add_tail(&x->gclist, &xfrm_state_gc_list);
+ hlist_add_head(&x->gclist, &xfrm_state_gc_list);
spin_unlock_bh(&xfrm_state_gc_lock);
schedule_work(&xfrm_state_gc_work);
+
+ if (next != NULL)
+ xfrm_state_put(next);
}
EXPORT_SYMBOL(__xfrm_state_destroy);
@@ -569,8 +561,10 @@ int __xfrm_state_delete(struct xfrm_state *x)
if (x->km.state != XFRM_STATE_DEAD) {
x->km.state = XFRM_STATE_DEAD;
spin_lock(&xfrm_state_lock);
- x->lastused = xfrm_state_walk_ongoing;
list_del_rcu(&x->all);
+ if (x->all.next != &xfrm_state_all)
+ xfrm_state_hold(container_of(x->all.next,
+ struct xfrm_state, all));
hlist_del(&x->bydst);
hlist_del(&x->bysrc);
if (x->id.spi)
@@ -1612,33 +1606,15 @@ void xfrm_state_walk_init(struct xfrm_state_walk *walk, u8 proto)
walk->proto = proto;
walk->state = NULL;
walk->count = 0;
- list_add_tail(&walk->list, &xfrm_state_walks);
- walk->genid = ++xfrm_state_walk_ongoing;
}
EXPORT_SYMBOL(xfrm_state_walk_init);
void xfrm_state_walk_done(struct xfrm_state_walk *walk)
{
- struct list_head *prev;
-
if (walk->state != NULL) {
xfrm_state_put(walk->state);
walk->state = NULL;
}
-
- prev = walk->list.prev;
- list_del(&walk->list);
-
- if (prev != &xfrm_state_walks) {
- list_entry(prev, struct xfrm_state_walk, list)->genid =
- walk->genid;
- return;
- }
-
- xfrm_state_walk_completed = walk->genid;
-
- if (!list_empty(&xfrm_state_gc_leftovers))
- schedule_work(&xfrm_state_gc_work);
}
EXPORT_SYMBOL(xfrm_state_walk_done);
next prev parent reply other threads:[~2008-09-23 6:25 UTC|newest]
Thread overview: 95+ messages / expand[flat|nested] mbox.gz Atom feed top
2008-09-03 2:51 xfrm_state locking regression David Miller
2008-09-03 3:00 ` David Miller
2008-09-03 5:01 ` Herbert Xu
2008-09-03 5:07 ` Timo Teräs
2008-09-03 5:23 ` Herbert Xu
2008-09-03 5:39 ` Timo Teräs
2008-09-03 5:40 ` Herbert Xu
2008-09-09 12:25 ` David Miller
2008-09-03 5:39 ` Herbert Xu
2008-09-03 5:45 ` Timo Teräs
2008-09-03 5:50 ` Herbert Xu
2008-09-03 6:14 ` David Miller
2008-09-03 6:27 ` Timo Teräs
2008-09-03 6:35 ` David Miller
2008-09-03 6:45 ` Timo Teräs
2008-09-03 6:47 ` David Miller
2008-09-03 7:14 ` Timo Teräs
2008-09-05 11:55 ` Herbert Xu
2008-09-09 0:09 ` David Miller
2008-09-09 0:18 ` Herbert Xu
2008-09-09 0:20 ` David Miller
2008-09-09 0:25 ` David Miller
2008-09-09 14:33 ` Herbert Xu
2008-09-09 20:20 ` David Miller
2008-09-10 3:01 ` David Miller
2008-09-11 21:24 ` Paul E. McKenney
2008-09-11 22:00 ` David Miller
2008-09-11 23:22 ` Paul E. McKenney
2008-09-12 16:08 ` Herbert Xu
2008-09-12 17:37 ` Paul E. McKenney
2008-09-21 12:29 ` Timo Teräs
2008-09-21 15:21 ` Timo Teräs
2008-09-22 11:42 ` Herbert Xu
2008-09-22 13:01 ` Timo Teräs
2008-09-22 23:50 ` Herbert Xu
2008-09-23 4:53 ` Timo Teräs
2008-09-23 4:59 ` Herbert Xu
2008-09-23 5:17 ` Timo Teräs
2008-09-23 5:22 ` Herbert Xu
2008-09-23 6:25 ` Timo Teräs [this message]
2008-09-23 6:47 ` Herbert Xu
2008-09-23 6:56 ` Timo Teräs
2008-09-23 9:39 ` Timo Teräs
2008-09-23 11:24 ` Herbert Xu
2008-09-23 12:08 ` Timo Teräs
2008-09-23 12:14 ` Herbert Xu
2008-09-23 12:25 ` Timo Teräs
2008-09-23 12:56 ` Herbert Xu
2008-09-23 13:01 ` Timo Teräs
2008-09-23 13:07 ` Herbert Xu
2008-09-23 13:30 ` Timo Teräs
2008-09-23 13:32 ` Herbert Xu
2008-09-23 13:46 ` Timo Teräs
2008-09-24 4:23 ` Herbert Xu
2008-09-24 5:14 ` Timo Teräs
2008-09-24 5:15 ` Herbert Xu
2008-09-24 5:46 ` Timo Teräs
2008-09-24 5:55 ` Herbert Xu
2008-09-24 6:04 ` Timo Teräs
2008-09-24 6:13 ` Herbert Xu
2008-09-24 6:20 ` Timo Teräs
2008-09-24 6:21 ` Herbert Xu
2008-09-24 7:29 ` Timo Teräs
2008-09-24 7:54 ` Herbert Xu
2008-09-24 13:18 ` Timo Teräs
2008-09-24 14:08 ` Herbert Xu
2008-09-25 6:03 ` Timo Teräs
2008-09-25 7:57 ` Herbert Xu
2008-09-25 8:42 ` Timo Teräs
2008-09-25 8:56 ` Herbert Xu
2008-09-25 9:01 ` Timo Teräs
2008-09-25 9:49 ` Herbert Xu
2008-09-25 12:12 ` Timo Teräs
2008-09-25 12:36 ` Timo Teräs
2008-09-26 2:08 ` Herbert Xu
2008-10-01 10:07 ` David Miller
2008-10-01 14:05 ` Herbert Xu
2008-09-23 2:48 ` David Miller
2008-09-10 3:04 ` David Miller
2008-09-10 3:15 ` Herbert Xu
2008-09-10 3:22 ` David Miller
2008-09-10 3:23 ` Herbert Xu
2008-09-10 3:38 ` David Miller
2008-09-10 4:01 ` Herbert Xu
2008-09-10 4:06 ` David Miller
2008-09-10 4:22 ` Herbert Xu
2008-09-10 4:24 ` David Miller
2008-09-10 4:48 ` David Miller
2008-09-10 4:52 ` David Miller
2008-09-10 4:53 ` Herbert Xu
2008-09-10 5:21 ` David Miller
2008-09-10 5:16 ` Timo Teräs
2008-09-10 5:23 ` David Miller
2008-09-10 5:46 ` Herbert Xu
2008-09-03 6:10 ` David Miller
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=48D88BCC.5030806@iki.fi \
--to=timo.teras@iki.fi \
--cc=davem@davemloft.net \
--cc=herbert@gondor.apana.org.au \
--cc=netdev@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.