From: David Miller <davem@davemloft.net>
To: herbert@gondor.apana.org.au
Cc: timo.teras@iki.fi, netdev@vger.kernel.org
Subject: Re: xfrm_state locking regression...
Date: Tue, 09 Sep 2008 21:06:54 -0700 (PDT) [thread overview]
Message-ID: <20080909.210654.159776216.davem@davemloft.net> (raw)
In-Reply-To: <20080910040107.GA29695@gondor.apana.org.au>
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Wed, 10 Sep 2008 14:01:07 +1000
> On Tue, Sep 09, 2008 at 08:38:08PM -0700, David Miller wrote:
> >
> > No problem. It might be a little bit of a chore because this new
> > walker design is intimately tied to the af_key non-atomic dump
> > changes.
>
> Actually I was mistaken as to how the original dump worked. I'd
> thought that it actually kept track of which bucket it was in and
> resumed from that bucket. However in reality it only had a global
> counter and would always start walking from the beginning up until
> the counted value. So it isn't as easy as just copying the old
> code across :)
Only AF_KEY gave an error, and this ended the dump. This was
one of Timo's goals, to make AF_KEY continue where it left
off in subsequent dump calls done by the user, when we hit the
socket limit.
> While it is possible to restore the old order and save a little
> bit of memory, I certainly don't think this is very urgent.
Too late, I already implemented it :-)
I'm about to give the following patch a test:
ipsec: Restore hash based xfrm_state dumping.
Get rid of ->all member of struct xfrm_state, and just use a hash
iteration like we used before.
This shrinks the size of struct xfrm_state, and also restores the
dump ordering of 2.6.25 and previous.
Signed-off-by: David S. Miller <davem@davemloft.net>
diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index 4bb9499..ba0e47c 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -120,7 +120,6 @@ extern struct mutex xfrm_cfg_mutex;
/* Full description of state of transformer. */
struct xfrm_state
{
- struct list_head all;
union {
struct list_head gclist;
struct hlist_node bydst;
@@ -1247,6 +1246,7 @@ struct xfrm6_tunnel {
struct xfrm_state_walk {
struct xfrm_state *state;
+ unsigned int chain;
int count;
u8 proto;
};
@@ -1283,9 +1283,10 @@ extern int xfrm_proc_init(void);
static inline void xfrm_state_walk_init(struct xfrm_state_walk *walk, u8 proto)
{
- walk->proto = proto;
walk->state = NULL;
+ walk->chain = 0;
walk->count = 0;
+ walk->proto = proto;
}
extern int xfrm_state_walk(struct xfrm_state_walk *walk,
diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
index aaafcee..ccf8492 100644
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@ -50,7 +50,6 @@ static DEFINE_SPINLOCK(xfrm_state_lock);
* Main use is finding SA after policy selected tunnel or transport mode.
* Also, it can be used by ah/esp icmp error handler to find offending SA.
*/
-static LIST_HEAD(xfrm_state_all);
static struct hlist_head *xfrm_state_bydst __read_mostly;
static struct hlist_head *xfrm_state_bysrc __read_mostly;
static struct hlist_head *xfrm_state_byspi __read_mostly;
@@ -525,7 +524,6 @@ struct xfrm_state *xfrm_state_alloc(void)
if (x) {
atomic_set(&x->refcnt, 1);
atomic_set(&x->tunnel_users, 0);
- INIT_LIST_HEAD(&x->all);
INIT_HLIST_NODE(&x->bydst);
INIT_HLIST_NODE(&x->bysrc);
INIT_HLIST_NODE(&x->byspi);
@@ -566,7 +564,6 @@ int __xfrm_state_delete(struct xfrm_state *x)
x->km.state = XFRM_STATE_DEAD;
spin_lock(&xfrm_state_lock);
x->lastused = xfrm_state_walk_ongoing;
- list_del_rcu(&x->all);
hlist_del(&x->bydst);
hlist_del(&x->bysrc);
if (x->id.spi)
@@ -867,7 +864,6 @@ xfrm_state_find(xfrm_address_t *daddr, xfrm_address_t *saddr,
if (km_query(x, tmpl, pol) == 0) {
x->km.state = XFRM_STATE_ACQ;
- list_add_tail(&x->all, &xfrm_state_all);
hlist_add_head(&x->bydst, xfrm_state_bydst+h);
h = xfrm_src_hash(daddr, saddr, family);
hlist_add_head(&x->bysrc, xfrm_state_bysrc+h);
@@ -936,8 +932,6 @@ static void __xfrm_state_insert(struct xfrm_state *x)
x->genid = ++xfrm_state_genid;
- list_add_tail(&x->all, &xfrm_state_all);
-
h = xfrm_dst_hash(&x->id.daddr, &x->props.saddr,
x->props.reqid, x->props.family);
hlist_add_head(&x->bydst, xfrm_state_bydst+h);
@@ -1065,7 +1059,6 @@ static struct xfrm_state *__find_acq_core(unsigned short family, u8 mode, u32 re
xfrm_state_hold(x);
x->timer.expires = jiffies + sysctl_xfrm_acq_expires*HZ;
add_timer(&x->timer);
- list_add_tail(&x->all, &xfrm_state_all);
hlist_add_head(&x->bydst, xfrm_state_bydst+h);
h = xfrm_src_hash(daddr, saddr, family);
hlist_add_head(&x->bysrc, xfrm_state_bysrc+h);
@@ -1563,7 +1556,7 @@ int xfrm_state_walk(struct xfrm_state_walk *walk,
void *data)
{
struct xfrm_state *old, *x, *last = NULL;
- int err = 0;
+ int i, err = 0;
if (walk->state == NULL && walk->count != 0)
return 0;
@@ -1571,24 +1564,29 @@ int xfrm_state_walk(struct xfrm_state_walk *walk,
old = x = walk->state;
walk->state = NULL;
spin_lock_bh(&xfrm_state_lock);
- if (x == NULL)
- x = list_first_entry(&xfrm_state_all, struct xfrm_state, all);
- list_for_each_entry_from(x, &xfrm_state_all, all) {
- if (x->km.state == XFRM_STATE_DEAD)
- continue;
- if (!xfrm_id_proto_match(x->id.proto, walk->proto))
- continue;
- if (last) {
- err = func(last, walk->count, data);
- if (err) {
- xfrm_state_hold(last);
- walk->state = last;
- xfrm_state_walk_ongoing++;
- goto out;
+ for (i = walk->chain; i <= xfrm_state_hmask; i++) {
+ struct hlist_head *h = xfrm_state_bydst + i;
+ struct hlist_node *entry;
+
+ if (!x)
+ x = hlist_entry(h->first, struct xfrm_state, bydst);
+ hlist_for_each_entry_from(x, entry, bydst) {
+ if (!xfrm_id_proto_match(x->id.proto, walk->proto))
+ continue;
+ if (last) {
+ err = func(last, walk->count, data);
+ if (err) {
+ xfrm_state_hold(last);
+ walk->state = last;
+ walk->chain = i;
+ xfrm_state_walk_ongoing++;
+ goto out;
+ }
}
+ last = x;
+ walk->count++;
}
- last = x;
- walk->count++;
+ x = NULL;
}
if (walk->count == 0) {
err = -ENOENT;
next prev parent reply other threads:[~2008-09-10 4:07 UTC|newest]
Thread overview: 95+ messages / expand[flat|nested] mbox.gz Atom feed top
2008-09-03 2:51 xfrm_state locking regression David Miller
2008-09-03 3:00 ` David Miller
2008-09-03 5:01 ` Herbert Xu
2008-09-03 5:07 ` Timo Teräs
2008-09-03 5:23 ` Herbert Xu
2008-09-03 5:39 ` Timo Teräs
2008-09-03 5:40 ` Herbert Xu
2008-09-09 12:25 ` David Miller
2008-09-03 5:39 ` Herbert Xu
2008-09-03 5:45 ` Timo Teräs
2008-09-03 5:50 ` Herbert Xu
2008-09-03 6:14 ` David Miller
2008-09-03 6:27 ` Timo Teräs
2008-09-03 6:35 ` David Miller
2008-09-03 6:45 ` Timo Teräs
2008-09-03 6:47 ` David Miller
2008-09-03 7:14 ` Timo Teräs
2008-09-05 11:55 ` Herbert Xu
2008-09-09 0:09 ` David Miller
2008-09-09 0:18 ` Herbert Xu
2008-09-09 0:20 ` David Miller
2008-09-09 0:25 ` David Miller
2008-09-09 14:33 ` Herbert Xu
2008-09-09 20:20 ` David Miller
2008-09-10 3:01 ` David Miller
2008-09-11 21:24 ` Paul E. McKenney
2008-09-11 22:00 ` David Miller
2008-09-11 23:22 ` Paul E. McKenney
2008-09-12 16:08 ` Herbert Xu
2008-09-12 17:37 ` Paul E. McKenney
2008-09-21 12:29 ` Timo Teräs
2008-09-21 15:21 ` Timo Teräs
2008-09-22 11:42 ` Herbert Xu
2008-09-22 13:01 ` Timo Teräs
2008-09-22 23:50 ` Herbert Xu
2008-09-23 4:53 ` Timo Teräs
2008-09-23 4:59 ` Herbert Xu
2008-09-23 5:17 ` Timo Teräs
2008-09-23 5:22 ` Herbert Xu
2008-09-23 6:25 ` Timo Teräs
2008-09-23 6:47 ` Herbert Xu
2008-09-23 6:56 ` Timo Teräs
2008-09-23 9:39 ` Timo Teräs
2008-09-23 11:24 ` Herbert Xu
2008-09-23 12:08 ` Timo Teräs
2008-09-23 12:14 ` Herbert Xu
2008-09-23 12:25 ` Timo Teräs
2008-09-23 12:56 ` Herbert Xu
2008-09-23 13:01 ` Timo Teräs
2008-09-23 13:07 ` Herbert Xu
2008-09-23 13:30 ` Timo Teräs
2008-09-23 13:32 ` Herbert Xu
2008-09-23 13:46 ` Timo Teräs
2008-09-24 4:23 ` Herbert Xu
2008-09-24 5:14 ` Timo Teräs
2008-09-24 5:15 ` Herbert Xu
2008-09-24 5:46 ` Timo Teräs
2008-09-24 5:55 ` Herbert Xu
2008-09-24 6:04 ` Timo Teräs
2008-09-24 6:13 ` Herbert Xu
2008-09-24 6:20 ` Timo Teräs
2008-09-24 6:21 ` Herbert Xu
2008-09-24 7:29 ` Timo Teräs
2008-09-24 7:54 ` Herbert Xu
2008-09-24 13:18 ` Timo Teräs
2008-09-24 14:08 ` Herbert Xu
2008-09-25 6:03 ` Timo Teräs
2008-09-25 7:57 ` Herbert Xu
2008-09-25 8:42 ` Timo Teräs
2008-09-25 8:56 ` Herbert Xu
2008-09-25 9:01 ` Timo Teräs
2008-09-25 9:49 ` Herbert Xu
2008-09-25 12:12 ` Timo Teräs
2008-09-25 12:36 ` Timo Teräs
2008-09-26 2:08 ` Herbert Xu
2008-10-01 10:07 ` David Miller
2008-10-01 14:05 ` Herbert Xu
2008-09-23 2:48 ` David Miller
2008-09-10 3:04 ` David Miller
2008-09-10 3:15 ` Herbert Xu
2008-09-10 3:22 ` David Miller
2008-09-10 3:23 ` Herbert Xu
2008-09-10 3:38 ` David Miller
2008-09-10 4:01 ` Herbert Xu
2008-09-10 4:06 ` David Miller [this message]
2008-09-10 4:22 ` Herbert Xu
2008-09-10 4:24 ` David Miller
2008-09-10 4:48 ` David Miller
2008-09-10 4:52 ` David Miller
2008-09-10 4:53 ` Herbert Xu
2008-09-10 5:21 ` David Miller
2008-09-10 5:16 ` Timo Teräs
2008-09-10 5:23 ` David Miller
2008-09-10 5:46 ` Herbert Xu
2008-09-03 6:10 ` David Miller
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20080909.210654.159776216.davem@davemloft.net \
--to=davem@davemloft.net \
--cc=herbert@gondor.apana.org.au \
--cc=netdev@vger.kernel.org \
--cc=timo.teras@iki.fi \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).