* [Patch net] tipc: switch to rhashtable iterator
From: Cong Wang @ 2018-08-24 19:28 UTC
To: netdev; +Cc: tipc-discussion, Cong Wang, Jon Maloy, Ying Xue
syzbot reported a use-after-free in tipc_group_fill_sock_diag(),
which still reads tsk->group while tipc_group_delete() is freeing
it in tipc_release().
tipc_nl_sk_walk() tries to close such races by locking each sock
while walking the hash table, but it acquires the
tsk->sk.sk_lock.slock spinlock, which doesn't work at all: all
non-BH call paths must take lock_sock() instead for the exclusion
to be effective.
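For illustration, a minimal sketch of the two locking styles on a
process-context path (not part of the patch; error handling omitted):

    /* Broken: only grabs the BH spinlock, which does not
     * exclude an owner holding the sock via lock_sock(),
     * so tsk->group can still be freed underneath us.
     */
    spin_lock_bh(&tsk->sk.sk_lock.slock);
    /* ... read tsk->group ... */
    spin_unlock_bh(&tsk->sk.sk_lock.slock);

    /* Correct in process context: serializes with
     * tipc_release() and any other lock_sock() owner.
     */
    lock_sock(&tsk->sk);
    /* ... read tsk->group safely ... */
    release_sock(&tsk->sk);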
tipc_nl_sk_walk() iterates with the raw rht_for_each_entry_rcu(),
which requires the RCU read lock to be held, and that is why
lock_sock() cannot be taken on this path. This is resolved by
switching to the rhashtable iterator APIs, which allow dropping
the RCU read lock so that a sleeping lock can be taken. The
iterator APIs also suit restartable calls like the diag dump: the
last position is remembered behind the scenes, so all we need to
do is save the iterator in cb->args[].
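As a rough sketch of the iterator contract this relies on
(generalized; 'ht' and 'obj' are placeholders):

    struct rhashtable_iter iter;

    rhashtable_walk_enter(&ht, &iter);   /* entered once, iter kept in cb->args[] */
    rhashtable_walk_start(&iter);        /* takes the RCU read lock */
    while ((obj = rhashtable_walk_next(&iter)) != NULL) {
            if (IS_ERR(obj)) {
                    if (PTR_ERR(obj) == -EAGAIN)
                            continue;    /* table resize; entries may repeat */
                    break;
            }
            /* take a reference on obj, then: */
            rhashtable_walk_stop(&iter); /* drops RCU; sleeping is allowed */
            /* lock_sock(), run the dump handler, ... */
            rhashtable_walk_start(&iter);
    }
    rhashtable_walk_stop(&iter);
    rhashtable_walk_exit(&iter);         /* when the whole dump is done */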
I tested this with parallel tipc diag dumps against thousands of
tipc socket creations and releases; no crash or memory leak was
observed.
Reported-by: syzbot+b9c8f3ab2994b7cd1625@syzkaller.appspotmail.com
Cc: Jon Maloy <jon.maloy@ericsson.com>
Cc: Ying Xue <ying.xue@windriver.com>
Signed-off-by: Cong Wang <xiyou.wangcong@gmail.com>
---
net/tipc/diag.c | 2 ++
net/tipc/netlink.c | 2 ++
net/tipc/socket.c | 76 +++++++++++++++++++++++++++++++++++-------------------
net/tipc/socket.h | 2 ++
4 files changed, 56 insertions(+), 26 deletions(-)
diff --git a/net/tipc/diag.c b/net/tipc/diag.c
index aaabb0b776dd..73137f4aeb68 100644
--- a/net/tipc/diag.c
+++ b/net/tipc/diag.c
@@ -84,7 +84,9 @@ static int tipc_sock_diag_handler_dump(struct sk_buff *skb,
if (h->nlmsg_flags & NLM_F_DUMP) {
struct netlink_dump_control c = {
+ .start = tipc_dump_start,
.dump = tipc_diag_dump,
+ .done = tipc_dump_done,
};
netlink_dump_start(net->diag_nlsk, skb, h, &c);
return 0;
diff --git a/net/tipc/netlink.c b/net/tipc/netlink.c
index 6ff2254088f6..99ee419210ba 100644
--- a/net/tipc/netlink.c
+++ b/net/tipc/netlink.c
@@ -167,7 +167,9 @@ static const struct genl_ops tipc_genl_v2_ops[] = {
},
{
.cmd = TIPC_NL_SOCK_GET,
+ .start = tipc_dump_start,
.dumpit = tipc_nl_sk_dump,
+ .done = tipc_dump_done,
.policy = tipc_nl_policy,
},
{
diff --git a/net/tipc/socket.c b/net/tipc/socket.c
index c9a50b62c738..ab7a2a7178f7 100644
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@ -3229,45 +3229,69 @@ int tipc_nl_sk_walk(struct sk_buff *skb, struct netlink_callback *cb,
struct netlink_callback *cb,
struct tipc_sock *tsk))
{
- struct net *net = sock_net(skb->sk);
- struct tipc_net *tn = tipc_net(net);
- const struct bucket_table *tbl;
- u32 prev_portid = cb->args[1];
- u32 tbl_id = cb->args[0];
- struct rhash_head *pos;
+ struct rhashtable_iter *iter = (void *)cb->args[0];
struct tipc_sock *tsk;
int err;
- rcu_read_lock();
- tbl = rht_dereference_rcu((&tn->sk_rht)->tbl, &tn->sk_rht);
- for (; tbl_id < tbl->size; tbl_id++) {
- rht_for_each_entry_rcu(tsk, pos, tbl, tbl_id, node) {
- spin_lock_bh(&tsk->sk.sk_lock.slock);
- if (prev_portid && prev_portid != tsk->portid) {
- spin_unlock_bh(&tsk->sk.sk_lock.slock);
+ rhashtable_walk_start(iter);
+ while ((tsk = rhashtable_walk_next(iter)) != NULL) {
+ if (IS_ERR(tsk)) {
+ err = PTR_ERR(tsk);
+ if (err == -EAGAIN) {
+ err = 0;
continue;
}
+ break;
+ }
- err = skb_handler(skb, cb, tsk);
- if (err) {
- prev_portid = tsk->portid;
- spin_unlock_bh(&tsk->sk.sk_lock.slock);
- goto out;
- }
-
- prev_portid = 0;
- spin_unlock_bh(&tsk->sk.sk_lock.slock);
+ sock_hold(&tsk->sk);
+ rhashtable_walk_stop(iter);
+ lock_sock(&tsk->sk);
+ err = skb_handler(skb, cb, tsk);
+ if (err) {
+ release_sock(&tsk->sk);
+ sock_put(&tsk->sk);
+ goto out;
}
+ release_sock(&tsk->sk);
+ rhashtable_walk_start(iter);
+ sock_put(&tsk->sk);
}
+ rhashtable_walk_stop(iter);
out:
- rcu_read_unlock();
- cb->args[0] = tbl_id;
- cb->args[1] = prev_portid;
-
return skb->len;
}
EXPORT_SYMBOL(tipc_nl_sk_walk);
+int tipc_dump_start(struct netlink_callback *cb)
+{
+ struct rhashtable_iter *iter = (void *)cb->args[0];
+ struct net *net = sock_net(cb->skb->sk);
+ struct tipc_net *tn = tipc_net(net);
+
+ if (!iter) {
+ iter = kmalloc(sizeof(*iter), GFP_KERNEL);
+ if (!iter)
+ return -ENOMEM;
+
+ cb->args[0] = (long)iter;
+ }
+
+ rhashtable_walk_enter(&tn->sk_rht, iter);
+ return 0;
+}
+EXPORT_SYMBOL(tipc_dump_start);
+
+int tipc_dump_done(struct netlink_callback *cb)
+{
+ struct rhashtable_iter *hti = (void *)cb->args[0];
+
+ rhashtable_walk_exit(hti);
+ kfree(hti);
+ return 0;
+}
+EXPORT_SYMBOL(tipc_dump_done);
+
int tipc_sk_fill_sock_diag(struct sk_buff *skb, struct netlink_callback *cb,
struct tipc_sock *tsk, u32 sk_filter_state,
u64 (*tipc_diag_gen_cookie)(struct sock *sk))
diff --git a/net/tipc/socket.h b/net/tipc/socket.h
index aff9b2ae5a1f..d43032e26532 100644
--- a/net/tipc/socket.h
+++ b/net/tipc/socket.h
@@ -68,4 +68,6 @@ int tipc_nl_sk_walk(struct sk_buff *skb, struct netlink_callback *cb,
int (*skb_handler)(struct sk_buff *skb,
struct netlink_callback *cb,
struct tipc_sock *tsk));
+int tipc_dump_start(struct netlink_callback *cb);
+int tipc_dump_done(struct netlink_callback *cb);
#endif
--
2.14.4
* Re: [Patch net] tipc: switch to rhashtable iterator
From: David Miller @ 2018-08-30 1:05 UTC
To: xiyou.wangcong; +Cc: netdev, tipc-discussion, jon.maloy, ying.xue
From: Cong Wang <xiyou.wangcong@gmail.com>
Date: Fri, 24 Aug 2018 12:28:06 -0700
> syzbot reported a use-after-free in tipc_group_fill_sock_diag(),
> which still reads tsk->group while tipc_group_delete() is freeing
> it in tipc_release().
>
> tipc_nl_sk_walk() tries to close such races by locking each sock
> while walking the hash table, but it acquires the
> tsk->sk.sk_lock.slock spinlock, which doesn't work at all: all
> non-BH call paths must take lock_sock() instead for the exclusion
> to be effective.
>
> tipc_nl_sk_walk() iterates with the raw rht_for_each_entry_rcu(),
> which requires the RCU read lock to be held, and that is why
> lock_sock() cannot be taken on this path. This is resolved by
> switching to the rhashtable iterator APIs, which allow dropping
> the RCU read lock so that a sleeping lock can be taken. The
> iterator APIs also suit restartable calls like the diag dump: the
> last position is remembered behind the scenes, so all we need to
> do is save the iterator in cb->args[].
>
> I tested this with parallel tipc diag dumps against thousands of
> tipc socket creations and releases; no crash or memory leak was
> observed.
>
> Reported-by: syzbot+b9c8f3ab2994b7cd1625@syzkaller.appspotmail.com
> Cc: Jon Maloy <jon.maloy@ericsson.com>
> Cc: Ying Xue <ying.xue@windriver.com>
> Signed-off-by: Cong Wang <xiyou.wangcong@gmail.com>
Applied and queued up for -stable, thanks Cong.