From: Kirill Tkhai <ktkhai@virtuozzo.com>
To: davem@davemloft.net, vyasevic@redhat.com,
kstewart@linuxfoundation.org, pombredanne@nexb.com,
vyasevich@gmail.com, mark.rutland@arm.com,
gregkh@linuxfoundation.org, adobriyan@gmail.com, fw@strlen.de,
nicolas.dichtel@6wind.com, xiyou.wangcong@gmail.com,
roman.kapl@sysgo.com, paul@paul-moore.com, dsahern@gmail.com,
daniel@iogearbox.net, lucien.xin@gmail.com,
mschiffer@universe-factory.net, rshearma@brocade.com,
linux-kernel@vger.kernel.org, netdev@vger.kernel.org,
ktkhai@virtuozzo.com, ebiederm@xmission.com,
avagin@virtuozzo.com, gorcunov@virtuozzo.com,
eric.dumazet@gmail.com, stephen@networkplumber.org,
ktkhai@virtuozzo.com
Subject: [PATCH RFC 03/25] net: Introduce net_sem for protection of pernet_list
Date: Fri, 17 Nov 2017 21:27:41 +0300 [thread overview]
Message-ID: <151094326157.20009.5480759391073404834.stgit@localhost.localdomain> (raw)
In-Reply-To: <151094119999.20009.6955267140148739392.stgit@localhost.localdomain>
Currently a mutex is used to protect the pernet operations list. It makes
cleanup_net() execute the ->exit methods of the same set of operations
that was used at the time of ->init, even after the net namespace is
unlinked from net_namespace_list.
But the problem is that synchronize_rcu() is needed after net is removed
from net_namespace_list:
Destroy net_ns:
cleanup_net()
mutex_lock(&net_mutex)
list_del_rcu(&net->list)
synchronize_rcu() <--- Sleep there for ages
list_for_each_entry_reverse(ops, &pernet_list, list)
ops_exit_list(ops, &net_exit_list)
list_for_each_entry_reverse(ops, &pernet_list, list)
ops_free_list(ops, &net_exit_list)
mutex_unlock(&net_mutex)
This primitive is not fast, especially on systems with many processors
and/or when preemptible RCU is enabled in the config. So, all the time while
cleanup_net() is waiting for the RCU grace period, creation of new net namespaces
is not possible; the tasks that attempt it sleep on the same mutex:
Create net_ns:
copy_net_ns()
mutex_lock_killable(&net_mutex) <--- Sleep there for ages
I observed 20-30 second hangs of "unshare -n" on an ordinary 8-cpu laptop
with preemptible RCU enabled.
The solution is to convert net_mutex to an rw_semaphore and add small locks
to the really small number of pernet_operations that really need them. Then,
pernet_operations::init/::exit methods, modifying the net-related data,
will require down_read() locking only, while down_write() will be used
for changing pernet_list.
This gives a significant performance increase, as you may see here:
https://www.spinics.net/lists/netdev/msg467095.html
It's a 4.6 times performance increase on a single-threaded test.
The increase for multi-threaded tests may be close to 4.6 multiplied
by the number of threads.
This patch starts replacing net_mutex with net_sem. It adds the rw_semaphore,
describes the variables it protects, and uses it where appropriate.
net_mutex is still present, and the next patches will kick it out step by step.
Signed-off-by: Kirill Tkhai <ktkhai@virtuozzo.com>
---
include/linux/rtnetlink.h | 1 +
net/core/net_namespace.c | 37 +++++++++++++++++++++++++------------
net/core/rtnetlink.c | 4 ++--
3 files changed, 28 insertions(+), 14 deletions(-)
diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h
index 2032ce2eb20b..f640fc87fe1d 100644
--- a/include/linux/rtnetlink.h
+++ b/include/linux/rtnetlink.h
@@ -35,6 +35,7 @@ extern int rtnl_is_locked(void);
extern wait_queue_head_t netdev_unregistering_wq;
extern struct mutex net_mutex;
+extern struct rw_semaphore net_sem;
#ifdef CONFIG_PROVE_LOCKING
extern bool lockdep_rtnl_is_held(void);
diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
index 2e512965bf42..2254b1639209 100644
--- a/net/core/net_namespace.c
+++ b/net/core/net_namespace.c
@@ -41,6 +41,11 @@ struct net init_net = {
EXPORT_SYMBOL(init_net);
static bool init_net_initialized;
+/*
+ * net_sem: protects: pernet_list, net_generic_ids,
+ * init_net_initialized and first_* pointers.
+ */
+DECLARE_RWSEM(net_sem);
#define MIN_PERNET_OPS_ID \
((sizeof(struct net_generic) + sizeof(void *) - 1) / sizeof(void *))
@@ -411,12 +416,16 @@ struct net *copy_net_ns(unsigned long flags,
net->ucounts = ucounts;
get_user_ns(user_ns);
- rv = mutex_lock_killable(&net_mutex);
+ rv = down_read_killable(&net_sem);
if (rv < 0)
goto put_userns;
-
+ rv = mutex_lock_killable(&net_mutex);
+ if (rv < 0)
+ goto up_read;
rv = setup_net(net, user_ns);
mutex_unlock(&net_mutex);
+up_read:
+ up_read(&net_sem);
if (rv < 0) {
put_userns:
put_user_ns(user_ns);
@@ -443,6 +452,7 @@ static void cleanup_net(struct work_struct *work)
list_replace_init(&cleanup_list, &net_kill_list);
spin_unlock_irq(&cleanup_list_lock);
+ down_read(&net_sem);
mutex_lock(&net_mutex);
/* Don't let anyone else find us. */
@@ -484,6 +494,7 @@ static void cleanup_net(struct work_struct *work)
ops_free_list(ops, &net_exit_list);
mutex_unlock(&net_mutex);
+ up_read(&net_sem);
/* Ensure there are no outstanding rcu callbacks using this
* network namespace.
@@ -510,8 +521,10 @@ static void cleanup_net(struct work_struct *work)
*/
void net_ns_barrier(void)
{
+ down_write(&net_sem);
mutex_lock(&net_mutex);
mutex_unlock(&net_mutex);
+ up_write(&net_sem);
}
EXPORT_SYMBOL(net_ns_barrier);
@@ -838,12 +851,12 @@ static int __init net_ns_init(void)
rcu_assign_pointer(init_net.gen, ng);
- mutex_lock(&net_mutex);
+ down_write(&net_sem);
if (setup_net(&init_net, &init_user_ns))
panic("Could not setup the initial network namespace");
init_net_initialized = true;
- mutex_unlock(&net_mutex);
+ up_write(&net_sem);
register_pernet_subsys(&net_ns_ops);
@@ -983,9 +996,9 @@ static void unregister_pernet_operations(struct pernet_operations *ops)
int register_pernet_subsys(struct pernet_operations *ops)
{
int error;
- mutex_lock(&net_mutex);
+ down_write(&net_sem);
error = register_pernet_operations(first_device, ops);
- mutex_unlock(&net_mutex);
+ up_write(&net_sem);
return error;
}
EXPORT_SYMBOL_GPL(register_pernet_subsys);
@@ -1001,9 +1014,9 @@ EXPORT_SYMBOL_GPL(register_pernet_subsys);
*/
void unregister_pernet_subsys(struct pernet_operations *ops)
{
- mutex_lock(&net_mutex);
+ down_write(&net_sem);
unregister_pernet_operations(ops);
- mutex_unlock(&net_mutex);
+ up_write(&net_sem);
}
EXPORT_SYMBOL_GPL(unregister_pernet_subsys);
@@ -1029,11 +1042,11 @@ EXPORT_SYMBOL_GPL(unregister_pernet_subsys);
int register_pernet_device(struct pernet_operations *ops)
{
int error;
- mutex_lock(&net_mutex);
+ down_write(&net_sem);
error = register_pernet_operations(&pernet_list, ops);
if (!error && (first_device == &pernet_list))
first_device = &ops->list;
- mutex_unlock(&net_mutex);
+ up_write(&net_sem);
return error;
}
EXPORT_SYMBOL_GPL(register_pernet_device);
@@ -1049,11 +1062,11 @@ EXPORT_SYMBOL_GPL(register_pernet_device);
*/
void unregister_pernet_device(struct pernet_operations *ops)
{
- mutex_lock(&net_mutex);
+ down_write(&net_sem);
if (&ops->list == first_device)
first_device = first_device->next;
unregister_pernet_operations(ops);
- mutex_unlock(&net_mutex);
+ up_write(&net_sem);
}
EXPORT_SYMBOL_GPL(unregister_pernet_device);
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index dabba2a91fc8..cb06d43c4230 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -390,11 +390,11 @@ static void rtnl_lock_unregistering_all(void)
void rtnl_link_unregister(struct rtnl_link_ops *ops)
{
/* Close the race with cleanup_net() */
- mutex_lock(&net_mutex);
+ down_write(&net_sem);
rtnl_lock_unregistering_all();
__rtnl_link_unregister(ops);
rtnl_unlock();
- mutex_unlock(&net_mutex);
+ up_write(&net_sem);
}
EXPORT_SYMBOL_GPL(rtnl_link_unregister);
next prev parent reply other threads:[~2017-11-17 18:27 UTC|newest]
Thread overview: 27+ messages / expand[flat|nested] mbox.gz Atom feed top
2017-11-17 18:27 [PATCH RFC 00/25] Replacing net_mutex with rw_semaphore Kirill Tkhai
2017-11-17 18:27 ` [PATCH RFC 01/25] net: Assign net to net_namespace_list in setup_net() Kirill Tkhai
2017-11-17 18:27 ` [PATCH RFC 02/25] net: Cleanup copy_net_ns() Kirill Tkhai
2017-11-17 18:27 ` Kirill Tkhai [this message]
2017-11-17 18:27 ` [PATCH RFC 04/25] net: Move mutex_unlock() in cleanup_net() up Kirill Tkhai
2017-11-17 18:28 ` [PATCH RFC 05/25] net: Add primitives to update heads of pernet_list sublists Kirill Tkhai
2017-11-17 18:28 ` [PATCH RFC 06/25] net: Add pernet sys and registration functions Kirill Tkhai
2017-11-17 18:28 ` [PATCH RFC 07/25] net: Make sys sublist pernet_operations executed out of net_mutex Kirill Tkhai
2017-11-17 18:28 ` [PATCH RFC 08/25] net: Move proc_net_ns_ops to pernet_sys list Kirill Tkhai
2017-11-17 18:28 ` [PATCH RFC 09/25] net: Move net_ns_ops " Kirill Tkhai
2017-11-17 18:28 ` [PATCH RFC 10/25] net: Move sysctl_pernet_ops " Kirill Tkhai
2017-11-17 18:29 ` [PATCH RFC 11/25] net: Move netfilter_net_ops " Kirill Tkhai
2017-11-17 18:29 ` [PATCH RFC 12/25] net: Move nf_log_net_ops " Kirill Tkhai
2017-11-17 18:29 ` [PATCH RFC 13/25] net: Move net_inuse_ops " Kirill Tkhai
2017-11-17 18:29 ` [PATCH RFC 14/25] net: Move net_defaults_ops " Kirill Tkhai
2017-11-17 18:29 ` [PATCH RFC 15/25] net: Move netlink_net_ops " Kirill Tkhai
2017-11-17 18:29 ` [PATCH RFC 16/25] net: Move rtnetlink_net_ops " Kirill Tkhai
2017-11-17 18:29 ` [PATCH RFC 17/25] net: Move audit_net_ops " Kirill Tkhai
2017-11-17 18:30 ` [PATCH RFC 18/25] net: Move uevent_net_ops " Kirill Tkhai
2017-11-17 18:30 ` [PATCH RFC 19/25] net: Move proto_net_ops " Kirill Tkhai
2017-11-17 18:30 ` [PATCH RFC 20/25] net: Move pernet_subsys, registered via net_dev_init(), " Kirill Tkhai
2017-11-17 18:30 ` [PATCH RFC 21/25] net: Move fib_* pernet_operations, registered via subsys_initcall(), " Kirill Tkhai
2017-11-17 18:30 ` [PATCH RFC 22/25] net: Move subsys_initcall() registered pernet_operations from net/sched " Kirill Tkhai
2017-11-17 18:30 ` [PATCH RFC 23/25] net: Move genl_pernet_ops " Kirill Tkhai
2017-11-17 18:31 ` [PATCH RFC 24/25] net: Move wext_pernet_ops " Kirill Tkhai
2017-11-17 18:31 ` [PATCH RFC 25/25] net: Move sysctl_core_ops " Kirill Tkhai
2017-11-19 1:52 ` [PATCH RFC 00/25] Replacing net_mutex with rw_semaphore Eric W. Biederman
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=151094326157.20009.5480759391073404834.stgit@localhost.localdomain \
--to=ktkhai@virtuozzo.com \
--cc=adobriyan@gmail.com \
--cc=avagin@virtuozzo.com \
--cc=daniel@iogearbox.net \
--cc=davem@davemloft.net \
--cc=dsahern@gmail.com \
--cc=ebiederm@xmission.com \
--cc=eric.dumazet@gmail.com \
--cc=fw@strlen.de \
--cc=gorcunov@virtuozzo.com \
--cc=gregkh@linuxfoundation.org \
--cc=kstewart@linuxfoundation.org \
--cc=linux-kernel@vger.kernel.org \
--cc=lucien.xin@gmail.com \
--cc=mark.rutland@arm.com \
--cc=mschiffer@universe-factory.net \
--cc=netdev@vger.kernel.org \
--cc=nicolas.dichtel@6wind.com \
--cc=paul@paul-moore.com \
--cc=pombredanne@nexb.com \
--cc=roman.kapl@sysgo.com \
--cc=rshearma@brocade.com \
--cc=stephen@networkplumber.org \
--cc=vyasevic@redhat.com \
--cc=vyasevich@gmail.com \
--cc=xiyou.wangcong@gmail.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox