From: Stanislav Kinsbursky <skinsbursky@parallels.com>
To: bfields@fieldses.org, akpm@linux-foundation.org
Cc: linux-nfs@vger.kernel.org, Trond.Myklebust@netapp.com,
linux-kernel@vger.kernel.org, devel@openvz.org
Subject: [PATCH 2/2] SUNRPC: protect transport processing with per-cpu rw semaphore
Date: Fri, 01 Feb 2013 14:28:35 +0300 [thread overview]
Message-ID: <20130201112835.24066.20617.stgit@localhost.localdomain> (raw)
In-Reply-To: <20130201111046.24066.72836.stgit@localhost.localdomain>
There could be a service transport, which is processed by service thread and
racing in the same time with per-net service shutdown like listed below:
CPU#0: CPU#1:
svc_recv svc_close_net
svc_get_next_xprt (list_del_init(xpt_ready))
svc_close_list (set XPT_BUSY and XPT_CLOSE)
svc_clear_pools(xprt was gained on CPU#0 already)
svc_delete_xprt (set XPT_DEAD)
svc_handle_xprt (is XPT_CLOSE => svc_delete_xprt()
BUG()
There can be different solutions of the problem.
Probably, the patch doesn't implement the best one, but I hope the simple one.
IOW, it protects critical section (dequeuing of pending transport and
enqueuing it back to the pool) by per-service rw semaphore, taken for read.
On per-net transports shutdown, this semaphore have to be taken for write.
Note: add the PERCPU_RWSEM config option selected by SUNRPC.
Signed-off-by: Stanislav Kinsbursky <skinsbursky@parallels.com>
---
include/linux/sunrpc/svc.h | 2 ++
net/sunrpc/Kconfig | 1 +
net/sunrpc/svc.c | 2 ++
net/sunrpc/svc_xprt.c | 33 +++++++++++++++++++++++++++------
4 files changed, 32 insertions(+), 6 deletions(-)
diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h
index 1f0216b..2031d14 100644
--- a/include/linux/sunrpc/svc.h
+++ b/include/linux/sunrpc/svc.h
@@ -103,6 +103,8 @@ struct svc_serv {
* entries in the svc_cb_list */
struct svc_xprt *sv_bc_xprt; /* callback on fore channel */
#endif /* CONFIG_SUNRPC_BACKCHANNEL */
+
+ struct percpu_rw_semaphore sv_xprt_sem; /* sem to sync against per-net transports shutdown */
};
/*
diff --git a/net/sunrpc/Kconfig b/net/sunrpc/Kconfig
index 03d03e3..5e6548c 100644
--- a/net/sunrpc/Kconfig
+++ b/net/sunrpc/Kconfig
@@ -1,5 +1,6 @@
config SUNRPC
tristate
+ select PERCPU_RWSEM
config SUNRPC_GSS
tristate
diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
index b9ba2a8..ef74c72 100644
--- a/net/sunrpc/svc.c
+++ b/net/sunrpc/svc.c
@@ -483,6 +483,8 @@ __svc_create(struct svc_program *prog, unsigned int bufsize, int npools,
if (svc_uses_rpcbind(serv) && (!serv->sv_shutdown))
serv->sv_shutdown = svc_rpcb_cleanup;
+ percpu_init_rwsem(&serv->sv_xprt_sem);
+
return serv;
}
diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c
index 5a9d40c..855a6ba 100644
--- a/net/sunrpc/svc_xprt.c
+++ b/net/sunrpc/svc_xprt.c
@@ -471,6 +471,8 @@ static void svc_xprt_release(struct svc_rqst *rqstp)
svc_reserve(rqstp, 0);
rqstp->rq_xprt = NULL;
+ percpu_up_read(&rqstp->rq_server->sv_xprt_sem);
+
svc_xprt_put(xprt);
}
@@ -618,12 +620,14 @@ struct svc_xprt *svc_get_next_xprt(struct svc_rqst *rqstp, long timeout)
struct svc_pool *pool = rqstp->rq_pool;
DECLARE_WAITQUEUE(wait, current);
long time_left;
+ struct svc_serv *serv = rqstp->rq_server;
/* Normally we will wait up to 5 seconds for any required
* cache information to be provided.
*/
rqstp->rq_chandle.thread_wait = 5*HZ;
+ percpu_down_read(&serv->sv_xprt_sem);
spin_lock_bh(&pool->sp_lock);
xprt = svc_xprt_dequeue(pool);
if (xprt) {
@@ -640,7 +644,8 @@ struct svc_xprt *svc_get_next_xprt(struct svc_rqst *rqstp, long timeout)
if (pool->sp_task_pending) {
pool->sp_task_pending = 0;
spin_unlock_bh(&pool->sp_lock);
- return ERR_PTR(-EAGAIN);
+ xprt = ERR_PTR(-EAGAIN);
+ goto out_err;
}
/* No data pending. Go to sleep */
svc_thread_enqueue(pool, rqstp);
@@ -661,16 +666,19 @@ struct svc_xprt *svc_get_next_xprt(struct svc_rqst *rqstp, long timeout)
if (kthread_should_stop()) {
set_current_state(TASK_RUNNING);
spin_unlock_bh(&pool->sp_lock);
- return ERR_PTR(-EINTR);
+ xprt = ERR_PTR(-EINTR);
+ goto out_err;
}
add_wait_queue(&rqstp->rq_wait, &wait);
spin_unlock_bh(&pool->sp_lock);
+ percpu_up_read(&serv->sv_xprt_sem);
time_left = schedule_timeout(timeout);
try_to_freeze();
+ percpu_down_read(&serv->sv_xprt_sem);
spin_lock_bh(&pool->sp_lock);
remove_wait_queue(&rqstp->rq_wait, &wait);
if (!time_left)
@@ -681,13 +689,24 @@ struct svc_xprt *svc_get_next_xprt(struct svc_rqst *rqstp, long timeout)
svc_thread_dequeue(pool, rqstp);
spin_unlock_bh(&pool->sp_lock);
dprintk("svc: server %p, no data yet\n", rqstp);
- if (signalled() || kthread_should_stop())
- return ERR_PTR(-EINTR);
- else
- return ERR_PTR(-EAGAIN);
+ if (signalled() || kthread_should_stop()) {
+ xprt = ERR_PTR(-EINTR);
+ goto out_err;
+ } else {
+ xprt = ERR_PTR(-EAGAIN);
+ goto out_err;
+ }
}
}
spin_unlock_bh(&pool->sp_lock);
+out_err:
+ if (IS_ERR(xprt))
+ /*
+ * Note: we relased semaphore only if an error occured.
+ * Otherwise we hold it till transport processing will be
+ * completed in svc_xprt_release().
+ */
+ percpu_up_read(&serv->sv_xprt_sem);
return xprt;
}
@@ -1020,10 +1039,12 @@ static void svc_clear_list(struct svc_serv *serv, struct list_head *xprt_list, s
void svc_close_net(struct svc_serv *serv, struct net *net)
{
+ percpu_down_write(&serv->sv_xprt_sem);
svc_close_list(serv, &serv->sv_tempsocks, net);
svc_close_list(serv, &serv->sv_permsocks, net);
svc_clear_pools(serv, net);
+ percpu_up_write(&serv->sv_xprt_sem);
/*
* At this point the sp_sockets lists will stay empty, since
* svc_xprt_enqueue will not add new entries without taking the
next prev parent reply other threads:[~2013-02-01 11:27 UTC|newest]
Thread overview: 13+ messages / expand[flat|nested] mbox.gz Atom feed top
2013-02-01 11:28 [PATCH 0/2] NFSD: fix races in service per-net resources allocation Stanislav Kinsbursky
2013-02-01 11:28 ` [PATCH 1/2] per-cpu semaphores: export symbols to modules Stanislav Kinsbursky
2013-02-01 11:28 ` Stanislav Kinsbursky [this message]
2013-02-11 0:25 ` [PATCH 0/2] NFSD: fix races in service per-net resources allocation J. Bruce Fields
2013-02-11 6:18 ` Stanislav Kinsbursky
2013-02-11 16:37 ` J. Bruce Fields
2013-02-11 20:58 ` J. Bruce Fields
2013-02-12 9:52 ` Stanislav Kinsbursky
2013-02-12 20:45 ` J. Bruce Fields
2013-02-12 21:18 ` Peter Staubach
2013-02-12 21:18 ` Peter Staubach
2013-02-13 5:16 ` Stanislav Kinsbursky
2013-02-12 6:49 ` Stanislav Kinsbursky
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20130201112835.24066.20617.stgit@localhost.localdomain \
--to=skinsbursky@parallels.com \
--cc=Trond.Myklebust@netapp.com \
--cc=akpm@linux-foundation.org \
--cc=bfields@fieldses.org \
--cc=devel@openvz.org \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-nfs@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.