From: Jeff Layton <jlayton@primarydata.com>
To: linux-nfs@vger.kernel.org
Cc: linux-kernel@vger.kernel.org, Tejun Heo <tj@kernel.org>,
Al Viro <viro@zeniv.linux.org.uk>
Subject: [RFC PATCH 12/14] sunrpc: keep a cache of svc_rqsts for each NUMA node
Date: Tue, 2 Dec 2014 13:24:21 -0500
Message-ID: <1417544663-13299-13-git-send-email-jlayton@primarydata.com>
In-Reply-To: <1417544663-13299-1-git-send-email-jlayton@primarydata.com>

Allocating an entire svc_rqst (including all of its pages, etc.) for
each workqueue request is expensive. Instead, keep a cache of allocated
svc_rqst structures in each NUMA node's svc_pool, and try to reuse an
idle one before falling back to a fresh allocation.

To keep the cache from growing without bound, register a shrinker.
Since the cache is already NUMA-aware, we can use a NUMA-aware shrinker
as well.
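
To make the claim/release cycle concrete, here is a toy userspace model
of the idea (a sketch with hypothetical names; the real code uses
test_and_set_bit(RQ_BUSY, &rqstp->rq_flags) on the entries of
pool->sp_all_threads under rcu_read_lock(), with C11 atomics standing
in for the kernel bitops):

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

#define BUSY	0x1UL

struct cached_rqst {
	atomic_ulong flags;		/* models rqstp->rq_flags */
	int id;
};

/* Claim an idle entry; true means the caller now owns it. */
static bool try_claim(struct cached_rqst *r)
{
	/* models test_and_set_bit(RQ_BUSY, &rqstp->rq_flags) */
	return !(atomic_fetch_or(&r->flags, BUSY) & BUSY);
}

/* models put_svc_rqst(): return the entry to the cache */
static void put_rqst(struct cached_rqst *r)
{
	atomic_fetch_and(&r->flags, ~BUSY);
}

int main(void)
{
	struct cached_rqst cache[2] = { { .id = 0 }, { .id = 1 } };

	if (try_claim(&cache[0]))
		printf("claimed entry %d\n", cache[0].id);
	if (!try_claim(&cache[0]))
		printf("entry 0 is busy; would allocate a new one\n");
	put_rqst(&cache[0]);
	return 0;
}
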
Signed-off-by: Jeff Layton <jlayton@primarydata.com>
---
fs/nfsd/nfssvc.c | 6 +-
include/linux/sunrpc/svc.h | 17 ++++++
net/sunrpc/svc.c | 1 +
net/sunrpc/svc_wq.c | 136 ++++++++++++++++++++++++++++++++++++++++++++-
4 files changed, 154 insertions(+), 6 deletions(-)
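
A quick aside on the shrinker half of the design: with
SHRINKER_NUMA_AWARE set, the mm core invokes the count/scan callbacks
once per NUMA node, passing the node id in sc->nid, which is why each
callback in the patch below only touches serv->sv_pools[sc->nid]. Here
is a toy userspace model of that contract (hypothetical names, not the
real struct shrinker/shrink_control API):

#include <stdio.h>

#define NR_NODES 2

struct shrink_ctl {
	int nid;			/* models shrink_control->nid */
	unsigned long nr_to_scan;
};

static unsigned long node_cached[NR_NODES] = { 8, 3 };

/* How many freeable objects does this node's cache hold? */
static unsigned long count_objects(struct shrink_ctl *sc)
{
	return node_cached[sc->nid];
}

/* Free up to nr_to_scan objects from this node's cache. */
static unsigned long scan_objects(struct shrink_ctl *sc)
{
	unsigned long freed = sc->nr_to_scan;

	if (freed > node_cached[sc->nid])
		freed = node_cached[sc->nid];
	node_cached[sc->nid] -= freed;
	return freed;
}

int main(void)
{
	for (int nid = 0; nid < NR_NODES; nid++) {
		struct shrink_ctl sc = { .nid = nid, .nr_to_scan = 4 };

		printf("node %d: count=%lu freed=%lu\n", nid,
		       count_objects(&sc), scan_objects(&sc));
	}
	return 0;
}
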
diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
index 2c7ebced0311..c359e8f77b30 100644
--- a/fs/nfsd/nfssvc.c
+++ b/fs/nfsd/nfssvc.c
@@ -672,7 +672,6 @@ nfsd(void *vrqstp)
static void
nfsd_work(struct work_struct *work)
{
- int node = numa_node_id();
struct svc_xprt *xprt = container_of(work, struct svc_xprt, xpt_work);
struct net *net = xprt->xpt_net;
struct nfsd_net *nn = net_generic(net, nfsd_net_id);
@@ -681,7 +680,7 @@ nfsd_work(struct work_struct *work)
struct fs_struct *saved_fs;
int err;
- rqstp = svc_rqst_alloc(serv, &serv->sv_pools[node], node);
+ rqstp = find_or_alloc_svc_rqst(serv);
if (!rqstp) {
/* Alloc failure. Give up for now, and requeue the work */
queue_work(serv->sv_wq, &xprt->xpt_work);
@@ -703,8 +702,7 @@ nfsd_work(struct work_struct *work)
saved_fs = swap_fs_struct(saved_fs);
put_fs_struct(saved_fs);
-
- svc_rqst_free(rqstp);
+ put_svc_rqst(rqstp);
}
static struct svc_serv_ops nfsd_wq_sv_ops = {
diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h
index f47de87660b4..33321ddacfee 100644
--- a/include/linux/sunrpc/svc.h
+++ b/include/linux/sunrpc/svc.h
@@ -105,6 +105,7 @@ struct svc_serv {
struct svc_pool * sv_pools; /* array of thread pools */
struct svc_serv_ops * sv_ops; /* server operations */
struct workqueue_struct *sv_wq; /* workqueue for wq-based services */
+ struct shrinker sv_shrinker; /* for shrinking svc_rqst caches */
#if defined(CONFIG_SUNRPC_BACKCHANNEL)
struct list_head sv_cb_list; /* queue for callback requests
* that arrive over the same
@@ -274,6 +275,7 @@ struct svc_rqst {
#define RQ_VICTIM (5) /* about to be shut down */
#define RQ_BUSY (6) /* request is busy */
unsigned long rq_flags; /* flags field */
+ unsigned long rq_time; /* when rqstp was last put */
void * rq_argp; /* decoded arguments */
void * rq_resp; /* xdr'd results */
@@ -493,6 +495,21 @@ char * svc_print_addr(struct svc_rqst *, char *, size_t);
#if IS_ENABLED(CONFIG_SUNRPC_SVC_WORKQUEUE)
int svc_wq_setup(struct svc_serv *, struct svc_pool *, int);
void svc_wq_enqueue_xprt(struct svc_xprt *);
+struct svc_rqst * find_or_alloc_svc_rqst(struct svc_serv *serv);
+void exit_svc_rqst_cache(struct svc_serv *serv);
+
+static inline void
+put_svc_rqst(struct svc_rqst *rqstp)
+{
+ rqstp->rq_time = jiffies;
+ clear_bit_unlock(RQ_BUSY, &rqstp->rq_flags);
+}
+#else
+static inline void
+exit_svc_rqst_cache(struct svc_serv *serv)
+{
+ return;
+}
#endif
#define RPC_MAX_ADDRBUFLEN (63U)
diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
index 4300bc852f6e..4ebba00b8b27 100644
--- a/net/sunrpc/svc.c
+++ b/net/sunrpc/svc.c
@@ -547,6 +547,7 @@ svc_destroy(struct svc_serv *serv)
if (serv->sv_wq) {
destroy_workqueue(serv->sv_wq);
+ exit_svc_rqst_cache(serv);
module_put(serv->sv_ops->svo_module);
}
diff --git a/net/sunrpc/svc_wq.c b/net/sunrpc/svc_wq.c
index d4720ecd0b32..e96bbf49c1a0 100644
--- a/net/sunrpc/svc_wq.c
+++ b/net/sunrpc/svc_wq.c
@@ -12,6 +12,130 @@
#include <trace/events/sunrpc.h>
/*
+ * Find a svc_rqst to use. Try to find an already-allocated one on the list
+ * first, and allocate a new one if none is available.
+ */
+struct svc_rqst *
+find_or_alloc_svc_rqst(struct svc_serv *serv)
+{
+ int node = numa_node_id();
+ struct svc_rqst *rqstp;
+ struct svc_pool *pool = &serv->sv_pools[node];
+
+ rcu_read_lock();
+ list_for_each_entry_rcu(rqstp, &pool->sp_all_threads, rq_all) {
+ if (!test_and_set_bit(RQ_BUSY, &rqstp->rq_flags)) {
+ rcu_read_unlock();
+ return rqstp;
+ }
+ }
+ rcu_read_unlock();
+
+ rqstp = svc_rqst_alloc(serv, pool, node);
+ if (rqstp) {
+ spin_lock_bh(&pool->sp_lock);
+ list_add_tail_rcu(&rqstp->rq_all, &pool->sp_all_threads);
+ ++pool->sp_nrthreads;
+ spin_unlock_bh(&pool->sp_lock);
+ }
+ return rqstp;
+}
+EXPORT_SYMBOL_GPL(find_or_alloc_svc_rqst);
+
+static unsigned long
+count_svc_rqst_objects(struct shrinker *shrinker, struct shrink_control *sc)
+{
+ struct svc_serv *serv = container_of(shrinker, struct svc_serv,
+ sv_shrinker);
+ struct svc_pool *pool = &serv->sv_pools[sc->nid];
+ struct svc_rqst *rqstp;
+ unsigned long count = 0;
+
+ rcu_read_lock();
+ list_for_each_entry_rcu(rqstp, &pool->sp_all_threads, rq_all) {
+ /* Don't count it if it's busy */
+ if (test_bit(RQ_BUSY, &rqstp->rq_flags))
+ continue;
+
+ /* Don't count it if it was used within the last second */
+ if (time_before(jiffies, rqstp->rq_time + HZ))
+ continue;
+
+ ++count;
+ }
+ rcu_read_unlock();
+
+ return count;
+}
+
+static unsigned long
+scan_svc_rqst_objects(struct shrinker *shrinker, struct shrink_control *sc)
+{
+ struct svc_serv *serv = container_of(shrinker, struct svc_serv,
+ sv_shrinker);
+ struct svc_pool *pool = &serv->sv_pools[sc->nid];
+ struct svc_rqst *rqstp, *next;
+ unsigned long count = 0;
+
+ spin_lock_bh(&pool->sp_lock);
+ list_for_each_entry_safe(rqstp, next, &pool->sp_all_threads, rq_all) {
+ /* Don't free it if it's busy */
+ if (test_and_set_bit(RQ_BUSY, &rqstp->rq_flags))
+ continue;
+
+ list_del_rcu(&rqstp->rq_all);
+ svc_rqst_free(rqstp);
+ --pool->sp_nrthreads;
+ ++count;
+ if (--sc->nr_to_scan == 0)
+ break;
+ }
+ spin_unlock_bh(&pool->sp_lock);
+
+ return count;
+}
+
+static int
+init_svc_rqst_cache(struct svc_serv *serv)
+{
+ struct shrinker *shrinker = &serv->sv_shrinker;
+
+ memset(shrinker, 0, sizeof(*shrinker));
+
+ shrinker->count_objects = count_svc_rqst_objects;
+ shrinker->scan_objects = scan_svc_rqst_objects;
+ shrinker->seeks = DEFAULT_SEEKS;
+ shrinker->flags = SHRINKER_NUMA_AWARE;
+
+ return register_shrinker(shrinker);
+}
+
+void
+exit_svc_rqst_cache(struct svc_serv *serv)
+{
+ int node;
+
+ unregister_shrinker(&serv->sv_shrinker);
+
+ for (node = 0; node < serv->sv_nrpools; node++) {
+ struct svc_pool *pool = &serv->sv_pools[node];
+
+ spin_lock_bh(&pool->sp_lock);
+ while (!list_empty(&pool->sp_all_threads)) {
+ struct svc_rqst *rqstp = list_first_entry(
+ &pool->sp_all_threads, struct svc_rqst,
+ rq_all);
+
+ WARN_ON_ONCE(test_bit(RQ_BUSY, &rqstp->rq_flags));
+ list_del_rcu(&rqstp->rq_all);
+ svc_rqst_free(rqstp);
+ }
+ pool->sp_nrthreads = 0;
+ spin_unlock_bh(&pool->sp_lock);
+ }
+}
+
+/*
* This workqueue job should run on each node when the workqueue is created. It
* walks the list of xprts for its node, and queues the workqueue job for each.
*/
@@ -58,12 +182,13 @@ process_queued_xprts(struct svc_serv *serv)
/*
* Start up or shut down a workqueue-based RPC service. Basically, we use this
- * to allocate the workqueue. The function assumes that the caller holds one
- * serv->sv_nrthreads reference.
+ * to allocate the workqueue and set up the shrinker for the svc_rqst cache.
+ * This function assumes that the caller holds one serv->sv_nrthreads reference.
*/
int
svc_wq_setup(struct svc_serv *serv, struct svc_pool *pool, int max_active)
{
+ int err;
int nrthreads = serv->sv_nrthreads - 1; /* -1 for caller's reference */
WARN_ON_ONCE(nrthreads < 0);
@@ -79,14 +204,21 @@ svc_wq_setup(struct svc_serv *serv, struct svc_pool *pool, int max_active)
/* svc is down and none requested? */
if (!max_active)
return 0;
+
+ err = init_svc_rqst_cache(serv);
+ if (err)
+ return err;
+
__module_get(serv->sv_ops->svo_module);
serv->sv_wq = alloc_workqueue("%s",
WQ_UNBOUND|WQ_FREEZABLE|WQ_SYSFS,
max_active, serv->sv_name);
if (!serv->sv_wq) {
+ exit_svc_rqst_cache(serv);
module_put(serv->sv_ops->svo_module);
return -ENOMEM;
}
+
process_queued_xprts(serv);
} else {
/*
--
2.1.0