From: NeilBrown <neilb@suse.de>
To: Chuck Lever <chuck.lever@oracle.com>, Jeff Layton <jlayton@kernel.org>
Cc: linux-nfs@vger.kernel.org
Subject: [PATCH 1/7] SUNRPC: change service idle list to be an llist
Date: Fri, 18 Aug 2023 11:45:06 +1000
Message-ID: <20230818014512.26880-2-neilb@suse.de>
In-Reply-To: <20230818014512.26880-1-neilb@suse.de>

With an llist we don't need to take a lock to add a thread to the list,
though we still need a lock to remove one.  That remaining lock will go
away in the next patch.
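
For background, the llist primitives behave roughly as follows: adding
is lock-free, but llist_del_first() can only remove the head, and
concurrent removers must serialise (see the comments in
include/linux/llist.h).  A minimal sketch of the two sides:

	/* producer: lock-free, safe from any context */
	llist_add(&rqstp->rq_idle, &pool->sp_idle_threads);

	/* consumer: only the head can be removed, and concurrent
	 * consumers must hold a lock across the deletion
	 */
	spin_lock_bh(&pool->sp_lock);
	ln = llist_del_first(&pool->sp_idle_threads);
	spin_unlock_bh(&pool->sp_lock);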

Unlike doubly-linked lists, a thread cannot reliably remove itself from
the list.  Only the first thread can be removed, and that can change
asynchronously.  So some care is needed.

We already check if there is pending work to do, so we are unlikely to
add ourselves to the idle list and then want to remove ourselves again.

If we DO find something needs to be done after adding ourselves to the
list, we simply wake up the first thread on the list.  If that was us,
we successfully removed ourselves and can continue.  If it was some
other thread, they will do the work that needs to be done.  We can
safely sleep until woken.

We also remove the test on freezing() from rqst_should_sleep().  Instead
we set TASK_FREEZABLE before scheduling.  This makes it safe to
schedule() when a freeze is pending.  As we now loop waiting to be
removed from the idle queue, this is a cleaner way to handle freezing.
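
Condensed from the svc_rqst_wait_for_work() hunk below, the new wait
path is, in outline:

	set_current_state(TASK_IDLE | TASK_FREEZABLE);
	llist_add(&rqstp->rq_idle, &pool->sp_idle_threads);

	if (!rqst_should_sleep(rqstp))
		/* work appeared after we went idle; wake the list
		 * head - which may or may not be us
		 */
		svc_pool_wake_idle_thread(pool);

	while (!svc_thread_busy(rqstp)) {
		schedule();		/* the freezer may run here */
		set_current_state(TASK_IDLE | TASK_FREEZABLE);
	}
	__set_current_state(TASK_RUNNING);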

task_state_index() incorrectly identifies a task with
   TASK_IDLE | TASK_FREEZABLE
as 'D'.  So fix __task_state_index() to ignore extra flags on TASK_IDLE
just as it ignores extra flags on other states.
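
To see why, note that in sched.h TASK_IDLE is itself a composite,
(TASK_UNINTERRUPTIBLE | TASK_NOLOAD), so an exact comparison stops
matching as soon as any extra flag is or'ed in:

	unsigned int tsk_state = TASK_IDLE | TASK_FREEZABLE;

	tsk_state == TASK_IDLE;			/* false: extra bit set,
						 * task is reported 'D' */
	(tsk_state & TASK_IDLE) == TASK_IDLE;	/* true: task is
						 * reported 'I' */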

Signed-off-by: NeilBrown <neilb@suse.de>
---
 include/linux/sched.h      |  4 +--
 include/linux/sunrpc/svc.h | 14 ++++++-----
 net/sunrpc/svc.c           | 13 +++++-----
 net/sunrpc/svc_xprt.c      | 51 +++++++++++++++++++-------------------
 4 files changed, 42 insertions(+), 40 deletions(-)

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 609bde814cb0..a5f3badcb629 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1666,7 +1666,7 @@ static inline unsigned int __task_state_index(unsigned int tsk_state,
 
 	BUILD_BUG_ON_NOT_POWER_OF_2(TASK_REPORT_MAX);
 
-	if (tsk_state == TASK_IDLE)
+	if ((tsk_state & TASK_IDLE) == TASK_IDLE)
 		state = TASK_REPORT_IDLE;
 
 	/*
@@ -1674,7 +1674,7 @@ static inline unsigned int __task_state_index(unsigned int tsk_state,
 	 * to userspace, we can make this appear as if the task has gone through
 	 * a regular rt_mutex_lock() call.
 	 */
-	if (tsk_state == TASK_RTLOCK_WAIT)
+	if (tsk_state & TASK_RTLOCK_WAIT)
 		state = TASK_UNINTERRUPTIBLE;
 
 	return fls(state);
diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h
index 22b3018ebf62..5216f95411e3 100644
--- a/include/linux/sunrpc/svc.h
+++ b/include/linux/sunrpc/svc.h
@@ -37,7 +37,7 @@ struct svc_pool {
 	struct list_head	sp_sockets;	/* pending sockets */
 	unsigned int		sp_nrthreads;	/* # of threads in pool */
 	struct list_head	sp_all_threads;	/* all server threads */
-	struct list_head	sp_idle_threads; /* idle server threads */
+	struct llist_head	sp_idle_threads; /* idle server threads */
 
 	/* statistics on pool operation */
 	struct percpu_counter	sp_messages_arrived;
@@ -186,7 +186,7 @@ extern u32 svc_max_payload(const struct svc_rqst *rqstp);
  */
 struct svc_rqst {
 	struct list_head	rq_all;		/* all threads list */
-	struct list_head	rq_idle;	/* On the idle list */
+	struct llist_node	rq_idle;	/* On the idle list */
 	struct rcu_head		rq_rcu_head;	/* for RCU deferred kfree */
 	struct svc_xprt *	rq_xprt;	/* transport ptr */
 
@@ -270,22 +270,24 @@ enum {
  * svc_thread_set_busy - mark a thread as busy
  * @rqstp: the thread which is now busy
  *
- * If rq_idle is "empty", the thread must be busy.
+ * By convention a thread is busy if rq_idle.next points to rq_idle.
+ * This ensures it is not on the idle list.
  */
 static inline void svc_thread_set_busy(struct svc_rqst *rqstp)
 {
-	INIT_LIST_HEAD(&rqstp->rq_idle);
+	rqstp->rq_idle.next = &rqstp->rq_idle;
 }
 
 /**
 * svc_thread_busy - check if a thread is busy
  * @rqstp: the thread which might be busy
  *
- * If rq_idle is "empty", the thread must be busy.
+ * By convention a thread is busy if rq_idle.next points to rq_idle.
+ * This ensures it is not on the idle list.
  */
 static inline bool svc_thread_busy(struct svc_rqst *rqstp)
 {
-	return list_empty(&rqstp->rq_idle);
+	return rqstp->rq_idle.next == &rqstp->rq_idle;
 }
 
 #define SVC_NET(rqst) (rqst->rq_xprt ? rqst->rq_xprt->xpt_net : rqst->rq_bc_net)
diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
index 051f08c48418..addbf28ea50a 100644
--- a/net/sunrpc/svc.c
+++ b/net/sunrpc/svc.c
@@ -510,7 +510,7 @@ __svc_create(struct svc_program *prog, unsigned int bufsize, int npools,
 		pool->sp_id = i;
 		INIT_LIST_HEAD(&pool->sp_sockets);
 		INIT_LIST_HEAD(&pool->sp_all_threads);
-		INIT_LIST_HEAD(&pool->sp_idle_threads);
+		init_llist_head(&pool->sp_idle_threads);
 		spin_lock_init(&pool->sp_lock);
 
 		percpu_counter_init(&pool->sp_messages_arrived, 0, GFP_KERNEL);
@@ -701,15 +701,16 @@ svc_prepare_thread(struct svc_serv *serv, struct svc_pool *pool, int node)
 void svc_pool_wake_idle_thread(struct svc_pool *pool)
 {
 	struct svc_rqst	*rqstp;
+	struct llist_node *ln;
 
 	rcu_read_lock();
 	spin_lock_bh(&pool->sp_lock);
-	rqstp = list_first_entry_or_null(&pool->sp_idle_threads,
-					 struct svc_rqst, rq_idle);
-	if (rqstp)
-		list_del_init(&rqstp->rq_idle);
+	ln = llist_del_first(&pool->sp_idle_threads);
 	spin_unlock_bh(&pool->sp_lock);
-	if (rqstp) {
+	if (ln) {
+		rqstp = llist_entry(ln, struct svc_rqst, rq_idle);
+		svc_thread_set_busy(rqstp);
+
 		WRITE_ONCE(rqstp->rq_qtime, ktime_get());
 		wake_up_process(rqstp->rq_task);
 		rcu_read_unlock();
diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c
index fa0d854a5596..81327001e074 100644
--- a/net/sunrpc/svc_xprt.c
+++ b/net/sunrpc/svc_xprt.c
@@ -715,10 +715,6 @@ rqst_should_sleep(struct svc_rqst *rqstp)
 	if (svc_thread_should_stop(rqstp))
 		return false;
 
-	/* are we freezing? */
-	if (freezing(current))
-		return false;
-
 #if defined(CONFIG_SUNRPC_BACKCHANNEL)
 	if (svc_is_backchannel(rqstp)) {
 		if (!list_empty(&rqstp->rq_server->sv_cb_list))
@@ -734,30 +730,32 @@ static void svc_rqst_wait_for_work(struct svc_rqst *rqstp)
 	struct svc_pool *pool = rqstp->rq_pool;
 
 	if (rqst_should_sleep(rqstp)) {
-		set_current_state(TASK_IDLE);
-		spin_lock_bh(&pool->sp_lock);
-		list_add(&rqstp->rq_idle, &pool->sp_idle_threads);
-		spin_unlock_bh(&pool->sp_lock);
+		set_current_state(TASK_IDLE | TASK_FREEZABLE);
+		llist_add(&rqstp->rq_idle, &pool->sp_idle_threads);
+
+		if (unlikely(!rqst_should_sleep(rqstp)))
+			/* maybe there were no idle threads when some work
+			 * became ready and so nothing was woken.  We've just
+			 * become idle so someone can do the work - maybe us.
+			 * But we cannot reliably remove ourselves from the
+			 * idle list - we can only remove the first task, which
+			 * might be us, and might not.
+			 * So remove and wake it, then schedule().  If it was
+			 * us, we won't sleep.  If it was some other thread, it
+			 * will do the work.
+			 */
+			svc_pool_wake_idle_thread(pool);
 
-		/* Need to check should_sleep() again after
-		 * setting task state in case a wakeup happened
-		 * between testing and setting.
+		/* We mustn't continue while on the idle list, and we
+		 * cannot remove ourselves reliably.  The only "work"
+		 * we can do while on the idle list is to freeze.
+		 * So loop until someone removes us.
 		 */
-		if (rqst_should_sleep(rqstp)) {
+		while (!svc_thread_busy(rqstp)) {
 			schedule();
-		} else {
-			__set_current_state(TASK_RUNNING);
-			cond_resched();
-		}
-
-		/* We *must* be removed from the list before we can continue.
-		 * If we were woken, this is already done
-		 */
-		if (!svc_thread_busy(rqstp)) {
-			spin_lock_bh(&pool->sp_lock);
-			list_del_init(&rqstp->rq_idle);
-			spin_unlock_bh(&pool->sp_lock);
+			set_current_state(TASK_IDLE | TASK_FREEZABLE);
 		}
+		__set_current_state(TASK_RUNNING);
 	} else {
 		cond_resched();
 	}
@@ -870,9 +868,10 @@ void svc_recv(struct svc_rqst *rqstp)
 		struct svc_xprt *xprt = rqstp->rq_xprt;
 
 		/* Normally we will wait up to 5 seconds for any required
-		 * cache information to be provided.
+		 * cache information to be provided.  When there are no
+		 * idle threads, we reduce the wait time.
 		 */
-		if (!list_empty(&pool->sp_idle_threads))
+		if (pool->sp_idle_threads.first)
 			rqstp->rq_chandle.thread_wait = 5 * HZ;
 		else
 			rqstp->rq_chandle.thread_wait = 1 * HZ;
-- 
2.40.1


