public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
From: Oleg Nesterov <oleg@redhat.com>
To: Boqun Feng <boqun.feng@gmail.com>, Ingo Molnar <mingo@redhat.com>,
	Peter Zijlstra <peterz@infradead.org>,
	Rik van Riel <riel@surriel.com>,
	Thomas Gleixner <tglx@linutronix.de>,
	Waiman Long <longman@redhat.com>, Will Deacon <will@kernel.org>
Cc: Alexey Gladkov <legion@kernel.org>,
	"Eric W. Biederman" <ebiederm@xmission.com>,
	linux-kernel@vger.kernel.org
Subject: [PATCH 5/5] time,signal: turn signal_struct.stats_lock into seqcount_rwlock_t
Date: Wed, 13 Sep 2023 17:50:09 +0200	[thread overview]
Message-ID: <20230913155009.GA26255@redhat.com> (raw)
In-Reply-To: <20230913154907.GA26210@redhat.com>

This way thread_group_cputime() doesn't exclude other readers on the
2nd pass.

thread_group_cputime() still needs to disable irqs because stats_lock
nests inside siglock. But once we change the getrusage()-like users to
rely on stats_lock we can remove this dependency, and after that there
will be no need for _irqsave.

And IIUC, this is a bugfix for CONFIG_PREEMPT_RT? Before this patch
read_seqbegin_or_lock() could spin in __read_seqcount_begin() while the
write_seqlock(stats_lock) section was preempted.

While at it, change the main loop to use __for_each_thread(sig, t).

Signed-off-by: Oleg Nesterov <oleg@redhat.com>
---
 include/linux/sched/signal.h |  4 +++-
 kernel/exit.c                | 12 ++++++++----
 kernel/fork.c                |  3 ++-
 kernel/sched/cputime.c       | 10 ++++++----
 4 files changed, 19 insertions(+), 10 deletions(-)

diff --git a/include/linux/sched/signal.h b/include/linux/sched/signal.h
index d7fa3ca2fa53..c7c0928b877d 100644
--- a/include/linux/sched/signal.h
+++ b/include/linux/sched/signal.h
@@ -182,7 +182,9 @@ struct signal_struct {
 	 * Live threads maintain their own counters and add to these
 	 * in __exit_signal, except for the group leader.
 	 */
-	seqlock_t stats_lock;
+	rwlock_t stats_lock;
+	seqcount_rwlock_t stats_seqc;
+
 	u64 utime, stime, cutime, cstime;
 	u64 gtime;
 	u64 cgtime;
diff --git a/kernel/exit.c b/kernel/exit.c
index f3ba4b97a7d9..8dedb7138f9c 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -182,7 +182,8 @@ static void __exit_signal(struct task_struct *tsk)
 	 * see the empty ->thread_head list.
 	 */
 	task_cputime(tsk, &utime, &stime);
-	write_seqlock(&sig->stats_lock);
+	write_lock(&sig->stats_lock);
+	write_seqcount_begin(&sig->stats_seqc);
 	sig->utime += utime;
 	sig->stime += stime;
 	sig->gtime += task_gtime(tsk);
@@ -196,7 +197,8 @@ static void __exit_signal(struct task_struct *tsk)
 	sig->sum_sched_runtime += tsk->se.sum_exec_runtime;
 	sig->nr_threads--;
 	__unhash_process(tsk, group_dead);
-	write_sequnlock(&sig->stats_lock);
+	write_seqcount_end(&sig->stats_seqc);
+	write_unlock(&sig->stats_lock);
 
 	/*
 	 * Do this under ->siglock, we can race with another thread
@@ -1160,7 +1162,8 @@ static int wait_task_zombie(struct wait_opts *wo, struct task_struct *p)
 		 */
 		thread_group_cputime_adjusted(p, &tgutime, &tgstime);
 		spin_lock_irq(&current->sighand->siglock);
-		write_seqlock(&psig->stats_lock);
+		write_lock(&psig->stats_lock);
+		write_seqcount_begin(&psig->stats_seqc);
 		psig->cutime += tgutime + sig->cutime;
 		psig->cstime += tgstime + sig->cstime;
 		psig->cgtime += task_gtime(p) + sig->gtime + sig->cgtime;
@@ -1183,7 +1186,8 @@ static int wait_task_zombie(struct wait_opts *wo, struct task_struct *p)
 			psig->cmaxrss = maxrss;
 		task_io_accounting_add(&psig->ioac, &p->ioac);
 		task_io_accounting_add(&psig->ioac, &sig->ioac);
-		write_sequnlock(&psig->stats_lock);
+		write_seqcount_end(&psig->stats_seqc);
+		write_unlock(&psig->stats_lock);
 		spin_unlock_irq(&current->sighand->siglock);
 	}
 
diff --git a/kernel/fork.c b/kernel/fork.c
index b9d3aa493bbd..bbd5604053f8 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1870,7 +1870,8 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk)
 	sig->curr_target = tsk;
 	init_sigpending(&sig->shared_pending);
 	INIT_HLIST_HEAD(&sig->multiprocess);
-	seqlock_init(&sig->stats_lock);
+	rwlock_init(&sig->stats_lock);
+	seqcount_rwlock_init(&sig->stats_seqc, &sig->stats_lock);
 	prev_cputime_init(&sig->prev_cputime);
 
 #ifdef CONFIG_POSIX_TIMERS
diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c
index af7952f12e6c..bd6a85bd2a49 100644
--- a/kernel/sched/cputime.c
+++ b/kernel/sched/cputime.c
@@ -333,12 +333,13 @@ void thread_group_cputime(struct task_struct *tsk, struct task_cputime *times)
 	nextseq = 0;
 	do {
 		seq = nextseq;
-		flags = read_seqbegin_or_lock_irqsave(&sig->stats_lock, &seq);
+		flags = read_seqcount_begin_or_lock_irqsave(&sig->stats_seqc,
+						     &sig->stats_lock, &seq);
 		times->utime = sig->utime;
 		times->stime = sig->stime;
 		times->sum_exec_runtime = sig->sum_sched_runtime;
 
-		for_each_thread(tsk, t) {
+		__for_each_thread(sig, t) {
 			task_cputime(t, &utime, &stime);
 			times->utime += utime;
 			times->stime += stime;
@@ -346,8 +347,9 @@ void thread_group_cputime(struct task_struct *tsk, struct task_cputime *times)
 		}
 		/* If lockless access failed, take the lock. */
 		nextseq = 1;
-	} while (need_seqretry(&sig->stats_lock, seq));
-	done_seqretry_irqrestore(&sig->stats_lock, seq, flags);
+	} while (need_seqcount_retry(&sig->stats_seqc, seq));
+	done_seqcount_retry_irqrestore(&sig->stats_seqc, &sig->stats_lock,
+					seq, flags);
 	rcu_read_unlock();
 }
 
-- 
2.25.1.362.g51ebf55


  parent reply	other threads:[~2023-09-13 15:51 UTC|newest]

Thread overview: 18+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2023-09-13 15:49 [PATCH 0/5] turn signal_struct.stats_lock into seqcount_rwlock_t Oleg Nesterov
2023-09-13 15:49 ` [PATCH 1/5] seqlock: simplify SEQCOUNT_LOCKNAME() Oleg Nesterov
2023-09-15 17:36   ` Alexey Gladkov
2023-09-16  8:51   ` Peter Zijlstra
2023-09-21 11:48     ` Oleg Nesterov
2023-09-21 14:04       ` Peter Zijlstra
2023-09-21 14:31         ` Oleg Nesterov
2023-09-13 15:49 ` [PATCH 2/5] seqlock: change __seqprop() to return the function pointer Oleg Nesterov
2023-09-13 17:37   ` kernel test robot
2023-09-13 18:30     ` Oleg Nesterov
2023-09-13 17:59   ` kernel test robot
2023-09-13 19:23   ` kernel test robot
2023-09-13 15:50 ` [PATCH 3/5] seqlock: introduce seqprop_lock/unlock Oleg Nesterov
2023-09-15 18:25   ` Alexey Gladkov
2023-09-15 18:43     ` Oleg Nesterov
2023-09-13 15:50 ` [PATCH 4/5] seqlock: introduce read_seqcount_begin_or_lock() and friends Oleg Nesterov
2023-09-13 15:50 ` Oleg Nesterov [this message]
2023-09-23 12:37   ` [PATCH 5/5] time,signal: turn signal_struct.stats_lock into seqcount_rwlock_t Alexey Gladkov

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20230913155009.GA26255@redhat.com \
    --to=oleg@redhat.com \
    --cc=boqun.feng@gmail.com \
    --cc=ebiederm@xmission.com \
    --cc=legion@kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=longman@redhat.com \
    --cc=mingo@redhat.com \
    --cc=peterz@infradead.org \
    --cc=riel@surriel.com \
    --cc=tglx@linutronix.de \
    --cc=will@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox