From: Oleg Nesterov <oleg@redhat.com>
To: Boqun Feng <boqun.feng@gmail.com>, Ingo Molnar <mingo@redhat.com>,
Peter Zijlstra <peterz@infradead.org>,
Rik van Riel <riel@surriel.com>,
Thomas Gleixner <tglx@linutronix.de>,
Waiman Long <longman@redhat.com>, Will Deacon <will@kernel.org>
Cc: Alexey Gladkov <legion@kernel.org>,
"Eric W. Biederman" <ebiederm@xmission.com>,
linux-kernel@vger.kernel.org
Subject: [PATCH 5/5] time,signal: turn signal_struct.stats_lock into seqcount_rwlock_t
Date: Wed, 13 Sep 2023 17:50:09 +0200 [thread overview]
Message-ID: <20230913155009.GA26255@redhat.com> (raw)
In-Reply-To: <20230913154907.GA26210@redhat.com>
This way thread_group_cputime() doesn't exclude other readers on the
2nd pass.
thread_group_cputime() still needs to disable irqs because stats_lock
nests inside siglock. But once we change the getrusage()-like users to
rely on stats_lock we can remove this dependency, and after that there
will be no need for _irqsave.
And IIUC, this is the bugfix for CONFIG_PREEMPT_RT? Before this patch
read_seqbegin_or_lock() can spin in __read_seqcount_begin() while the
write_seqlock(stats_lock) section was preempted.
While at it, change the main loop to use __for_each_thread(sig, t).
Signed-off-by: Oleg Nesterov <oleg@redhat.com>
---
include/linux/sched/signal.h | 4 +++-
kernel/exit.c | 12 ++++++++----
kernel/fork.c | 3 ++-
kernel/sched/cputime.c | 10 ++++++----
4 files changed, 19 insertions(+), 10 deletions(-)
diff --git a/include/linux/sched/signal.h b/include/linux/sched/signal.h
index d7fa3ca2fa53..c7c0928b877d 100644
--- a/include/linux/sched/signal.h
+++ b/include/linux/sched/signal.h
@@ -182,7 +182,9 @@ struct signal_struct {
* Live threads maintain their own counters and add to these
* in __exit_signal, except for the group leader.
*/
- seqlock_t stats_lock;
+ rwlock_t stats_lock;
+ seqcount_rwlock_t stats_seqc;
+
u64 utime, stime, cutime, cstime;
u64 gtime;
u64 cgtime;
diff --git a/kernel/exit.c b/kernel/exit.c
index f3ba4b97a7d9..8dedb7138f9c 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -182,7 +182,8 @@ static void __exit_signal(struct task_struct *tsk)
* see the empty ->thread_head list.
*/
task_cputime(tsk, &utime, &stime);
- write_seqlock(&sig->stats_lock);
+ write_lock(&sig->stats_lock);
+ write_seqcount_begin(&sig->stats_seqc);
sig->utime += utime;
sig->stime += stime;
sig->gtime += task_gtime(tsk);
@@ -196,7 +197,8 @@ static void __exit_signal(struct task_struct *tsk)
sig->sum_sched_runtime += tsk->se.sum_exec_runtime;
sig->nr_threads--;
__unhash_process(tsk, group_dead);
- write_sequnlock(&sig->stats_lock);
+ write_seqcount_end(&sig->stats_seqc);
+ write_unlock(&sig->stats_lock);
/*
* Do this under ->siglock, we can race with another thread
@@ -1160,7 +1162,8 @@ static int wait_task_zombie(struct wait_opts *wo, struct task_struct *p)
*/
thread_group_cputime_adjusted(p, &tgutime, &tgstime);
spin_lock_irq(¤t->sighand->siglock);
- write_seqlock(&psig->stats_lock);
+ write_lock(&psig->stats_lock);
+ write_seqcount_begin(&psig->stats_seqc);
psig->cutime += tgutime + sig->cutime;
psig->cstime += tgstime + sig->cstime;
psig->cgtime += task_gtime(p) + sig->gtime + sig->cgtime;
@@ -1183,7 +1186,8 @@ static int wait_task_zombie(struct wait_opts *wo, struct task_struct *p)
psig->cmaxrss = maxrss;
task_io_accounting_add(&psig->ioac, &p->ioac);
task_io_accounting_add(&psig->ioac, &sig->ioac);
- write_sequnlock(&psig->stats_lock);
+ write_seqcount_end(&psig->stats_seqc);
+ write_unlock(&psig->stats_lock);
spin_unlock_irq(¤t->sighand->siglock);
}
diff --git a/kernel/fork.c b/kernel/fork.c
index b9d3aa493bbd..bbd5604053f8 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1870,7 +1870,8 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk)
sig->curr_target = tsk;
init_sigpending(&sig->shared_pending);
INIT_HLIST_HEAD(&sig->multiprocess);
- seqlock_init(&sig->stats_lock);
+ rwlock_init(&sig->stats_lock);
+ seqcount_rwlock_init(&sig->stats_seqc, &sig->stats_lock);
prev_cputime_init(&sig->prev_cputime);
#ifdef CONFIG_POSIX_TIMERS
diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c
index af7952f12e6c..bd6a85bd2a49 100644
--- a/kernel/sched/cputime.c
+++ b/kernel/sched/cputime.c
@@ -333,12 +333,13 @@ void thread_group_cputime(struct task_struct *tsk, struct task_cputime *times)
nextseq = 0;
do {
seq = nextseq;
- flags = read_seqbegin_or_lock_irqsave(&sig->stats_lock, &seq);
+ flags = read_seqcount_begin_or_lock_irqsave(&sig->stats_seqc,
+ &sig->stats_lock, &seq);
times->utime = sig->utime;
times->stime = sig->stime;
times->sum_exec_runtime = sig->sum_sched_runtime;
- for_each_thread(tsk, t) {
+ __for_each_thread(sig, t) {
task_cputime(t, &utime, &stime);
times->utime += utime;
times->stime += stime;
@@ -346,8 +347,9 @@ void thread_group_cputime(struct task_struct *tsk, struct task_cputime *times)
}
/* If lockless access failed, take the lock. */
nextseq = 1;
- } while (need_seqretry(&sig->stats_lock, seq));
- done_seqretry_irqrestore(&sig->stats_lock, seq, flags);
+ } while (need_seqcount_retry(&sig->stats_seqc, seq));
+ done_seqcount_retry_irqrestore(&sig->stats_seqc, &sig->stats_lock,
+ seq, flags);
rcu_read_unlock();
}
--
2.25.1.362.g51ebf55
next prev parent reply other threads:[~2023-09-13 15:51 UTC|newest]
Thread overview: 18+ messages / expand[flat|nested] mbox.gz Atom feed top
2023-09-13 15:49 [PATCH 0/5] turn signal_struct.stats_lock into seqcount_rwlock_t Oleg Nesterov
2023-09-13 15:49 ` [PATCH 1/5] seqlock: simplify SEQCOUNT_LOCKNAME() Oleg Nesterov
2023-09-15 17:36 ` Alexey Gladkov
2023-09-16 8:51 ` Peter Zijlstra
2023-09-21 11:48 ` Oleg Nesterov
2023-09-21 14:04 ` Peter Zijlstra
2023-09-21 14:31 ` Oleg Nesterov
2023-09-13 15:49 ` [PATCH 2/5] seqlock: change __seqprop() to return the function pointer Oleg Nesterov
2023-09-13 17:37 ` kernel test robot
2023-09-13 18:30 ` Oleg Nesterov
2023-09-13 17:59 ` kernel test robot
2023-09-13 19:23 ` kernel test robot
2023-09-13 15:50 ` [PATCH 3/5] seqlock: introduce seqprop_lock/unlock Oleg Nesterov
2023-09-15 18:25 ` Alexey Gladkov
2023-09-15 18:43 ` Oleg Nesterov
2023-09-13 15:50 ` [PATCH 4/5] seqlock: introduce read_seqcount_begin_or_lock() and friends Oleg Nesterov
2023-09-13 15:50 ` Oleg Nesterov [this message]
2023-09-23 12:37 ` [PATCH 5/5] time,signal: turn signal_struct.stats_lock into seqcount_rwlock_t Alexey Gladkov
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20230913155009.GA26255@redhat.com \
--to=oleg@redhat.com \
--cc=boqun.feng@gmail.com \
--cc=ebiederm@xmission.com \
--cc=legion@kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=longman@redhat.com \
--cc=mingo@redhat.com \
--cc=peterz@infradead.org \
--cc=riel@surriel.com \
--cc=tglx@linutronix.de \
--cc=will@kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.