From: Peter Zijlstra <peterz@infradead.org>
To: Petr Tesarik <ptesarik@suse.cz>
Cc: Frank Mayhar <fmayhar@google.com>,
Christoph Lameter <cl@linux-foundation.org>,
Doug Chapman <doug.chapman@hp.com>,
mingo@elte.hu, roland@redhat.com, adobriyan@gmail.com,
akpm@linux-foundation.org,
linux-kernel <linux-kernel@vger.kernel.org>,
Thomas Gleixner <tglx@linutronix.de>
Subject: Re: regression introduced by - timers: fix itimer/many thread hang
Date: Mon, 24 Nov 2008 17:06:57 +0100 [thread overview]
Message-ID: <1227542817.4259.341.camel@twins> (raw)
In-Reply-To: <1227531589.4259.117.camel@twins>
On Mon, 2008-11-24 at 13:59 +0100, Peter Zijlstra wrote:
> I'll look into
> whipping up a patch removing all the per-cpu crap from there.
Remove the per-cpu-ish-ness from the itimer stuff.
Either we bounce once cacheline per cpu per tick, yielding n^2 bounces
or we just bounce a single..
Also, using per-cpu allocations for the thread-groups complicates the
per-cpu allocator in that its currently aimed to be a fixed sized
allocator and the only possible extention to that would be vmap based,
which is seriously constrained on 32 bit archs.
So making the per-cpu memory requirement depend on the number of
processes is an issue.
Lastly, it didn't deal with cpu-hotplug, although admittedly that might
be fixable.
---
include/linux/init_task.h | 6 ++++
include/linux/sched.h | 29 +++++++++++-------
kernel/fork.c | 15 ++++-----
kernel/posix-cpu-timers.c | 70 ---------------------------------------------
kernel/sched_fair.c | 13 ++++----
kernel/sched_stats.h | 33 +++++++++-----------
6 files changed, 52 insertions(+), 114 deletions(-)
diff --git a/include/linux/init_task.h b/include/linux/init_task.h
index 23fd890..e9e5313 100644
--- a/include/linux/init_task.h
+++ b/include/linux/init_task.h
@@ -47,6 +47,12 @@ extern struct files_struct init_files;
.posix_timers = LIST_HEAD_INIT(sig.posix_timers), \
.cpu_timers = INIT_CPU_TIMERS(sig.cpu_timers), \
.rlim = INIT_RLIMITS, \
+ .cputime = { .totals = { \
+ .utime = cputime_zero, \
+ .stime = cputime_zero, \
+ .sum_exec_runtime = 0, \
+ .lock = __SPIN_LOCK_UNLOCKED(sig.cputime.totals.lock), \
+ }, }, \
}
extern struct nsproxy init_nsproxy;
diff --git a/include/linux/sched.h b/include/linux/sched.h
index e22cb72..6a65f04 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -447,6 +447,7 @@ struct task_cputime {
cputime_t utime;
cputime_t stime;
unsigned long long sum_exec_runtime;
+ spinlock_t lock;
};
/* Alternate field names when used to cache expirations. */
#define prof_exp stime
@@ -462,7 +463,7 @@ struct task_cputime {
* used for thread group CPU clock calculations.
*/
struct thread_group_cputime {
- struct task_cputime *totals;
+ struct task_cputime totals;
};
/*
@@ -2159,24 +2160,30 @@ static inline int spin_needbreak(spinlock_t *lock)
* Thread group CPU time accounting.
*/
-extern int thread_group_cputime_alloc(struct task_struct *);
-extern void thread_group_cputime(struct task_struct *, struct task_cputime *);
-
-static inline void thread_group_cputime_init(struct signal_struct *sig)
+static inline
+void thread_group_cputime(struct task_struct *tsk, struct task_cputime *times)
{
- sig->cputime.totals = NULL;
+ struct task_cputime *totals = &tsk->signal->cputime.totals;
+ unsigned long flags;
+
+ spin_lock_irqsave(&totals->lock, flags);
+ *times = *totals;
+ spin_unlock_irqrestore(&totals->lock, flags);
}
-static inline int thread_group_cputime_clone_thread(struct task_struct *curr)
+static inline void thread_group_cputime_init(struct signal_struct *sig)
{
- if (curr->signal->cputime.totals)
- return 0;
- return thread_group_cputime_alloc(curr);
+ sig->cputime.totals = (struct task_cputime){
+ .utime = cputime_zero,
+ .stime = cputime_zero,
+ .sum_exec_runtime = 0,
+ };
+
+ spin_lock_init(&sig->cputime.totals.lock);
}
static inline void thread_group_cputime_free(struct signal_struct *sig)
{
- free_percpu(sig->cputime.totals);
}
/*
diff --git a/kernel/fork.c b/kernel/fork.c
index d183739..218513b 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -812,14 +812,15 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk)
int ret;
if (clone_flags & CLONE_THREAD) {
- ret = thread_group_cputime_clone_thread(current);
- if (likely(!ret)) {
- atomic_inc(¤t->signal->count);
- atomic_inc(¤t->signal->live);
- }
- return ret;
+ atomic_inc(¤t->signal->count);
+ atomic_inc(¤t->signal->live);
+ return 0;
}
sig = kmem_cache_alloc(signal_cachep, GFP_KERNEL);
+
+ if (sig)
+ posix_cpu_timers_init_group(sig);
+
tsk->signal = sig;
if (!sig)
return -ENOMEM;
@@ -862,8 +863,6 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk)
memcpy(sig->rlim, current->signal->rlim, sizeof sig->rlim);
task_unlock(current->group_leader);
- posix_cpu_timers_init_group(sig);
-
acct_init_pacct(&sig->pacct);
tty_audit_fork(sig);
diff --git a/kernel/posix-cpu-timers.c b/kernel/posix-cpu-timers.c
index 3f4377e..53dafd6 100644
--- a/kernel/posix-cpu-timers.c
+++ b/kernel/posix-cpu-timers.c
@@ -10,76 +10,6 @@
#include <linux/kernel_stat.h>
/*
- * Allocate the thread_group_cputime structure appropriately and fill in the
- * current values of the fields. Called from copy_signal() via
- * thread_group_cputime_clone_thread() when adding a second or subsequent
- * thread to a thread group. Assumes interrupts are enabled when called.
- */
-int thread_group_cputime_alloc(struct task_struct *tsk)
-{
- struct signal_struct *sig = tsk->signal;
- struct task_cputime *cputime;
-
- /*
- * If we have multiple threads and we don't already have a
- * per-CPU task_cputime struct (checked in the caller), allocate
- * one and fill it in with the times accumulated so far. We may
- * race with another thread so recheck after we pick up the sighand
- * lock.
- */
- cputime = alloc_percpu(struct task_cputime);
- if (cputime == NULL)
- return -ENOMEM;
- spin_lock_irq(&tsk->sighand->siglock);
- if (sig->cputime.totals) {
- spin_unlock_irq(&tsk->sighand->siglock);
- free_percpu(cputime);
- return 0;
- }
- sig->cputime.totals = cputime;
- cputime = per_cpu_ptr(sig->cputime.totals, smp_processor_id());
- cputime->utime = tsk->utime;
- cputime->stime = tsk->stime;
- cputime->sum_exec_runtime = tsk->se.sum_exec_runtime;
- spin_unlock_irq(&tsk->sighand->siglock);
- return 0;
-}
-
-/**
- * thread_group_cputime - Sum the thread group time fields across all CPUs.
- *
- * @tsk: The task we use to identify the thread group.
- * @times: task_cputime structure in which we return the summed fields.
- *
- * Walk the list of CPUs to sum the per-CPU time fields in the thread group
- * time structure.
- */
-void thread_group_cputime(
- struct task_struct *tsk,
- struct task_cputime *times)
-{
- struct task_cputime *totals, *tot;
- int i;
-
- totals = tsk->signal->cputime.totals;
- if (!totals) {
- times->utime = tsk->utime;
- times->stime = tsk->stime;
- times->sum_exec_runtime = tsk->se.sum_exec_runtime;
- return;
- }
-
- times->stime = times->utime = cputime_zero;
- times->sum_exec_runtime = 0;
- for_each_possible_cpu(i) {
- tot = per_cpu_ptr(totals, i);
- times->utime = cputime_add(times->utime, tot->utime);
- times->stime = cputime_add(times->stime, tot->stime);
- times->sum_exec_runtime += tot->sum_exec_runtime;
- }
-}
-
-/*
* Called after updating RLIMIT_CPU to set timer expiration if necessary.
*/
void update_rlimit_cpu(unsigned long rlim_new)
diff --git a/kernel/sched_stats.h b/kernel/sched_stats.h
index 7dbf72a..ba7714f 100644
--- a/kernel/sched_stats.h
+++ b/kernel/sched_stats.h
@@ -296,6 +296,7 @@ sched_info_switch(struct task_struct *prev, struct task_struct *next)
static inline void account_group_user_time(struct task_struct *tsk,
cputime_t cputime)
{
+ struct task_cputime *times;
struct signal_struct *sig;
/* tsk == current, ensure it is safe to use ->signal */
@@ -303,13 +304,11 @@ static inline void account_group_user_time(struct task_struct *tsk,
return;
sig = tsk->signal;
- if (sig->cputime.totals) {
- struct task_cputime *times;
+ times = &sig->cputime.totals;
- times = per_cpu_ptr(sig->cputime.totals, get_cpu());
- times->utime = cputime_add(times->utime, cputime);
- put_cpu_no_resched();
- }
+ spin_lock(×->lock);
+ times->utime = cputime_add(times->utime, cputime);
+ spin_unlock(×->lock);
}
/**
@@ -325,6 +324,7 @@ static inline void account_group_user_time(struct task_struct *tsk,
static inline void account_group_system_time(struct task_struct *tsk,
cputime_t cputime)
{
+ struct task_cputime *times;
struct signal_struct *sig;
/* tsk == current, ensure it is safe to use ->signal */
@@ -332,13 +332,11 @@ static inline void account_group_system_time(struct task_struct *tsk,
return;
sig = tsk->signal;
- if (sig->cputime.totals) {
- struct task_cputime *times;
+ times = &sig->cputime.totals;
- times = per_cpu_ptr(sig->cputime.totals, get_cpu());
- times->stime = cputime_add(times->stime, cputime);
- put_cpu_no_resched();
- }
+ spin_lock(×->lock);
+ times->stime = cputime_add(times->stime, cputime);
+ spin_unlock(×->lock);
}
/**
@@ -354,6 +352,7 @@ static inline void account_group_system_time(struct task_struct *tsk,
static inline void account_group_exec_runtime(struct task_struct *tsk,
unsigned long long ns)
{
+ struct task_cputime *times;
struct signal_struct *sig;
sig = tsk->signal;
@@ -362,11 +361,9 @@ static inline void account_group_exec_runtime(struct task_struct *tsk,
if (unlikely(!sig))
return;
- if (sig->cputime.totals) {
- struct task_cputime *times;
+ times = &sig->cputime.totals;
- times = per_cpu_ptr(sig->cputime.totals, get_cpu());
- times->sum_exec_runtime += ns;
- put_cpu_no_resched();
- }
+ spin_lock(×->lock);
+ times->sum_exec_runtime += ns;
+ spin_unlock(×->lock);
}
next prev parent reply other threads:[~2008-11-24 16:07 UTC|newest]
Thread overview: 38+ messages / expand[flat|nested] mbox.gz Atom feed top
[not found] <1224694989.8431.23.camel@oberon>
[not found] ` <1225132746.14792.13.camel@bobble.smo.corp.google.com>
[not found] ` <1225219114.24204.37.camel@oberon>
2008-11-06 1:58 ` regression introduced by - timers: fix itimer/many thread hang Frank Mayhar
2008-11-06 11:03 ` Peter Zijlstra
2008-11-06 15:03 ` Christoph Lameter
2008-11-06 15:08 ` Peter Zijlstra
2008-11-06 16:08 ` Christoph Lameter
2008-11-06 23:52 ` Frank Mayhar
2008-11-07 8:35 ` Ingo Molnar
2008-11-07 10:29 ` Peter Zijlstra
2008-11-07 18:10 ` Frank Mayhar
2008-11-07 20:26 ` Peter Zijlstra
2008-11-10 14:38 ` Christoph Lameter
2008-11-10 14:42 ` Peter Zijlstra
2008-11-10 15:41 ` Christoph Lameter
2008-11-10 18:00 ` Frank Mayhar
2008-11-14 2:42 ` Roland McGrath
2008-11-14 16:41 ` Oleg Nesterov
2008-11-17 14:36 ` Oleg Nesterov
2008-11-17 18:16 ` Roland McGrath
2008-11-17 22:18 ` Oleg Nesterov
2008-11-17 21:49 ` Roland McGrath
2008-11-11 0:20 ` Ingo Oeser
2008-11-11 13:58 ` Christoph Lameter
2008-11-21 18:42 ` Petr Tesarik
2008-11-21 19:26 ` Frank Mayhar
2008-11-23 14:24 ` Peter Zijlstra
2008-11-24 8:46 ` Petr Tesarik
2008-11-24 9:33 ` Peter Zijlstra
2008-11-24 12:32 ` Petr Tesarik
2008-11-24 12:59 ` Peter Zijlstra
2008-11-24 16:06 ` Peter Zijlstra [this message]
2008-11-06 16:31 ` [PATCH] revert: " Peter Zijlstra
2008-11-06 21:44 ` Ingo Molnar
2008-11-06 21:53 ` Christoph Lameter
2008-11-07 10:19 ` Peter Zijlstra
2008-11-13 16:00 ` Doug Chapman
2008-11-13 16:08 ` Ingo Molnar
2008-11-14 14:10 ` Doug Chapman
[not found] <20081105191211.c0316b94.akpm@linux-foundation.org>
2008-11-06 12:59 ` regression introduced by - " Oleg Nesterov
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1227542817.4259.341.camel@twins \
--to=peterz@infradead.org \
--cc=adobriyan@gmail.com \
--cc=akpm@linux-foundation.org \
--cc=cl@linux-foundation.org \
--cc=doug.chapman@hp.com \
--cc=fmayhar@google.com \
--cc=linux-kernel@vger.kernel.org \
--cc=mingo@elte.hu \
--cc=ptesarik@suse.cz \
--cc=roland@redhat.com \
--cc=tglx@linutronix.de \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox