From: Steven Rostedt <rostedt@goodmis.org>
To: LKML <linux-kernel@vger.kernel.org>
Cc: Ingo Molnar <mingo@elte.hu>,
Linus Torvalds <torvalds@linux-foundation.org>,
Andrew Morton <akpm@linux-foundation.org>,
Peter Zijlstra <a.p.zijlstra@chello.nl>,
Christoph Hellwig <hch@infradead.org>,
Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>,
Gregory Haskins <ghaskins@novell.com>,
Arnaldo Carvalho de Melo <acme@ghostprotocols.net>,
Thomas Gleixner <tglx@linutronix.de>,
Tim Bird <tim.bird@am.sony.com>, Sam Ravnborg <sam@ravnborg.org>,
"Frank Ch. Eigler" <fche@redhat.com>,
Jan Kiszka <jan.kiszka@siemens.com>,
John Stultz <johnstul@us.ibm.com>,
Arjan van de Ven <arjan@infradead.org>,
Steven Rostedt <srostedt@redhat.com>
Subject: [PATCH 06/23 -v8] handle accurate time keeping over long delays
Date: Wed, 30 Jan 2008 16:04:03 -0500 [thread overview]
Message-ID: <20080130210525.701268662@goodmis.org> (raw)
In-Reply-To: 20080130210357.927754294@goodmis.org
[-- Attachment #1: rt-time-starvation-fix.patch --]
[-- Type: text/plain, Size: 10307 bytes --]
From: John Stultz <johnstul@us.ibm.com>
Handle accurate time even if there's a long delay between
accumulated clock cycles.
Signed-off-by: John Stultz <johnstul@us.ibm.com>
Signed-off-by: Steven Rostedt <srostedt@redhat.com>
---
arch/powerpc/kernel/time.c | 3 +-
arch/x86/kernel/vsyscall_64.c | 5 ++-
include/asm-x86/vgtod.h | 2 -
include/linux/clocksource.h | 58 ++++++++++++++++++++++++++++++++++++++++--
kernel/time/timekeeping.c | 36 +++++++++++++-------------
5 files changed, 82 insertions(+), 22 deletions(-)
Index: linux-mcount.git/arch/x86/kernel/vsyscall_64.c
===================================================================
--- linux-mcount.git.orig/arch/x86/kernel/vsyscall_64.c 2008-01-30 14:47:08.000000000 -0500
+++ linux-mcount.git/arch/x86/kernel/vsyscall_64.c 2008-01-30 14:54:12.000000000 -0500
@@ -86,6 +86,7 @@ void update_vsyscall(struct timespec *wa
vsyscall_gtod_data.clock.mask = clock->mask;
vsyscall_gtod_data.clock.mult = clock->mult;
vsyscall_gtod_data.clock.shift = clock->shift;
+ vsyscall_gtod_data.clock.cycle_accumulated = clock->cycle_accumulated;
vsyscall_gtod_data.wall_time_sec = wall_time->tv_sec;
vsyscall_gtod_data.wall_time_nsec = wall_time->tv_nsec;
vsyscall_gtod_data.wall_to_monotonic = wall_to_monotonic;
@@ -121,7 +122,7 @@ static __always_inline long time_syscall
static __always_inline void do_vgettimeofday(struct timeval * tv)
{
- cycle_t now, base, mask, cycle_delta;
+ cycle_t now, base, accumulated, mask, cycle_delta;
unsigned seq;
unsigned long mult, shift, nsec;
cycle_t (*vread)(void);
@@ -135,6 +136,7 @@ static __always_inline void do_vgettimeo
}
now = vread();
base = __vsyscall_gtod_data.clock.cycle_last;
+ accumulated = __vsyscall_gtod_data.clock.cycle_accumulated;
mask = __vsyscall_gtod_data.clock.mask;
mult = __vsyscall_gtod_data.clock.mult;
shift = __vsyscall_gtod_data.clock.shift;
@@ -145,6 +147,7 @@ static __always_inline void do_vgettimeo
/* calculate interval: */
cycle_delta = (now - base) & mask;
+ cycle_delta += accumulated;
/* convert to nsecs: */
nsec += (cycle_delta * mult) >> shift;
Index: linux-mcount.git/include/asm-x86/vgtod.h
===================================================================
--- linux-mcount.git.orig/include/asm-x86/vgtod.h 2008-01-30 14:35:51.000000000 -0500
+++ linux-mcount.git/include/asm-x86/vgtod.h 2008-01-30 14:54:12.000000000 -0500
@@ -15,7 +15,7 @@ struct vsyscall_gtod_data {
struct timezone sys_tz;
struct { /* extract of a clocksource struct */
cycle_t (*vread)(void);
- cycle_t cycle_last;
+ cycle_t cycle_last, cycle_accumulated;
cycle_t mask;
u32 mult;
u32 shift;
Index: linux-mcount.git/include/linux/clocksource.h
===================================================================
--- linux-mcount.git.orig/include/linux/clocksource.h 2008-01-30 14:35:51.000000000 -0500
+++ linux-mcount.git/include/linux/clocksource.h 2008-01-30 14:54:12.000000000 -0500
@@ -50,8 +50,12 @@ struct clocksource;
* @flags: flags describing special properties
* @vread: vsyscall based read
* @resume: resume function for the clocksource, if necessary
+ * @cycle_last: Used internally by timekeeping core, please ignore.
+ * @cycle_accumulated: Used internally by timekeeping core, please ignore.
* @cycle_interval: Used internally by timekeeping core, please ignore.
* @xtime_interval: Used internally by timekeeping core, please ignore.
+ * @xtime_nsec: Used internally by timekeeping core, please ignore.
+ * @error: Used internally by timekeeping core, please ignore.
*/
struct clocksource {
/*
@@ -82,7 +86,10 @@ struct clocksource {
* Keep it in a different cache line to dirty no
* more than one cache line.
*/
- cycle_t cycle_last ____cacheline_aligned_in_smp;
+ struct {
+ cycle_t cycle_last, cycle_accumulated;
+ } ____cacheline_aligned_in_smp;
+
u64 xtime_nsec;
s64 error;
@@ -168,11 +175,44 @@ static inline cycle_t clocksource_read(s
}
/**
+ * clocksource_get_cycles: - Access the clocksource's accumulated cycle value
+ * @cs: pointer to clocksource being read
+ * @now: current cycle value
+ *
+ * Uses the clocksource to return the current cycle_t value.
+ * NOTE!!!: This is different from clocksource_read, because it
+ * returns the accumulated cycle value! Must hold xtime lock!
+ */
+static inline cycle_t
+clocksource_get_cycles(struct clocksource *cs, cycle_t now)
+{
+ cycle_t offset = (now - cs->cycle_last) & cs->mask;
+ offset += cs->cycle_accumulated;
+ return offset;
+}
+
+/**
+ * clocksource_accumulate: - Accumulates clocksource cycles
+ * @cs: pointer to clocksource being read
+ * @now: current cycle value
+ *
+ * Used to avoid clocksource hardware overflow by periodically
+ * accumulating the current cycle delta. Must hold xtime write lock!
+ */
+static inline void clocksource_accumulate(struct clocksource *cs, cycle_t now)
+{
+ cycle_t offset = (now - cs->cycle_last) & cs->mask;
+ cs->cycle_last = now;
+ cs->cycle_accumulated += offset;
+}
+
+/**
* cyc2ns - converts clocksource cycles to nanoseconds
* @cs: Pointer to clocksource
* @cycles: Cycles
*
* Uses the clocksource and ntp adjustment to convert cycle_t values to nanoseconds.
+ * Must hold xtime lock!
*
* XXX - This could use some mult_lxl_ll() asm optimization
*/
@@ -184,13 +224,27 @@ static inline s64 cyc2ns(struct clocksou
}
/**
+ * ns2cyc - converts nanoseconds to clocksource cycles
+ * @cs: Pointer to clocksource
+ * @nsecs: Nanoseconds
+ */
+static inline cycle_t ns2cyc(struct clocksource *cs, u64 nsecs)
+{
+ cycle_t ret = nsecs << cs->shift;
+
+ do_div(ret, cs->mult + 1);
+
+ return ret;
+}
+
+/**
* clocksource_calculate_interval - Calculates a clocksource interval struct
*
* @c: Pointer to clocksource.
* @length_nsec: Desired interval length in nanoseconds.
*
* Calculates a fixed cycle/nsec interval for a given clocksource/adjustment
- * pair and interval request.
+ * pair and interval request. Must hold xtime_lock!
*
* Unless you're the timekeeping code, you should not be using this!
*/
Index: linux-mcount.git/kernel/time/timekeeping.c
===================================================================
--- linux-mcount.git.orig/kernel/time/timekeeping.c 2008-01-30 14:35:51.000000000 -0500
+++ linux-mcount.git/kernel/time/timekeeping.c 2008-01-30 14:54:12.000000000 -0500
@@ -66,16 +66,10 @@ static struct clocksource *clock; /* poi
*/
static inline s64 __get_nsec_offset(void)
{
- cycle_t cycle_now, cycle_delta;
+ cycle_t cycle_delta;
s64 ns_offset;
- /* read clocksource: */
- cycle_now = clocksource_read(clock);
-
- /* calculate the delta since the last update_wall_time: */
- cycle_delta = (cycle_now - clock->cycle_last) & clock->mask;
-
- /* convert to nanoseconds: */
+ cycle_delta = clocksource_get_cycles(clock, clocksource_read(clock));
ns_offset = cyc2ns(clock, cycle_delta);
return ns_offset;
@@ -195,7 +189,7 @@ static void change_clocksource(void)
clock = new;
clock->cycle_last = now;
-
+ clock->cycle_accumulated = 0;
clock->error = 0;
clock->xtime_nsec = 0;
clocksource_calculate_interval(clock, NTP_INTERVAL_LENGTH);
@@ -205,9 +199,15 @@ static void change_clocksource(void)
printk(KERN_INFO "Time: %s clocksource has been installed.\n",
clock->name);
}
+
+void timekeeping_accumulate(void)
+{
+ clocksource_accumulate(clock, clocksource_read(clock));
+}
#else
static inline void change_clocksource(void) { }
static inline s64 __get_nsec_offset(void) { return 0; }
+void timekeeping_accumulate(void) { }
#endif
/**
@@ -302,6 +302,7 @@ static int timekeeping_resume(struct sys
timespec_add_ns(&xtime, timekeeping_suspend_nsecs);
/* re-base the last cycle value */
clock->cycle_last = clocksource_read(clock);
+ clock->cycle_accumulated = 0;
clock->error = 0;
timekeeping_suspended = 0;
write_sequnlock_irqrestore(&xtime_lock, flags);
@@ -448,27 +449,28 @@ static void clocksource_adjust(s64 offse
*/
void update_wall_time(void)
{
- cycle_t offset;
+ cycle_t cycle_now;
/* Make sure we're fully resumed: */
if (unlikely(timekeeping_suspended))
return;
#ifdef CONFIG_GENERIC_TIME
- offset = (clocksource_read(clock) - clock->cycle_last) & clock->mask;
+ cycle_now = clocksource_read(clock);
#else
- offset = clock->cycle_interval;
+ cycle_now = clock->cycle_last + clock->cycle_interval;
#endif
+ clocksource_accumulate(clock, cycle_now);
+
clock->xtime_nsec += (s64)xtime.tv_nsec << clock->shift;
/* normally this loop will run just once, however in the
* case of lost or late ticks, it will accumulate correctly.
*/
- while (offset >= clock->cycle_interval) {
+ while (clock->cycle_accumulated >= clock->cycle_interval) {
/* accumulate one interval */
clock->xtime_nsec += clock->xtime_interval;
- clock->cycle_last += clock->cycle_interval;
- offset -= clock->cycle_interval;
+ clock->cycle_accumulated -= clock->cycle_interval;
if (clock->xtime_nsec >= (u64)NSEC_PER_SEC << clock->shift) {
clock->xtime_nsec -= (u64)NSEC_PER_SEC << clock->shift;
@@ -482,13 +484,13 @@ void update_wall_time(void)
}
/* correct the clock when NTP error is too big */
- clocksource_adjust(offset);
+ clocksource_adjust(clock->cycle_accumulated);
/* store full nanoseconds into xtime */
xtime.tv_nsec = (s64)clock->xtime_nsec >> clock->shift;
clock->xtime_nsec -= (s64)xtime.tv_nsec << clock->shift;
- update_xtime_cache(cyc2ns(clock, offset));
+ update_xtime_cache(cyc2ns(clock, clock->cycle_accumulated));
/* check to see if there is a new clocksource to use */
change_clocksource();
Index: linux-mcount.git/arch/powerpc/kernel/time.c
===================================================================
--- linux-mcount.git.orig/arch/powerpc/kernel/time.c 2008-01-30 14:35:51.000000000 -0500
+++ linux-mcount.git/arch/powerpc/kernel/time.c 2008-01-30 14:54:12.000000000 -0500
@@ -773,7 +773,8 @@ void update_vsyscall(struct timespec *wa
stamp_xsec = (u64) xtime.tv_nsec * XSEC_PER_SEC;
do_div(stamp_xsec, 1000000000);
stamp_xsec += (u64) xtime.tv_sec * XSEC_PER_SEC;
- update_gtod(clock->cycle_last, stamp_xsec, t2x);
+ update_gtod(clock->cycle_last-clock->cycle_accumulated,
+ stamp_xsec, t2x);
}
void update_vsyscall_tz(void)
--
next prev parent reply other threads:[~2008-01-30 21:15 UTC|newest]
Thread overview: 34+ messages / expand[flat|nested] mbox.gz Atom feed top
2008-01-30 21:03 [PATCH 00/23 -v8] mcount and latency tracing utility -v8 Steven Rostedt
2008-01-30 21:03 ` [PATCH 01/23 -v8] printk - dont wakeup klogd with interrupts disabled Steven Rostedt
2008-01-30 21:03 ` [PATCH 02/23 -v8] Add basic support for gcc profiler instrumentation Steven Rostedt
2008-01-30 21:04 ` [PATCH 03/23 -v8] Annotate core code that should not be traced Steven Rostedt
2008-01-30 21:04 ` [PATCH 04/23 -v8] x86_64: notrace annotations Steven Rostedt
2008-01-30 21:04 ` [PATCH 05/23 -v8] add notrace annotations to vsyscall Steven Rostedt
2008-01-30 21:04 ` Steven Rostedt [this message]
2008-01-31 12:10 ` [PATCH 06/23 -v8] handle accurate time keeping over long delays Mathieu Desnoyers
2008-01-31 17:24 ` John Stultz
2008-02-01 17:02 ` Mathieu Desnoyers
2008-02-02 7:11 ` Steven Rostedt
2008-02-02 16:30 ` Mathieu Desnoyers
2008-01-30 21:04 ` [PATCH 07/23 -v8] initialize the clock source to jiffies clock Steven Rostedt
2008-01-30 21:04 ` [PATCH 08/23 -v8] add get_monotonic_cycles Steven Rostedt
2008-01-30 21:04 ` [PATCH 09/23 -v8] add notrace annotations to timing events Steven Rostedt
2008-01-30 21:04 ` [PATCH 10/23 -v8] mcount tracer add preempt_enable/disable notrace macros Steven Rostedt
2008-01-30 21:04 ` [PATCH 11/23 -v8] mcount based trace in the form of a header file library Steven Rostedt
2008-01-30 21:04 ` [PATCH 12/23 -v8] Add context switch marker to sched.c Steven Rostedt
2008-01-30 21:04 ` [PATCH 13/23 -v8] Make the task State char-string visible to all Steven Rostedt
2008-01-30 21:04 ` [PATCH 14/23 -v8] Add tracing of context switches Steven Rostedt
2008-02-05 17:04 ` Tim Bird
2008-01-30 21:04 ` [PATCH 15/23 -v8] Generic command line storage Steven Rostedt
2008-01-30 21:04 ` [PATCH 16/23 -v8] trace generic call to schedule switch Steven Rostedt
2008-01-30 21:04 ` [PATCH 17/23 -v8] Add marker in try_to_wake_up Steven Rostedt
2008-01-30 21:04 ` [PATCH 18/23 -v8] mcount tracer for wakeup latency timings Steven Rostedt
2008-01-30 21:04 ` [PATCH 19/23 -v8] Trace irq disabled critical timings Steven Rostedt
2008-01-30 21:04 ` [PATCH 20/23 -v8] trace preempt off " Steven Rostedt
2008-01-30 21:04 ` [PATCH 21/23 -v8] Add markers to various events Steven Rostedt
2008-01-31 12:06 ` Mathieu Desnoyers
2008-02-02 7:00 ` Steven Rostedt
2008-02-02 14:14 ` Mathieu Desnoyers
2008-01-30 21:04 ` [PATCH 22/23 -v8] Add event tracer Steven Rostedt
2008-01-30 21:04 ` [PATCH 23/23 -v8] Critical latency timings histogram Steven Rostedt
2008-02-05 17:11 ` [PATCH 00/23 -v8] mcount and latency tracing utility -v8 Tim Bird
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20080130210525.701268662@goodmis.org \
--to=rostedt@goodmis.org \
--cc=a.p.zijlstra@chello.nl \
--cc=acme@ghostprotocols.net \
--cc=akpm@linux-foundation.org \
--cc=arjan@infradead.org \
--cc=fche@redhat.com \
--cc=ghaskins@novell.com \
--cc=hch@infradead.org \
--cc=jan.kiszka@siemens.com \
--cc=johnstul@us.ibm.com \
--cc=linux-kernel@vger.kernel.org \
--cc=mathieu.desnoyers@polymtl.ca \
--cc=mingo@elte.hu \
--cc=sam@ravnborg.org \
--cc=srostedt@redhat.com \
--cc=tglx@linutronix.de \
--cc=tim.bird@am.sony.com \
--cc=torvalds@linux-foundation.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox