From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1753527Ab0JARiH (ORCPT ); Fri, 1 Oct 2010 13:38:07 -0400 Received: from mx1.redhat.com ([209.132.183.28]:53781 "EHLO mx1.redhat.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1751928Ab0JARiG (ORCPT ); Fri, 1 Oct 2010 13:38:06 -0400 From: Glauber Costa To: linux-kernel@vger.kernel.org Cc: avi@redhat.com, Thomas Gleixner , Ingo Molnar , "H. Peter Anvin" , Peter Zijlstra Subject: [PATCH] use a stable clock reference in vdso vgetns Date: Fri, 1 Oct 2010 09:09:56 -0400 Message-Id: <1285938596-2186-1-git-send-email-glommer@redhat.com> Sender: linux-kernel-owner@vger.kernel.org List-ID: X-Mailing-List: linux-kernel@vger.kernel.org When using vdso services for clock_gettime, we test for the ability of a fine-grained measurement through the existence of a vread() function. However, from the time we test it, to the time we use it, vread() reference may not be valid anymore. It happens, for example, when we change the current clocksource from one that provides vread (say tsc) to one that lacks it (say acpi_pm), in the middle of clock_gettime routine. seqlock does not really protect us, since readers here won't stop the writers from changing references. The proposed solution is to grab a copy of the clock structure early, and use it as a stable reference onwards. Since we don't free parts of memory associated with old clocksources, we merely stop using it for a while, so this is a safe thing to do. Signed-off-by: Glauber Costa CC: Avi Kivity CC: Thomas Gleixner CC: Ingo Molnar CC: H. 
Peter Anvin CC: Peter Zijlstra --- arch/x86/include/asm/vgtod.h | 2 +- arch/x86/vdso/vclock_gettime.c | 51 +++++++++++++++++++++++++++------------ 2 files changed, 36 insertions(+), 17 deletions(-) diff --git a/arch/x86/include/asm/vgtod.h b/arch/x86/include/asm/vgtod.h index 3d61e20..7f813bc 100644 --- a/arch/x86/include/asm/vgtod.h +++ b/arch/x86/include/asm/vgtod.h @@ -13,7 +13,7 @@ struct vsyscall_gtod_data { int sysctl_enabled; struct timezone sys_tz; - struct { /* extract of a clocksource struct */ + struct vclock { /* extract of a clocksource struct */ cycle_t (*vread)(void); cycle_t cycle_last; cycle_t mask; diff --git a/arch/x86/vdso/vclock_gettime.c b/arch/x86/vdso/vclock_gettime.c index ee55754..c82ade0 100644 --- a/arch/x86/vdso/vclock_gettime.c +++ b/arch/x86/vdso/vclock_gettime.c @@ -34,23 +34,26 @@ notrace static long vdso_fallback_gettime(long clock, struct timespec *ts) return ret; } -notrace static inline long vgetns(void) +/* + * The clock structure must be provided from the outside, instead of accessed + * from gtod. The clock can have changed, leading to an invalid vread by the + * time we get here. 
All other values must also be consistent with vread result + */ +notrace static inline long vgetns(struct vclock *clock) { long v; - cycles_t (*vread)(void); - vread = gtod->clock.vread; - v = (vread() - gtod->clock.cycle_last) & gtod->clock.mask; - return (v * gtod->clock.mult) >> gtod->clock.shift; + v = (clock->vread() - clock->cycle_last) & clock->mask; + return (v * clock->mult) >> clock->shift; } -notrace static noinline int do_realtime(struct timespec *ts) +notrace static noinline int do_realtime(struct timespec *ts, struct vclock *clock) { unsigned long seq, ns; do { seq = read_seqbegin(&gtod->lock); ts->tv_sec = gtod->wall_time_sec; ts->tv_nsec = gtod->wall_time_nsec; - ns = vgetns(); + ns = vgetns(clock); } while (unlikely(read_seqretry(&gtod->lock, seq))); timespec_add_ns(ts, ns); return 0; @@ -72,13 +75,13 @@ vset_normalized_timespec(struct timespec *ts, long sec, long nsec) ts->tv_nsec = nsec; } -notrace static noinline int do_monotonic(struct timespec *ts) +notrace static noinline int do_monotonic(struct timespec *ts, struct vclock *clock) { unsigned long seq, ns, secs; do { seq = read_seqbegin(&gtod->lock); secs = gtod->wall_time_sec; - ns = gtod->wall_time_nsec + vgetns(); + ns = gtod->wall_time_nsec + vgetns(clock); secs += gtod->wall_to_monotonic.tv_sec; ns += gtod->wall_to_monotonic.tv_nsec; } while (unlikely(read_seqretry(&gtod->lock, seq))); @@ -113,21 +116,29 @@ notrace static noinline int do_monotonic_coarse(struct timespec *ts) notrace int __vdso_clock_gettime(clockid_t clock, struct timespec *ts) { - if (likely(gtod->sysctl_enabled)) + if (likely(gtod->sysctl_enabled)) { + struct vclock clk; + unsigned long seq; + do { + seq = read_seqbegin(&gtod->lock); + memcpy(&clk, &gtod->clock, sizeof(clk)); + } while (unlikely(read_seqretry(&gtod->lock, seq))); + switch (clock) { case CLOCK_REALTIME: - if (likely(gtod->clock.vread)) - return do_realtime(ts); + if (likely(clk.vread)) + return do_realtime(ts, &clk); break; case CLOCK_MONOTONIC: - if 
(likely(gtod->clock.vread)) - return do_monotonic(ts); + if (likely(clk.vread)) + return do_monotonic(ts, &clk); break; case CLOCK_REALTIME_COARSE: return do_realtime_coarse(ts); case CLOCK_MONOTONIC_COARSE: return do_monotonic_coarse(ts); } + } return vdso_fallback_gettime(clock, ts); } int clock_gettime(clockid_t, struct timespec *) @@ -136,12 +147,20 @@ int clock_gettime(clockid_t, struct timespec *) notrace int __vdso_gettimeofday(struct timeval *tv, struct timezone *tz) { long ret; - if (likely(gtod->sysctl_enabled && gtod->clock.vread)) { + struct vclock clock; + unsigned long seq; + + do { + seq = read_seqbegin(&gtod->lock); + memcpy(&clock, &gtod->clock, sizeof(clock)); + } while (unlikely(read_seqretry(&gtod->lock, seq))); + + if (likely(gtod->sysctl_enabled && clock.vread)) { if (likely(tv != NULL)) { BUILD_BUG_ON(offsetof(struct timeval, tv_usec) != offsetof(struct timespec, tv_nsec) || sizeof(*tv) != sizeof(struct timespec)); - do_realtime((struct timespec *)tv); + do_realtime((struct timespec *)tv, &clock); tv->tv_usec /= 1000; } if (unlikely(tz != NULL)) { -- 1.7.0.1