* [PATCH v4 10/12] arm64: vdso: replace gettimeofday.S with global vgettimeofday.C
@ 2017-10-31 18:32 Mark Salyzyn
2017-11-01 22:14 ` Mark Salyzyn
0 siblings, 1 reply; 6+ messages in thread
From: Mark Salyzyn @ 2017-10-31 18:32 UTC (permalink / raw)
To: linux-arm-kernel
Take an effort from the previous 9 patches to recode the arm64 vdso
code from assembler to C previously submitted by
Andrew Pinski <apinski@cavium.com>, rework it for use in both arm and
arm64, overlapping any optimizations for each architecture. But
instead of landing it in arm64, land the result into lib/vdso and
unify both implementations to simplify future maintenance.
apinski at cavium.com makes the following claims in the original patch:
This allows the compiler to optimize the divide by 1000 and remove
the other divides.
On ThunderX, gettimeofday improves by 32%. On ThunderX 2,
gettimeofday improves by 18%.
Note I noticed a bug in the old (arm64) implementation of
__kernel_clock_getres; it was checking only the lower 32bits of the
pointer; this would work for most cases but could fail in a few.
<end of claim>
Signed-off-by: Mark Salyzyn <salyzyn@android.com>
Cc: James Morse <james.morse@arm.com>
Cc: Russell King <linux@armlinux.org.uk>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Will Deacon <will.deacon@arm.com>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Dmitry Safonov <dsafonov@virtuozzo.com>
Cc: John Stultz <john.stultz@linaro.org>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Laura Abbott <labbott@redhat.com>
Cc: Kees Cook <keescook@chromium.org>
Cc: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Cc: Andy Gross <andy.gross@linaro.org>
Cc: Kevin Brodsky <kevin.brodsky@arm.com>
Cc: Andrew Pinski <apinski@cavium.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-kernel at vger.kernel.org
Cc: linux-arm-kernel at lists.infradead.org
v2:
- turned off profiling, restored quiet_cmd_vdsoas.
v3:
- move arch/arm/vdso/vgettimeofday.c to lib/vdso/vgettimeofday.c.
- adjust vgettimeofday.c to be a better global candidate, switch to
using ARCH_PROVIDES_TIMER and __arch_counter_get() as more generic.
v4:
- simplify arch_vdso_read_counter, use read_sysreg.
- Use GENMASK_ULL macro for ARCH_CLOCK_FIXED_MASK.
- update commit message.
- add vdso_wtm_clock_nsec_t, vdso_xtime_clock_sec_t and
vdso_raw_time_nsec_t.
---
arch/arm64/include/asm/vdso_datapage.h | 23 ++-
arch/arm64/kernel/asm-offsets.c | 2 +-
arch/arm64/kernel/vdso.c | 2 +-
arch/arm64/kernel/vdso/Makefile | 16 +-
arch/arm64/kernel/vdso/compiler.h | 67 +++++++
arch/arm64/kernel/vdso/datapage.h | 59 ++++++
arch/arm64/kernel/vdso/gettimeofday.S | 328 ---------------------------------
arch/arm64/kernel/vdso/vgettimeofday.c | 3 +
8 files changed, 160 insertions(+), 340 deletions(-)
create mode 100644 arch/arm64/kernel/vdso/compiler.h
create mode 100644 arch/arm64/kernel/vdso/datapage.h
delete mode 100644 arch/arm64/kernel/vdso/gettimeofday.S
create mode 100644 arch/arm64/kernel/vdso/vgettimeofday.c
diff --git a/arch/arm64/include/asm/vdso_datapage.h b/arch/arm64/include/asm/vdso_datapage.h
index 2b9a63771eda..95f4a7abab80 100644
--- a/arch/arm64/include/asm/vdso_datapage.h
+++ b/arch/arm64/include/asm/vdso_datapage.h
@@ -20,16 +20,31 @@
#ifndef __ASSEMBLY__
+#ifndef _VDSO_WTM_CLOCK_SEC_T
+#define _VDSO_WTM_CLOCK_SEC_T
+typedef __u64 vdso_wtm_clock_nsec_t;
+#endif
+
+#ifndef _VDSO_XTIME_CLOCK_SEC_T
+#define _VDSO_XTIME_CLOCK_SEC_T
+typedef __u64 vdso_xtime_clock_sec_t;
+#endif
+
+#ifndef _VDSO_RAW_TIME_SEC_T
+#define _VDSO_RAW_TIME_SEC_T
+typedef __u64 vdso_raw_time_sec_t;
+#endif
+
struct vdso_data {
__u64 cs_cycle_last; /* Timebase at clocksource init */
- __u64 raw_time_sec; /* Raw time */
+ vdso_raw_time_sec_t raw_time_sec; /* Raw time */
__u64 raw_time_nsec;
- __u64 xtime_clock_sec; /* Kernel time */
- __u64 xtime_clock_nsec;
+ vdso_xtime_clock_sec_t xtime_clock_sec; /* Kernel time */
+ __u64 xtime_clock_snsec;
__u64 xtime_coarse_sec; /* Coarse time */
__u64 xtime_coarse_nsec;
__u64 wtm_clock_sec; /* Wall to monotonic time */
- __u64 wtm_clock_nsec;
+ vdso_wtm_clock_nsec_t wtm_clock_nsec;
__u32 tb_seq_count; /* Timebase sequence counter */
/* cs_* members must be adjacent and in this order (ldp accesses) */
__u32 cs_mono_mult; /* NTP-adjusted clocksource multiplier */
diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c
index 71bf088f1e4b..db5b305806b7 100644
--- a/arch/arm64/kernel/asm-offsets.c
+++ b/arch/arm64/kernel/asm-offsets.c
@@ -104,7 +104,7 @@ int main(void)
DEFINE(VDSO_RAW_TIME_SEC, offsetof(struct vdso_data, raw_time_sec));
DEFINE(VDSO_RAW_TIME_NSEC, offsetof(struct vdso_data, raw_time_nsec));
DEFINE(VDSO_XTIME_CLK_SEC, offsetof(struct vdso_data, xtime_clock_sec));
- DEFINE(VDSO_XTIME_CLK_NSEC, offsetof(struct vdso_data, xtime_clock_nsec));
+ DEFINE(VDSO_XTIME_CLK_NSEC, offsetof(struct vdso_data, xtime_clock_snsec));
DEFINE(VDSO_XTIME_CRS_SEC, offsetof(struct vdso_data, xtime_coarse_sec));
DEFINE(VDSO_XTIME_CRS_NSEC, offsetof(struct vdso_data, xtime_coarse_nsec));
DEFINE(VDSO_WTM_CLK_SEC, offsetof(struct vdso_data, wtm_clock_sec));
diff --git a/arch/arm64/kernel/vdso.c b/arch/arm64/kernel/vdso.c
index 2d419006ad43..59f150c25889 100644
--- a/arch/arm64/kernel/vdso.c
+++ b/arch/arm64/kernel/vdso.c
@@ -238,7 +238,7 @@ void update_vsyscall(struct timekeeper *tk)
vdso_data->raw_time_sec = tk->raw_sec;
vdso_data->raw_time_nsec = tk->tkr_raw.xtime_nsec;
vdso_data->xtime_clock_sec = tk->xtime_sec;
- vdso_data->xtime_clock_nsec = tk->tkr_mono.xtime_nsec;
+ vdso_data->xtime_clock_snsec = tk->tkr_mono.xtime_nsec;
vdso_data->cs_mono_mult = tk->tkr_mono.mult;
vdso_data->cs_raw_mult = tk->tkr_raw.mult;
/* tkr_mono.shift == tkr_raw.shift */
diff --git a/arch/arm64/kernel/vdso/Makefile b/arch/arm64/kernel/vdso/Makefile
index 62c84f7cb01b..0925f8908dcb 100644
--- a/arch/arm64/kernel/vdso/Makefile
+++ b/arch/arm64/kernel/vdso/Makefile
@@ -5,18 +5,26 @@
# Heavily based on the vDSO Makefiles for other archs.
#
-obj-vdso := gettimeofday.o note.o sigreturn.o
+obj-vdso := vgettimeofday.o note.o sigreturn.o
# Build rules
targets := $(obj-vdso) vdso.so vdso.so.dbg
obj-vdso := $(addprefix $(obj)/, $(obj-vdso))
-ccflags-y := -shared -fno-common -fno-builtin
+ccflags-y := -shared -fno-common -fno-builtin -fno-stack-protector
+ccflags-y += -DDISABLE_BRANCH_PROFILING
ccflags-y += -nostdlib -Wl,-soname=linux-vdso.so.1 \
$(call cc-ldoption, -Wl$(comma)--hash-style=sysv)
+# Force -O2 to avoid libgcc dependencies
+CFLAGS_REMOVE_vgettimeofday.o = -pg -Os
+CFLAGS_vgettimeofday.o = -O2 -mcmodel=tiny
+
# Disable gcov profiling for VDSO code
GCOV_PROFILE := n
+KASAN_SANITIZE := n
+UBSAN_SANITIZE := n
+KCOV_INSTRUMENT := n
# Workaround for bare-metal (ELF) toolchains that neglect to pass -shared
# down to collect2, resulting in silent corruption of the vDSO image.
@@ -48,10 +56,6 @@ endef
include/generated/vdso-offsets.h: $(obj)/vdso.so.dbg FORCE
$(call if_changed,vdsosym)
-# Assembly rules for the .S files
-$(obj-vdso): %.o: %.S FORCE
- $(call if_changed_dep,vdsoas)
-
# Actual build commands
quiet_cmd_vdsold = VDSOL $@
cmd_vdsold = $(CC) $(c_flags) -Wl,-n -Wl,-T $^ -o $@
diff --git a/arch/arm64/kernel/vdso/compiler.h b/arch/arm64/kernel/vdso/compiler.h
new file mode 100644
index 000000000000..fd38451d03ba
--- /dev/null
+++ b/arch/arm64/kernel/vdso/compiler.h
@@ -0,0 +1,67 @@
+/*
+ * Userspace implementations of fallback calls
+ *
+ * Copyright (C) 2017 Cavium, Inc.
+ * Copyright (C) 2012 ARM Limited
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * Author: Will Deacon <will.deacon@arm.com>
+ * Rewriten into C by: Andrew Pinski <apinski@cavium.com>
+ */
+
+#ifndef __VDSO_COMPILER_H
+#define __VDSO_COMPILER_H
+
+#include <asm/processor.h>
+#include <asm/unistd.h>
+#include <linux/compiler.h>
+
+#ifdef CONFIG_ARM_ARCH_TIMER
+#define ARCH_PROVIDES_TIMER
+#endif
+
+#define DEFINE_FALLBACK(name, type_arg1, name_arg1, type_arg2, name_arg2) \
+static notrace long name##_fallback(type_arg1 _##name_arg1, \
+ type_arg2 _##name_arg2) \
+{ \
+ register type_arg1 name_arg1 asm("x0") = _##name_arg1; \
+ register type_arg2 name_arg2 asm("x1") = _##name_arg2; \
+ register long ret asm ("x0"); \
+ register long nr asm("x8") = __NR_##name; \
+ \
+ asm volatile( \
+ " svc #0\n" \
+ : "=r" (ret) \
+ : "r" (name_arg1), "r" (name_arg2), "r" (nr) \
+ : "memory"); \
+ \
+ return ret; \
+}
+
+/*
+ * AArch64 implementation of arch_counter_get_cntvct() suitable for vdso
+ */
+static __always_inline notrace u64 arch_vdso_read_counter(void)
+{
+ /* Read the virtual counter. */
+ isb();
+ return read_sysreg(cntvct_el0);
+}
+
+/* Rename exported vdso functions */
+#define __vdso_clock_gettime __kernel_clock_gettime
+#define __vdso_gettimeofday __kernel_gettimeofday
+#define __vdso_clock_getres __kernel_clock_getres
+
+#endif /* __VDSO_COMPILER_H */
diff --git a/arch/arm64/kernel/vdso/datapage.h b/arch/arm64/kernel/vdso/datapage.h
new file mode 100644
index 000000000000..be86a6074cf8
--- /dev/null
+++ b/arch/arm64/kernel/vdso/datapage.h
@@ -0,0 +1,59 @@
+/*
+ * Userspace implementations of __get_datapage
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef __VDSO_DATAPAGE_H
+#define __VDSO_DATAPAGE_H
+
+#include <linux/bitops.h>
+#include <linux/types.h>
+#include <asm/vdso_datapage.h>
+
+/*
+ * We use the hidden visibility to prevent the compiler from generating a GOT
+ * relocation. Not only is going through a GOT useless (the entry couldn't and
+ * mustn't be overridden by another library), it does not even work: the linker
+ * cannot generate an absolute address to the data page.
+ *
+ * With the hidden visibility, the compiler simply generates a PC-relative
+ * relocation (R_ARM_REL32), and this is what we need.
+ */
+extern const struct vdso_data _vdso_data __attribute__((visibility("hidden")));
+
+static inline const struct vdso_data *__get_datapage(void)
+{
+ const struct vdso_data *ret;
+ /*
+ * This simply puts &_vdso_data into ret. The reason why we don't use
+ * `ret = &_vdso_data` is that the compiler tends to optimise this in a
+ * very suboptimal way: instead of keeping &_vdso_data in a register,
+ * it goes through a relocation almost every time _vdso_data must be
+ * accessed (even in subfunctions). This is both time and space
+ * consuming: each relocation uses a word in the code section, and it
+ * has to be loaded at runtime.
+ *
+ * This trick hides the assignment from the compiler. Since it cannot
+ * track where the pointer comes from, it will only use one relocation
+ * where __get_datapage() is called, and then keep the result in a
+ * register.
+ */
+ asm("" : "=r"(ret) : "0"(&_vdso_data));
+ return ret;
+}
+
+/* We can only guarantee 56 bits of precision. */
+#define ARCH_CLOCK_FIXED_MASK GENMASK_ULL(55, 0)
+
+#endif /* __VDSO_DATAPAGE_H */
diff --git a/arch/arm64/kernel/vdso/gettimeofday.S b/arch/arm64/kernel/vdso/gettimeofday.S
deleted file mode 100644
index 76320e920965..000000000000
--- a/arch/arm64/kernel/vdso/gettimeofday.S
+++ /dev/null
@@ -1,328 +0,0 @@
-/*
- * Userspace implementations of gettimeofday() and friends.
- *
- * Copyright (C) 2012 ARM Limited
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program. If not, see <http://www.gnu.org/licenses/>.
- *
- * Author: Will Deacon <will.deacon@arm.com>
- */
-
-#include <linux/linkage.h>
-#include <asm/asm-offsets.h>
-#include <asm/unistd.h>
-
-#define NSEC_PER_SEC_LO16 0xca00
-#define NSEC_PER_SEC_HI16 0x3b9a
-
-vdso_data .req x6
-seqcnt .req w7
-w_tmp .req w8
-x_tmp .req x8
-
-/*
- * Conventions for macro arguments:
- * - An argument is write-only if its name starts with "res".
- * - All other arguments are read-only, unless otherwise specified.
- */
-
- .macro seqcnt_acquire
-9999: ldr seqcnt, [vdso_data, #VDSO_TB_SEQ_COUNT]
- tbnz seqcnt, #0, 9999b
- dmb ishld
- .endm
-
- .macro seqcnt_check fail
- dmb ishld
- ldr w_tmp, [vdso_data, #VDSO_TB_SEQ_COUNT]
- cmp w_tmp, seqcnt
- b.ne \fail
- .endm
-
- .macro syscall_check fail
- ldr w_tmp, [vdso_data, #VDSO_USE_SYSCALL]
- cbnz w_tmp, \fail
- .endm
-
- .macro get_nsec_per_sec res
- mov \res, #NSEC_PER_SEC_LO16
- movk \res, #NSEC_PER_SEC_HI16, lsl #16
- .endm
-
- /*
- * Returns the clock delta, in nanoseconds left-shifted by the clock
- * shift.
- */
- .macro get_clock_shifted_nsec res, cycle_last, mult
- /* Read the virtual counter. */
- isb
- mrs x_tmp, cntvct_el0
- /* Calculate cycle delta and convert to ns. */
- sub \res, x_tmp, \cycle_last
- /* We can only guarantee 56 bits of precision. */
- movn x_tmp, #0xff00, lsl #48
- and \res, x_tmp, \res
- mul \res, \res, \mult
- .endm
-
- /*
- * Returns in res_{sec,nsec} the REALTIME timespec, based on the
- * "wall time" (xtime) and the clock_mono delta.
- */
- .macro get_ts_realtime res_sec, res_nsec, \
- clock_nsec, xtime_sec, xtime_nsec, nsec_to_sec
- add \res_nsec, \clock_nsec, \xtime_nsec
- udiv x_tmp, \res_nsec, \nsec_to_sec
- add \res_sec, \xtime_sec, x_tmp
- msub \res_nsec, x_tmp, \nsec_to_sec, \res_nsec
- .endm
-
- /*
- * Returns in res_{sec,nsec} the timespec based on the clock_raw delta,
- * used for CLOCK_MONOTONIC_RAW.
- */
- .macro get_ts_clock_raw res_sec, res_nsec, clock_nsec, nsec_to_sec
- udiv \res_sec, \clock_nsec, \nsec_to_sec
- msub \res_nsec, \res_sec, \nsec_to_sec, \clock_nsec
- .endm
-
- /* sec and nsec are modified in place. */
- .macro add_ts sec, nsec, ts_sec, ts_nsec, nsec_to_sec
- /* Add timespec. */
- add \sec, \sec, \ts_sec
- add \nsec, \nsec, \ts_nsec
-
- /* Normalise the new timespec. */
- cmp \nsec, \nsec_to_sec
- b.lt 9999f
- sub \nsec, \nsec, \nsec_to_sec
- add \sec, \sec, #1
-9999:
- cmp \nsec, #0
- b.ge 9998f
- add \nsec, \nsec, \nsec_to_sec
- sub \sec, \sec, #1
-9998:
- .endm
-
- .macro clock_gettime_return, shift=0
- .if \shift == 1
- lsr x11, x11, x12
- .endif
- stp x10, x11, [x1, #TSPEC_TV_SEC]
- mov x0, xzr
- ret
- .endm
-
- .macro jump_slot jumptable, index, label
- .if (. - \jumptable) != 4 * (\index)
- .error "Jump slot index mismatch"
- .endif
- b \label
- .endm
-
- .text
-
-/* int __kernel_gettimeofday(struct timeval *tv, struct timezone *tz); */
-ENTRY(__kernel_gettimeofday)
- .cfi_startproc
- adr vdso_data, _vdso_data
- /* If tv is NULL, skip to the timezone code. */
- cbz x0, 2f
-
- /* Compute the time of day. */
-1: seqcnt_acquire
- syscall_check fail=4f
- ldr x10, [vdso_data, #VDSO_CS_CYCLE_LAST]
- /* w11 = cs_mono_mult, w12 = cs_shift */
- ldp w11, w12, [vdso_data, #VDSO_CS_MONO_MULT]
- ldp x13, x14, [vdso_data, #VDSO_XTIME_CLK_SEC]
- seqcnt_check fail=1b
-
- get_nsec_per_sec res=x9
- lsl x9, x9, x12
-
- get_clock_shifted_nsec res=x15, cycle_last=x10, mult=x11
- get_ts_realtime res_sec=x10, res_nsec=x11, \
- clock_nsec=x15, xtime_sec=x13, xtime_nsec=x14, nsec_to_sec=x9
-
- /* Convert ns to us. */
- mov x13, #1000
- lsl x13, x13, x12
- udiv x11, x11, x13
- stp x10, x11, [x0, #TVAL_TV_SEC]
-2:
- /* If tz is NULL, return 0. */
- cbz x1, 3f
- ldp w4, w5, [vdso_data, #VDSO_TZ_MINWEST]
- stp w4, w5, [x1, #TZ_MINWEST]
-3:
- mov x0, xzr
- ret
-4:
- /* Syscall fallback. */
- mov x8, #__NR_gettimeofday
- svc #0
- ret
- .cfi_endproc
-ENDPROC(__kernel_gettimeofday)
-
-#define JUMPSLOT_MAX CLOCK_MONOTONIC_COARSE
-
-/* int __kernel_clock_gettime(clockid_t clock_id, struct timespec *tp); */
-ENTRY(__kernel_clock_gettime)
- .cfi_startproc
- cmp w0, #JUMPSLOT_MAX
- b.hi syscall
- adr vdso_data, _vdso_data
- adr x_tmp, jumptable
- add x_tmp, x_tmp, w0, uxtw #2
- br x_tmp
-
- ALIGN
-jumptable:
- jump_slot jumptable, CLOCK_REALTIME, realtime
- jump_slot jumptable, CLOCK_MONOTONIC, monotonic
- b syscall
- b syscall
- jump_slot jumptable, CLOCK_MONOTONIC_RAW, monotonic_raw
- jump_slot jumptable, CLOCK_REALTIME_COARSE, realtime_coarse
- jump_slot jumptable, CLOCK_MONOTONIC_COARSE, monotonic_coarse
-
- .if (. - jumptable) != 4 * (JUMPSLOT_MAX + 1)
- .error "Wrong jumptable size"
- .endif
-
- ALIGN
-realtime:
- seqcnt_acquire
- syscall_check fail=syscall
- ldr x10, [vdso_data, #VDSO_CS_CYCLE_LAST]
- /* w11 = cs_mono_mult, w12 = cs_shift */
- ldp w11, w12, [vdso_data, #VDSO_CS_MONO_MULT]
- ldp x13, x14, [vdso_data, #VDSO_XTIME_CLK_SEC]
- seqcnt_check fail=realtime
-
- /* All computations are done with left-shifted nsecs. */
- get_nsec_per_sec res=x9
- lsl x9, x9, x12
-
- get_clock_shifted_nsec res=x15, cycle_last=x10, mult=x11
- get_ts_realtime res_sec=x10, res_nsec=x11, \
- clock_nsec=x15, xtime_sec=x13, xtime_nsec=x14, nsec_to_sec=x9
- clock_gettime_return, shift=1
-
- ALIGN
-monotonic:
- seqcnt_acquire
- syscall_check fail=syscall
- ldr x10, [vdso_data, #VDSO_CS_CYCLE_LAST]
- /* w11 = cs_mono_mult, w12 = cs_shift */
- ldp w11, w12, [vdso_data, #VDSO_CS_MONO_MULT]
- ldp x13, x14, [vdso_data, #VDSO_XTIME_CLK_SEC]
- ldp x3, x4, [vdso_data, #VDSO_WTM_CLK_SEC]
- seqcnt_check fail=monotonic
-
- /* All computations are done with left-shifted nsecs. */
- lsl x4, x4, x12
- get_nsec_per_sec res=x9
- lsl x9, x9, x12
-
- get_clock_shifted_nsec res=x15, cycle_last=x10, mult=x11
- get_ts_realtime res_sec=x10, res_nsec=x11, \
- clock_nsec=x15, xtime_sec=x13, xtime_nsec=x14, nsec_to_sec=x9
-
- add_ts sec=x10, nsec=x11, ts_sec=x3, ts_nsec=x4, nsec_to_sec=x9
- clock_gettime_return, shift=1
-
- ALIGN
-monotonic_raw:
- seqcnt_acquire
- syscall_check fail=syscall
- ldr x10, [vdso_data, #VDSO_CS_CYCLE_LAST]
- /* w11 = cs_raw_mult, w12 = cs_shift */
- ldp w12, w11, [vdso_data, #VDSO_CS_SHIFT]
- ldp x13, x14, [vdso_data, #VDSO_RAW_TIME_SEC]
- seqcnt_check fail=monotonic_raw
-
- /* All computations are done with left-shifted nsecs. */
- get_nsec_per_sec res=x9
- lsl x9, x9, x12
-
- get_clock_shifted_nsec res=x15, cycle_last=x10, mult=x11
- get_ts_clock_raw res_sec=x10, res_nsec=x11, \
- clock_nsec=x15, nsec_to_sec=x9
-
- add_ts sec=x10, nsec=x11, ts_sec=x13, ts_nsec=x14, nsec_to_sec=x9
- clock_gettime_return, shift=1
-
- ALIGN
-realtime_coarse:
- seqcnt_acquire
- ldp x10, x11, [vdso_data, #VDSO_XTIME_CRS_SEC]
- seqcnt_check fail=realtime_coarse
- clock_gettime_return
-
- ALIGN
-monotonic_coarse:
- seqcnt_acquire
- ldp x10, x11, [vdso_data, #VDSO_XTIME_CRS_SEC]
- ldp x13, x14, [vdso_data, #VDSO_WTM_CLK_SEC]
- seqcnt_check fail=monotonic_coarse
-
- /* Computations are done in (non-shifted) nsecs. */
- get_nsec_per_sec res=x9
- add_ts sec=x10, nsec=x11, ts_sec=x13, ts_nsec=x14, nsec_to_sec=x9
- clock_gettime_return
-
- ALIGN
-syscall: /* Syscall fallback. */
- mov x8, #__NR_clock_gettime
- svc #0
- ret
- .cfi_endproc
-ENDPROC(__kernel_clock_gettime)
-
-/* int __kernel_clock_getres(clockid_t clock_id, struct timespec *res); */
-ENTRY(__kernel_clock_getres)
- .cfi_startproc
- cmp w0, #CLOCK_REALTIME
- ccmp w0, #CLOCK_MONOTONIC, #0x4, ne
- ccmp w0, #CLOCK_MONOTONIC_RAW, #0x4, ne
- b.ne 1f
-
- ldr x2, 5f
- b 2f
-1:
- cmp w0, #CLOCK_REALTIME_COARSE
- ccmp w0, #CLOCK_MONOTONIC_COARSE, #0x4, ne
- b.ne 4f
- ldr x2, 6f
-2:
- cbz w1, 3f
- stp xzr, x2, [x1]
-
-3: /* res == NULL. */
- mov w0, wzr
- ret
-
-4: /* Syscall fallback. */
- mov x8, #__NR_clock_getres
- svc #0
- ret
-5:
- .quad CLOCK_REALTIME_RES
-6:
- .quad CLOCK_COARSE_RES
- .cfi_endproc
-ENDPROC(__kernel_clock_getres)
diff --git a/arch/arm64/kernel/vdso/vgettimeofday.c b/arch/arm64/kernel/vdso/vgettimeofday.c
new file mode 100644
index 000000000000..b73d4011993d
--- /dev/null
+++ b/arch/arm64/kernel/vdso/vgettimeofday.c
@@ -0,0 +1,3 @@
+#include "compiler.h"
+#include "datapage.h"
+#include "../../../../lib/vdso/vgettimeofday.c"
--
2.15.0.rc2.357.g7e34df9404-goog
^ permalink raw reply related [flat|nested] 6+ messages in thread
* [PATCH v4 10/12] arm64: vdso: replace gettimeofday.S with global vgettimeofday.C
2017-10-31 18:32 [PATCH v4 10/12] arm64: vdso: replace gettimeofday.S with global vgettimeofday.C Mark Salyzyn
@ 2017-11-01 22:14 ` Mark Salyzyn
[not found] ` <61EF90BA-9A6D-4DC4-A1F4-BEAA210D708C@redhat.com>
0 siblings, 1 reply; 6+ messages in thread
From: Mark Salyzyn @ 2017-11-01 22:14 UTC (permalink / raw)
To: linux-arm-kernel
On 10/31/2017 11:32 AM, Mark Salyzyn wrote:
> . . .
> diff --git a/arch/arm64/kernel/vdso/Makefile b/arch/arm64/kernel/vdso/Makefile
> index 62c84f7cb01b..0925f8908dcb 100644
> --- a/arch/arm64/kernel/vdso/Makefile
> +++ b/arch/arm64/kernel/vdso/Makefile
> @@ -5,18 +5,26 @@
> # Heavily based on the vDSO Makefiles for other archs.
> #
>
> -obj-vdso := gettimeofday.o note.o sigreturn.o
> +obj-vdso := vgettimeofday.o note.o sigreturn.o
>
> # Build rules
> targets := $(obj-vdso) vdso.so vdso.so.dbg
> obj-vdso := $(addprefix $(obj)/, $(obj-vdso))
>
> -ccflags-y := -shared -fno-common -fno-builtin
> +ccflags-y := -shared -fno-common -fno-builtin -fno-stack-protector
> +ccflags-y += -DDISABLE_BRANCH_PROFILING
> ccflags-y += -nostdlib -Wl,-soname=linux-vdso.so.1 \
> $(call cc-ldoption, -Wl$(comma)--hash-style=sysv)
>
> +# Force -O2 to avoid libgcc dependencies
> +CFLAGS_REMOVE_vgettimeofday.o = -pg -Os
> +CFLAGS_vgettimeofday.o = -O2 -mcmodel=tiny
> +
This last stanza breaks clang builds. Suggested fix is:
@@ -18,7 +18,11 @@ ccflags-y += -nostdlib -Wl,-soname=linux-vdso.so.1 \
?# Force -O2 to avoid libgcc dependencies
?CFLAGS_REMOVE_vgettimeofday.o = -pg -Os
+ifeq ($(cc-name),clang)
+CFLAGS_vgettimeofday.o = -O2
+else
?CFLAGS_vgettimeofday.o = -O2 -mcmodel=tiny
+endif
?# Disable gcov profiling for VDSO code
?GCOV_PROFILE := n
Holding off respin or followup patch until after we get a response from
Jon Masters <jcm@readhat.com> on their QE tests on this patch series.
-- Mark
^ permalink raw reply [flat|nested] 6+ messages in thread
* [PATCH v4 10/12] arm64: vdso: replace gettimeofday.S with global vgettimeofday.C
[not found] ` <61EF90BA-9A6D-4DC4-A1F4-BEAA210D708C@redhat.com>
@ 2017-11-21 18:03 ` Jeffrey Bastian
2017-11-21 21:16 ` Laura Abbott
` (2 more replies)
0 siblings, 3 replies; 6+ messages in thread
From: Jeffrey Bastian @ 2017-11-21 18:03 UTC (permalink / raw)
To: linux-arm-kernel
On Thu, Nov 02, 2017 at 12:22:36AM -0400, Jon Masters wrote:
> On Nov 2, 2017, at 06:14, Mark Salyzyn <salyzyn@android.com> wrote:
> > Holding off respin or followup patch until after we get a response
> > from Jon Masters <jcm@readhat.com> on their QE tests on this patch
> > series.
>
> Copying Jeff Bastian who owns QE. Jeff, can you recall what test was
> failing when we tried the gettimeofday C rewrite VDSO patches?
The LTP clock_getres01 test was failing when we tried the initial
implementation of gettimeofday-in-C. This would be a good place to
start when testing the new version of the patches.
https://github.com/linux-test-project/ltp/blob/master/testcases/kernel/syscalls/clock_getres/clock_getres01.c
--
Jeff Bastian
Kernel QE - Hardware Enablement
Red Hat
^ permalink raw reply [flat|nested] 6+ messages in thread
* [PATCH v4 10/12] arm64: vdso: replace gettimeofday.S with global vgettimeofday.C
2017-11-21 18:03 ` Jeffrey Bastian
@ 2017-11-21 21:16 ` Laura Abbott
2017-11-21 21:28 ` Mark Salyzyn
2017-12-15 22:29 ` Jeffrey Bastian
2 siblings, 0 replies; 6+ messages in thread
From: Laura Abbott @ 2017-11-21 21:16 UTC (permalink / raw)
To: linux-arm-kernel
On 11/21/2017 10:03 AM, Jeffrey Bastian wrote:
> On Thu, Nov 02, 2017 at 12:22:36AM -0400, Jon Masters wrote:
>> On Nov 2, 2017, at 06:14, Mark Salyzyn <salyzyn@android.com> wrote:
>>> Holding off respin or followup patch until after we get a response
>>> from Jon Masters <jcm@readhat.com> on their QE tests on this patch
>>> series.
>>
>> Copying Jeff Bastian who owns QE. Jeff, can you recall what test was
>> failing when we tried the gettimeofday C rewrite VDSO patches?
>
> The LTP clock_getres01 test was failing when we tried the initial
> implementation of gettimeofday-in-C. This would be a good place to
> start when testing the new version of the patches.
>
> https://github.com/linux-test-project/ltp/blob/master/testcases/kernel/syscalls/clock_getres/clock_getres01.c
>
>
FWIW, I just tried this on my Mustang (4.14 + these patches) with
both DT and acpi=force and it passed.
Thanks,
Laura
^ permalink raw reply [flat|nested] 6+ messages in thread
* [PATCH v4 10/12] arm64: vdso: replace gettimeofday.S with global vgettimeofday.C
2017-11-21 18:03 ` Jeffrey Bastian
2017-11-21 21:16 ` Laura Abbott
@ 2017-11-21 21:28 ` Mark Salyzyn
2017-12-15 22:29 ` Jeffrey Bastian
2 siblings, 0 replies; 6+ messages in thread
From: Mark Salyzyn @ 2017-11-21 21:28 UTC (permalink / raw)
To: linux-arm-kernel
On 11/21/2017 10:03 AM, Jeffrey Bastian wrote:
> On Thu, Nov 02, 2017 at 12:22:36AM -0400, Jon Masters wrote:
>> On Nov 2, 2017, at 06:14, Mark Salyzyn <salyzyn@android.com> wrote:
>>> Holding off respin or followup patch until after we get a response
>>> from Jon Masters <jcm@readhat.com> on their QE tests on this patch
>>> series.
>> Copying Jeff Bastian who owns QE. Jeff, can you recall what test was
>> failing when we tried the gettimeofday C rewrite VDSO patches?
> The LTP clock_getres01 test was failing when we tried the initial
> implementation of gettimeofday-in-C. This would be a good place to
> start when testing the new version of the patches.
>
> https://github.com/linux-test-project/ltp/blob/master/testcases/kernel/syscalls/clock_getres/clock_getres01.c
>
>
Thanks.
I ran the test (32 and 64 bit) on my latest work (but back-ported to
4.9) and it passes. I did not change __vdso_clock_getres or the fallback
(syscall) handler in the patch series though, so the failure has me
wondering. Could you re-run the test with latest to be sure, and I will
look into setting up to test with a ToT kernel (after the thanksgiving
break).
Using bionic libc, with advance modifications to utilize
__vdso_clock_getres; required in order to support the test.
NB: in AOSP _today_ bionic does _not_ propagate the errno correctly on
the __vdso_clock_gettime vdso path and will understandably fail the
similar tests. The public rubber stamp of __vdso_clock_gettime code to
support __vdso_clock_getres will not work. errno propagation issue will
be fixed in bionic.
-- Mark
^ permalink raw reply [flat|nested] 6+ messages in thread
* [PATCH v4 10/12] arm64: vdso: replace gettimeofday.S with global vgettimeofday.C
2017-11-21 18:03 ` Jeffrey Bastian
2017-11-21 21:16 ` Laura Abbott
2017-11-21 21:28 ` Mark Salyzyn
@ 2017-12-15 22:29 ` Jeffrey Bastian
2 siblings, 0 replies; 6+ messages in thread
From: Jeffrey Bastian @ 2017-12-15 22:29 UTC (permalink / raw)
To: linux-arm-kernel
On Tue, Nov 21, 2017 at 13:28:13 PST, Mark Salyzyn wrote:
> I ran the test (32 and 64 bit) on my latest work (but back-ported to
> 4.9) and it passes. I did not change __vdso_clock_getres or the fallback
> (syscall) handler in the patch series though, so the failure has me
> wondering. Could you re-run the test with latest to be sure, and I will
> look into setting up to test with a ToT kernel (after the thanksgiving
> break).
I applied your patch set to a 4.14 kernel and ran LTP clock_getres01
(64-bit) and it still passed:
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
[root at localhost ~]# uname -r
4.14.0-vdsogtod.aarch64
[root at localhost ~]# ./ltp-full-20170929/testcases/kernel/syscalls/clock_getres/clock_getres01
tst_test.c:934: INFO: Timeout per run is 0h 05m 00s
clock_getres01.c:79: PASS: clock_getres(REALTIME, ...) succeeded
clock_getres01.c:79: PASS: clock_getres(MONOTONIC, ...) succeeded
clock_getres01.c:79: PASS: clock_getres(PROCESS_CPUTIME_ID, ...) succeeded
clock_getres01.c:79: PASS: clock_getres(THREAD_CPUTIME_ID, ...) succeeded
clock_getres01.c:79: PASS: clock_getres(REALTIME, ...) succeeded
clock_getres01.c:79: PASS: clock_getres(CLOCK_MONOTONIC_RAW, ...) succeeded
clock_getres01.c:79: PASS: clock_getres(CLOCK_REALTIME_COARSE, ...) succeeded
clock_getres01.c:79: PASS: clock_getres(CLOCK_MONOTONIC_COARSE, ...) succeeded
clock_getres01.c:79: PASS: clock_getres(CLOCK_BOOTTIME, ...) succeeded
clock_getres01.c:64: CONF: clock_getres(CLOCK_REALTIME_ALARM, ...) NO SUPPORTED
clock_getres01.c:64: CONF: clock_getres(CLOCK_BOOTTIME_ALARM, ...) NO SUPPORTED
clock_getres01.c:79: PASS: clock_getres(-1, ...) succeeded
Summary:
passed 10
failed 0
skipped 2
warnings 0
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
I also tried another test (see source below) which sleeps for 0.5
seconds and then verifies that approximately 0.5 seconds passed. With
the original gettimeofday-in-C patches, it would often fail with weird
elapsed times like 0.1 seconds or 1.1 seconds:
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
[root at localhost ~]# ./a.out ; echo $?
1: clock_gettime: tv_sec = 1207, tv_nsec = 6403566
Sleeping 0.5 seconds
2: clock_gettime: tv_sec = 1207, tv_nsec = 16512476
Elapsed: 0.10108910
FAIL: elapsed time is not approximately 0.5 seconds
255
[root at localhost ~]# ./a.out ; echo $?
1: clock_gettime: tv_sec = 1215, tv_nsec = 9784545
Sleeping 0.5 seconds
2: clock_gettime: tv_sec = 1216, tv_nsec = 9898595
Elapsed: 1.114050
FAIL: elapsed time is not approximately 0.5 seconds
255
[root at localhost ~]# ./a.out ; echo $?
1: clock_gettime: tv_sec = 1225, tv_nsec = 13710370
Sleeping 0.5 seconds
2: clock_gettime: tv_sec = 1225, tv_nsec = 13819740
Elapsed: 0.109370
FAIL: elapsed time is not approximately 0.5 seconds
255
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
The elapsed time matches the expected 0.5 seconds with the latest patch
version:
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
[root at localhost ~]# ./a.out ; echo $?
1: clock_gettime: tv_sec = 2077, tv_nsec = 989623095
Sleeping 0.5 seconds
2: clock_gettime: tv_sec = 2078, tv_nsec = 489728070
Elapsed: 0.500104975
PASS
0
[root at localhost ~]# ./a.out ; echo $?
1: clock_gettime: tv_sec = 2079, tv_nsec = 519625035
Sleeping 0.5 seconds
2: clock_gettime: tv_sec = 2080, tv_nsec = 19726365
Elapsed: 0.500101330
PASS
0
[root at localhost ~]# ./a.out ; echo $?
1: clock_gettime: tv_sec = 2080, tv_nsec = 684894725
Sleeping 0.5 seconds
2: clock_gettime: tv_sec = 2081, tv_nsec = 184994605
Elapsed: 0.500099880
PASS
0
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Source for the elapsed time test:
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
#include <stdio.h>
#include <time.h>
int main(void)
{
int ret;
clockid_t clk_id = CLOCK_MONOTONIC_RAW;
struct timespec tp1, tp2, elapsed;
struct timespec req, rem;
ret = clock_gettime(clk_id, &tp1);
if (ret) {
perror("ERROR: clock_gettime:");
}
printf("1: clock_gettime: tv_sec = %ld, tv_nsec = %lld\n",
tp1.tv_sec, tp1.tv_nsec);
printf("Sleeping 0.5 seconds\n");
req.tv_sec = 0;
req.tv_nsec = 500000000;
ret = nanosleep(&req, &rem);
if (ret) {
perror("ERROR: nanosleep:");
printf(" Remaining sleep: tv_sec = %ld, tv_nsec = %lld\n",
rem.tv_sec, rem.tv_nsec);
}
ret = clock_gettime(clk_id, &tp2);
if (ret) {
perror("ERROR: clock_gettime:");
}
printf("2: clock_gettime: tv_sec = %ld, tv_nsec = %lld\n",
tp2.tv_sec, tp2.tv_nsec);
if ((tp2.tv_nsec - tp1.tv_nsec) < 0) {
elapsed.tv_sec = tp2.tv_sec - tp1.tv_sec - 1;
elapsed.tv_nsec = 1000000000 + tp2.tv_nsec - tp1.tv_nsec;
} else {
elapsed.tv_sec = tp2.tv_sec - tp1.tv_sec;
elapsed.tv_nsec = tp2.tv_nsec - tp1.tv_nsec;
}
printf("Elapsed: %ld.%lld\n", elapsed.tv_sec, elapsed.tv_nsec);
if (elapsed.tv_sec != 0 ||
elapsed.tv_nsec < 490000000 ||
elapsed.tv_nsec > 510000000) {
printf("FAIL: elapsed time is not approximately 0.5 seconds\n");
return -1;
}
printf("PASS\n");
return 0;
}
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
--
Jeff Bastian
Kernel QE - Hardware Enablement
Red Hat
^ permalink raw reply [flat|nested] 6+ messages in thread
end of thread, other threads:[~2017-12-15 22:29 UTC | newest]
Thread overview: 6+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2017-10-31 18:32 [PATCH v4 10/12] arm64: vdso: replace gettimeofday.S with global vgettimeofday.C Mark Salyzyn
2017-11-01 22:14 ` Mark Salyzyn
[not found] ` <61EF90BA-9A6D-4DC4-A1F4-BEAA210D708C@redhat.com>
2017-11-21 18:03 ` Jeffrey Bastian
2017-11-21 21:16 ` Laura Abbott
2017-11-21 21:28 ` Mark Salyzyn
2017-12-15 22:29 ` Jeffrey Bastian
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).