* [PATCH] powerpc/vdso64: Add support for CLOCK_{REALTIME/MONOTONIC}_COARSE
@ 2017-07-20 9:58 Santosh Sivaraj
2017-07-20 13:18 ` Michael Ellerman
` (3 more replies)
0 siblings, 4 replies; 15+ messages in thread
From: Santosh Sivaraj @ 2017-07-20 9:58 UTC (permalink / raw)
To: linuxppc-dev; +Cc: Michael Ellerman, Srikar Dronamraju, Benjamin Herrenschmidt
Current vDSO64 implementation does not have support for coarse
clocks (CLOCK_MONOTONIC_COARSE, CLOCK_REALTIME_COARSE), for which it falls
back to system call. Below is a benchmark of the difference in execution
time with and without vDSO support.
(Non-coarse clocks are also included just for completion)
Without vDSO support:
--------------------
clock-gettime-realtime: syscall: 1547 nsec/call
clock-gettime-realtime: libc: 258 nsec/call
clock-gettime-realtime: vdso: 180 nsec/call
clock-gettime-monotonic: syscall: 1399 nsec/call
clock-gettime-monotonic: libc: 317 nsec/call
clock-gettime-monotonic: vdso: 249 nsec/call
clock-gettime-realtime-coarse: syscall: 1228 nsec/call
clock-gettime-realtime-coarse: libc: 1320 nsec/call
clock-gettime-realtime-coarse: vdso: 1330 nsec/call
clock-gettime-monotonic-coarse: syscall: 1263 nsec/call
clock-gettime-monotonic-coarse: libc: 1368 nsec/call
clock-gettime-monotonic-coarse: vdso: 1258 nsec/call
With vDSO support:
------------------
clock-gettime-realtime: syscall: 1660 nsec/call
clock-gettime-realtime: libc: 251 nsec/call
clock-gettime-realtime: vdso: 180 nsec/call
clock-gettime-monotonic: syscall: 1514 nsec/call
clock-gettime-monotonic: libc: 309 nsec/call
clock-gettime-monotonic: vdso: 239 nsec/call
clock-gettime-realtime-coarse: syscall: 1228 nsec/call
clock-gettime-realtime-coarse: libc: 172 nsec/call
clock-gettime-realtime-coarse: vdso: 101 nsec/call
clock-gettime-monotonic-coarse: syscall: 1347 nsec/call
clock-gettime-monotonic-coarse: libc: 187 nsec/call
clock-gettime-monotonic-coarse: vdso: 125 nsec/call
Used https://github.com/nlynch-mentor/vdsotest.git for the benchmarks.
CC: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: Santosh Sivaraj <santosh@fossix.org>
---
arch/powerpc/include/asm/vdso.h | 1 +
arch/powerpc/kernel/vdso64/Makefile | 2 +-
arch/powerpc/kernel/vdso64/gettime.c | 162 ++++++++++++++++++++++++++++++
arch/powerpc/kernel/vdso64/gettimeofday.S | 82 ++-------------
4 files changed, 174 insertions(+), 73 deletions(-)
create mode 100644 arch/powerpc/kernel/vdso64/gettime.c
diff --git a/arch/powerpc/include/asm/vdso.h b/arch/powerpc/include/asm/vdso.h
index c53f5f6..721e4cf 100644
--- a/arch/powerpc/include/asm/vdso.h
+++ b/arch/powerpc/include/asm/vdso.h
@@ -23,6 +23,7 @@ extern unsigned long vdso32_sigtramp;
extern unsigned long vdso32_rt_sigtramp;
int vdso_getcpu_init(void);
+struct vdso_data *__get_datapage(void);
#else /* __ASSEMBLY__ */
diff --git a/arch/powerpc/kernel/vdso64/Makefile b/arch/powerpc/kernel/vdso64/Makefile
index 31107bf..8958d87 100644
--- a/arch/powerpc/kernel/vdso64/Makefile
+++ b/arch/powerpc/kernel/vdso64/Makefile
@@ -1,6 +1,6 @@
# List of files in the vdso, has to be asm only for now
-obj-vdso64 = sigtramp.o gettimeofday.o datapage.o cacheflush.o note.o getcpu.o
+obj-vdso64 = sigtramp.o gettimeofday.o datapage.o cacheflush.o note.o getcpu.o gettime.o
# Build rules
diff --git a/arch/powerpc/kernel/vdso64/gettime.c b/arch/powerpc/kernel/vdso64/gettime.c
new file mode 100644
index 0000000..01f411f
--- /dev/null
+++ b/arch/powerpc/kernel/vdso64/gettime.c
@@ -0,0 +1,162 @@
+/*
+ * Userland implementation of gettimeofday() for 64 bits processes in a
+ * ppc64 kernel for use in the vDSO
+ *
+ * Copyright (C) 2017 Santosh Sivaraj (santosh@fossix.org), IBM.
+ *
+ * Originally implemented in assembly by:
+ * Benjamin Herrenschmuidt (benh@kernel.crashing.org),
+ * IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/compiler.h>
+#include <linux/types.h>
+#include <asm/ppc_asm.h>
+#include <asm/vdso.h>
+#include <asm/vdso_datapage.h>
+#include <asm/time.h>
+
+static notrace void kernel_get_tspec(struct timespec *tp,
+ struct vdso_data *vdata, u32 *wtom_sec,
+ u32 *wtom_nsec)
+{
+ u64 tb;
+ u32 update_count;
+
+ do {
+ /* check for update count & load values */
+ update_count = vdata->tb_update_count;
+
+ /* Get TB, offset it and scale result */
+ tb = mulhdu((get_tb() - vdata->tb_orig_stamp) << 12,
+ vdata->tb_to_xs) + vdata->stamp_sec_fraction;
+ tp->tv_sec = vdata->stamp_xtime.tv_sec;
+ if (wtom_sec)
+ *wtom_sec = vdata->wtom_clock_sec;
+ if (wtom_nsec)
+ *wtom_nsec = vdata->wtom_clock_nsec;
+ } while (update_count != vdata->tb_update_count);
+
+ tp->tv_nsec = ((u64)mulhwu(tb, NSEC_PER_SEC) << 32) >> 32;
+ tp->tv_sec += (tb >> 32);
+}
+
+static notrace int clock_get_realtime(struct timespec *tp,
+ struct vdso_data *vdata)
+{
+ kernel_get_tspec(tp, vdata, NULL, NULL);
+
+ return 0;
+}
+
+static notrace int clock_get_monotonic(struct timespec *tp,
+ struct vdso_data *vdata)
+{
+ __s32 wtom_sec, wtom_nsec;
+ u64 nsec;
+
+ kernel_get_tspec(tp, vdata, &wtom_sec, &wtom_nsec);
+
+ tp->tv_sec += wtom_sec;
+
+ nsec = tp->tv_nsec;
+ tp->tv_nsec = 0;
+ timespec_add_ns(tp, nsec + wtom_nsec);
+
+ return 0;
+}
+
+static notrace int clock_realtime_coarse(struct timespec *tp,
+ struct vdso_data *vdata)
+{
+ u32 update_count;
+
+ do {
+ /* check for update count & load values */
+ update_count = vdata->tb_update_count;
+
+ tp->tv_sec = vdata->stamp_xtime.tv_sec;
+ tp->tv_nsec = vdata->stamp_xtime.tv_nsec;
+ } while (update_count != vdata->tb_update_count);
+
+ return 0;
+}
+
+static notrace int clock_monotonic_coarse(struct timespec *tp,
+ struct vdso_data *vdata)
+{
+ __s32 wtom_sec, wtom_nsec;
+ u64 nsec;
+ u32 update_count;
+
+ do {
+ /* check for update count & load values */
+ update_count = vdata->tb_update_count;
+
+ tp->tv_sec = vdata->stamp_xtime.tv_sec;
+ tp->tv_nsec = vdata->stamp_xtime.tv_nsec;
+ wtom_sec = vdata->wtom_clock_sec;
+ wtom_nsec = vdata->wtom_clock_nsec;
+ } while (update_count != vdata->tb_update_count);
+
+ tp->tv_sec += wtom_sec;
+ nsec = tp->tv_nsec;
+ tp->tv_nsec = 0;
+ timespec_add_ns(tp, nsec + wtom_nsec);
+
+ return 0;
+}
+
+static notrace int gettime_syscall_fallback(clockid_t clk_id,
+ struct timespec *tp)
+{
+ register clockid_t id asm("r3") = clk_id;
+ register struct timespec *t asm("r4") = tp;
+ register int nr asm("r0") = __NR_clock_gettime;
+ register int ret asm("r3");
+
+ asm volatile("sc"
+ : "=r" (ret)
+ : "r"(nr), "r"(id), "r"(t)
+ : "memory");
+
+ return ret;
+}
+
+notrace int kernel_clock_gettime(clockid_t clk_id, struct timespec *tp)
+{
+ int ret;
+ struct vdso_data *vdata = __get_datapage();
+
+ if (!tp || !vdata)
+ return -EBADR;
+
+ switch (clk_id) {
+ case CLOCK_REALTIME:
+ ret = clock_get_realtime(tp, vdata);
+ break;
+ case CLOCK_MONOTONIC:
+ ret = clock_get_monotonic(tp, vdata);
+ break;
+ case CLOCK_REALTIME_COARSE:
+ ret = clock_realtime_coarse(tp, vdata);
+ break;
+ case CLOCK_MONOTONIC_COARSE:
+ ret = clock_monotonic_coarse(tp, vdata);
+ break;
+ default:
+ /* fallback to syscall */
+ ret = -1;
+ break;
+ }
+
+ if (ret)
+ ret = gettime_syscall_fallback(clk_id, tp);
+
+ return ret;
+}
diff --git a/arch/powerpc/kernel/vdso64/gettimeofday.S b/arch/powerpc/kernel/vdso64/gettimeofday.S
index 3820213..1258009 100644
--- a/arch/powerpc/kernel/vdso64/gettimeofday.S
+++ b/arch/powerpc/kernel/vdso64/gettimeofday.S
@@ -16,6 +16,8 @@
#include <asm/asm-offsets.h>
#include <asm/unistd.h>
+.global kernel_clock_gettime
+
.text
/*
* Exact prototype of gettimeofday
@@ -51,85 +53,21 @@ V_FUNCTION_BEGIN(__kernel_gettimeofday)
.cfi_endproc
V_FUNCTION_END(__kernel_gettimeofday)
-
-/*
- * Exact prototype of clock_gettime()
- *
- * int __kernel_clock_gettime(clockid_t clock_id, struct timespec *tp);
- *
- */
V_FUNCTION_BEGIN(__kernel_clock_gettime)
.cfi_startproc
- /* Check for supported clock IDs */
- cmpwi cr0,r3,CLOCK_REALTIME
- cmpwi cr1,r3,CLOCK_MONOTONIC
- cror cr0*4+eq,cr0*4+eq,cr1*4+eq
- bne cr0,99f
-
- mflr r12 /* r12 saves lr */
- .cfi_register lr,r12
- mr r11,r4 /* r11 saves tp */
- bl V_LOCAL_FUNC(__get_datapage) /* get data page */
- lis r7,NSEC_PER_SEC@h /* want nanoseconds */
- ori r7,r7,NSEC_PER_SEC@l
-50: bl V_LOCAL_FUNC(__do_get_tspec) /* get time from tb & kernel */
- bne cr1,80f /* if not monotonic, all done */
-
- /*
- * CLOCK_MONOTONIC
- */
-
- /* now we must fixup using wall to monotonic. We need to snapshot
- * that value and do the counter trick again. Fortunately, we still
- * have the counter value in r8 that was returned by __do_get_tspec.
- * At this point, r4,r5 contain our sec/nsec values.
- */
-
- lwa r6,WTOM_CLOCK_SEC(r3)
- lwa r9,WTOM_CLOCK_NSEC(r3)
-
- /* We now have our result in r6,r9. We create a fake dependency
- * on that result and re-check the counter
- */
- or r0,r6,r9
- xor r0,r0,r0
- add r3,r3,r0
- ld r0,CFG_TB_UPDATE_COUNT(r3)
- cmpld cr0,r0,r8 /* check if updated */
- bne- 50b
-
- /* Add wall->monotonic offset and check for overflow or underflow.
- */
- add r4,r4,r6
- add r5,r5,r9
- cmpd cr0,r5,r7
- cmpdi cr1,r5,0
- blt 1f
- subf r5,r7,r5
- addi r4,r4,1
-1: bge cr1,80f
- addi r4,r4,-1
- add r5,r5,r7
-
-80: std r4,TSPC64_TV_SEC(r11)
- std r5,TSPC64_TV_NSEC(r11)
-
- mtlr r12
+ mflr r6 /* r12 saves lr */
+ stwu r1,-112(r1)
+ .cfi_register lr,r6
+ std r6,24(r1)
+ bl V_LOCAL_FUNC(kernel_clock_gettime)
crclr cr0*4+so
- li r3,0
- blr
-
- /*
- * syscall fallback
- */
-99:
- li r0,__NR_clock_gettime
- sc
+ ld r6,24(r1)
+ addi r1,r1,112
+ mtlr r6
blr
.cfi_endproc
V_FUNCTION_END(__kernel_clock_gettime)
-
/*
* Exact prototype of clock_getres()
*
--
2.9.4
^ permalink raw reply related [flat|nested] 15+ messages in thread
* Re: [PATCH] powerpc/vdso64: Add support for CLOCK_{REALTIME/MONOTONIC}_COARSE
2017-07-20 9:58 [PATCH] powerpc/vdso64: Add support for CLOCK_{REALTIME/MONOTONIC}_COARSE Santosh Sivaraj
@ 2017-07-20 13:18 ` Michael Ellerman
2017-07-20 21:17 ` Benjamin Herrenschmidt
` (2 more replies)
2017-07-21 9:09 ` [PATCH v2] " Santosh Sivaraj
` (2 subsequent siblings)
3 siblings, 3 replies; 15+ messages in thread
From: Michael Ellerman @ 2017-07-20 13:18 UTC (permalink / raw)
To: Santosh Sivaraj, linuxppc-dev; +Cc: Srikar Dronamraju, Benjamin Herrenschmidt
Santosh Sivaraj <santosh@fossix.org> writes:
> Current vDSO64 implementation does not have support for coarse
> clocks (CLOCK_MONOTONIC_COARSE, CLOCK_REALTIME_COARSE), for which it falls
> back to system call. Below is a benchmark of the difference in execution
> time with and without vDSO support.
Hi Santosh,
Great patch! Always good to see asm replaced with C.
> diff --git a/arch/powerpc/kernel/vdso64/gettime.c b/arch/powerpc/kernel/vdso64/gettime.c
> new file mode 100644
> index 0000000..01f411f
> --- /dev/null
> +++ b/arch/powerpc/kernel/vdso64/gettime.c
> @@ -0,0 +1,162 @@
...
> +static notrace int gettime_syscall_fallback(clockid_t clk_id,
> + struct timespec *tp)
> +{
> + register clockid_t id asm("r3") = clk_id;
> + register struct timespec *t asm("r4") = tp;
> + register int nr asm("r0") = __NR_clock_gettime;
> + register int ret asm("r3");
I guess this works. I've always been a bit nervous about register
variables TBH.
> + asm volatile("sc"
> + : "=r" (ret)
> + : "r"(nr), "r"(id), "r"(t)
> + : "memory");
Not sure we need the memory clobber?
It can clobber more registers than that though.
See: Documentation/powerpc/syscall64-abi.txt
> diff --git a/arch/powerpc/kernel/vdso64/gettimeofday.S b/arch/powerpc/kernel/vdso64/gettimeofday.S
> index 3820213..1258009 100644
> --- a/arch/powerpc/kernel/vdso64/gettimeofday.S
> +++ b/arch/powerpc/kernel/vdso64/gettimeofday.S
> @@ -51,85 +53,21 @@ V_FUNCTION_BEGIN(__kernel_gettimeofday)
...
> + stwu r1,-112(r1)
> + .cfi_register lr,r6
> + std r6,24(r1)
> + bl V_LOCAL_FUNC(kernel_clock_gettime)
> crclr cr0*4+so
Clearing CR0[SO] says that the syscall always succeeded.
What happens if you call this with a completely bogus clock id?
I think the solution is probably to do the syscall fallback in asm, and
everything else in C.
cheers
^ permalink raw reply [flat|nested] 15+ messages in thread
* Re: [PATCH] powerpc/vdso64: Add support for CLOCK_{REALTIME/MONOTONIC}_COARSE
2017-07-20 13:18 ` Michael Ellerman
@ 2017-07-20 21:17 ` Benjamin Herrenschmidt
2017-07-20 22:03 ` Segher Boessenkool
2017-07-21 3:05 ` Anton Blanchard
2017-07-21 4:40 ` Santosh Sivaraj
2 siblings, 1 reply; 15+ messages in thread
From: Benjamin Herrenschmidt @ 2017-07-20 21:17 UTC (permalink / raw)
To: Michael Ellerman, Santosh Sivaraj, linuxppc-dev
Cc: Srikar Dronamraju, Segher Boessenkool
On Thu, 2017-07-20 at 23:18 +1000, Michael Ellerman wrote:
> Santosh Sivaraj <santosh@fossix.org> writes:
>
> > Current vDSO64 implementation does not have support for coarse
> > clocks (CLOCK_MONOTONIC_COARSE, CLOCK_REALTIME_COARSE), for which it falls
> > back to system call. Below is a benchmark of the difference in execution
> > time with and without vDSO support.
>
> Hi Santosh,
>
> Great patch! Always good to see asm replaced with C.
Yeah ewll ... when C becomes some kind of weird glorifed asm like
below, I don't see much of a point ;-)
> > diff --git a/arch/powerpc/kernel/vdso64/gettime.c b/arch/powerpc/kernel/vdso64/gettime.c
> > new file mode 100644
> > index 0000000..01f411f
> > --- /dev/null
> > +++ b/arch/powerpc/kernel/vdso64/gettime.c
> > @@ -0,0 +1,162 @@
>
> ...
> > +static notrace int gettime_syscall_fallback(clockid_t clk_id,
> > + struct timespec *tp)
> > +{
> > + register clockid_t id asm("r3") = clk_id;
> > + register struct timespec *t asm("r4") = tp;
> > + register int nr asm("r0") = __NR_clock_gettime;
> > + register int ret asm("r3");
>
> I guess this works. I've always been a bit nervous about register
> variables TBH.
Does it really work ? That really makes me nervous too, I woudn't do
this without a strong ack from a toolchain person... Segher ?
> > + asm volatile("sc"
> > + : "=r" (ret)
> > + : "r"(nr), "r"(id), "r"(t)
> > + : "memory");
>
> Not sure we need the memory clobber?
>
> It can clobber more registers than that though.
>
> See: Documentation/powerpc/syscall64-abi.txt
>
> > diff --git a/arch/powerpc/kernel/vdso64/gettimeofday.S b/arch/powerpc/kernel/vdso64/gettimeofday.S
> > index 3820213..1258009 100644
> > --- a/arch/powerpc/kernel/vdso64/gettimeofday.S
> > +++ b/arch/powerpc/kernel/vdso64/gettimeofday.S
> > @@ -51,85 +53,21 @@ V_FUNCTION_BEGIN(__kernel_gettimeofday)
>
> ...
> > + stwu r1,-112(r1)
> > + .cfi_register lr,r6
> > + std r6,24(r1)
> > + bl V_LOCAL_FUNC(kernel_clock_gettime)
> > crclr cr0*4+so
>
> Clearing CR0[SO] says that the syscall always succeeded.
>
> What happens if you call this with a completely bogus clock id?
>
> I think the solution is probably to do the syscall fallback in asm, and
> everything else in C.
>
> cheers
^ permalink raw reply [flat|nested] 15+ messages in thread
* Re: [PATCH] powerpc/vdso64: Add support for CLOCK_{REALTIME/MONOTONIC}_COARSE
2017-07-20 21:17 ` Benjamin Herrenschmidt
@ 2017-07-20 22:03 ` Segher Boessenkool
0 siblings, 0 replies; 15+ messages in thread
From: Segher Boessenkool @ 2017-07-20 22:03 UTC (permalink / raw)
To: Benjamin Herrenschmidt
Cc: Michael Ellerman, Santosh Sivaraj, linuxppc-dev,
Srikar Dronamraju, Segher Boessenkool
On Fri, Jul 21, 2017 at 07:17:39AM +1000, Benjamin Herrenschmidt wrote:
> > Great patch! Always good to see asm replaced with C.
>
> Yeah ewll ... when C becomes some kind of weird glorifed asm like
> below, I don't see much of a point ;-)
Yeah.
> > > diff --git a/arch/powerpc/kernel/vdso64/gettime.c b/arch/powerpc/kernel/vdso64/gettime.c
> > > new file mode 100644
> > > index 0000000..01f411f
> > > --- /dev/null
> > > +++ b/arch/powerpc/kernel/vdso64/gettime.c
> > > @@ -0,0 +1,162 @@
> >
> > ...
> > > +static notrace int gettime_syscall_fallback(clockid_t clk_id,
> > > + struct timespec *tp)
> > > +{
> > > + register clockid_t id asm("r3") = clk_id;
> > > + register struct timespec *t asm("r4") = tp;
> > > + register int nr asm("r0") = __NR_clock_gettime;
> > > + register int ret asm("r3");
> >
> > I guess this works. I've always been a bit nervous about register
> > variables TBH.
>
> Does it really work ? That really makes me nervous too, I woudn't do
> this without a strong ack from a toolchain person... Segher ?
Local register variables work perfectly well, but only for one thing:
those variables are guaranteed to be in those registers, _as arguments
to an asm_.
> > > + asm volatile("sc"
> > > + : "=r" (ret)
> > > + : "r"(nr), "r"(id), "r"(t)
> > > + : "memory");
> >
> > Not sure we need the memory clobber?
> >
> > It can clobber more registers than that though.
It needs the memory clobber (if the system call accessess any of "your"
memory). You need more register clobbers: also for most CR fields,
CTR, etc.
One trick that often works is doing the system call from within an
assembler function that uses the C ABI, since that has almost the same
calling conventions.
Something as simple as
.globl syscall42
syscall42:
li 0,42
sc
blr
(and yeah handle the CR bit 3 thing somehow)
and declare it as
int syscall42(some_type r3_arg, another_type r4_arg);
Inline asm is good when you want the asm code inlined into the callers,
potentially with arguments optimised etc. The only overhead making the
syscall a function has is that single blr; the only optimisation you
miss is you could potentially load GPR0 a bit earlier (and you can get
a tiny bit more scheduling flexibility).
Segher
^ permalink raw reply [flat|nested] 15+ messages in thread
* Re: [PATCH] powerpc/vdso64: Add support for CLOCK_{REALTIME/MONOTONIC}_COARSE
2017-07-20 13:18 ` Michael Ellerman
2017-07-20 21:17 ` Benjamin Herrenschmidt
@ 2017-07-21 3:05 ` Anton Blanchard
2017-07-21 4:40 ` Santosh Sivaraj
2 siblings, 0 replies; 15+ messages in thread
From: Anton Blanchard @ 2017-07-21 3:05 UTC (permalink / raw)
To: Michael Ellerman; +Cc: Santosh Sivaraj, linuxppc-dev, Srikar Dronamraju
Hi,
> > +static notrace int gettime_syscall_fallback(clockid_t clk_id,
> > + struct timespec *tp)
> > +{
> > + register clockid_t id asm("r3") = clk_id;
> > + register struct timespec *t asm("r4") = tp;
> > + register int nr asm("r0") = __NR_clock_gettime;
> > + register int ret asm("r3");
>
> I guess this works. I've always been a bit nervous about register
> variables TBH.
I don't think this works with clang unfortunately.
Anton
^ permalink raw reply [flat|nested] 15+ messages in thread
* Re: [PATCH] powerpc/vdso64: Add support for CLOCK_{REALTIME/MONOTONIC}_COARSE
2017-07-20 13:18 ` Michael Ellerman
2017-07-20 21:17 ` Benjamin Herrenschmidt
2017-07-21 3:05 ` Anton Blanchard
@ 2017-07-21 4:40 ` Santosh Sivaraj
2017-07-21 6:05 ` Benjamin Herrenschmidt
2 siblings, 1 reply; 15+ messages in thread
From: Santosh Sivaraj @ 2017-07-21 4:40 UTC (permalink / raw)
To: Michael Ellerman; +Cc: linuxppc-dev, Srikar Dronamraju, Benjamin Herrenschmidt
* Michael Ellerman <mpe@ellerman.id.au> wrote (on 2017-07-20 23:18:26 +1000=
):
> Santosh Sivaraj <santosh@fossix.org> writes:
>=20
> > Current vDSO64 implementation does not have support for coarse
> > clocks (CLOCK_MONOTONIC_COARSE, CLOCK_REALTIME_COARSE), for which it fa=
lls
> > back to system call. Below is a benchmark of the difference in execution
> > time with and without vDSO support.
>=20
> Hi Santosh,
>=20
> Great patch! Always good to see asm replaced with C.
>=20
> > diff --git a/arch/powerpc/kernel/vdso64/gettime.c b/arch/powerpc/kernel=
/vdso64/gettime.c
> > new file mode 100644
> > index 0000000..01f411f
> > --- /dev/null
> > +++ b/arch/powerpc/kernel/vdso64/gettime.c
> > @@ -0,0 +1,162 @@
> ...
> > +static notrace int gettime_syscall_fallback(clockid_t clk_id,
> > + struct timespec *tp)
> > +{
> > + register clockid_t id asm("r3") =3D clk_id;
> > + register struct timespec *t asm("r4") =3D tp;
> > + register int nr asm("r0") =3D __NR_clock_gettime;
> > + register int ret asm("r3");
>=20
> I guess this works. I've always been a bit nervous about register
> variables TBH.
Yes, this works. It falls back to syscall in the case of CLOCK_BOOTTIME.
>=20
> > + asm volatile("sc"
> > + : "=3Dr" (ret)
> > + : "r"(nr), "r"(id), "r"(t)
> > + : "memory");
>=20
> Not sure we need the memory clobber?
>=20
> It can clobber more registers than that though.
>=20
> See: Documentation/powerpc/syscall64-abi.txt
>=20
> > diff --git a/arch/powerpc/kernel/vdso64/gettimeofday.S b/arch/powerpc/k=
ernel/vdso64/gettimeofday.S
> > index 3820213..1258009 100644
> > --- a/arch/powerpc/kernel/vdso64/gettimeofday.S
> > +++ b/arch/powerpc/kernel/vdso64/gettimeofday.S
> > @@ -51,85 +53,21 @@ V_FUNCTION_BEGIN(__kernel_gettimeofday)
> ...
> > + stwu r1,-112(r1)
> > + .cfi_register lr,r6
> > + std r6,24(r1)
> > + bl V_LOCAL_FUNC(kernel_clock_gettime)
> > crclr cr0*4+so
>=20
> Clearing CR0[SO] says that the syscall always succeeded.
>=20
> What happens if you call this with a completely bogus clock id?
>
In case of a bogus clock id, the default case sets 'ret' to -1, which force=
s it
to fallback to the syscall.
> I think the solution is probably to do the syscall fallback in asm, and
> everything else in C.
Ok. That's what Sergey also sugested. I will send a v2.
Thanks,
Santosh
>=20
> cheers
--=20
^ permalink raw reply [flat|nested] 15+ messages in thread
* Re: [PATCH] powerpc/vdso64: Add support for CLOCK_{REALTIME/MONOTONIC}_COARSE
2017-07-21 4:40 ` Santosh Sivaraj
@ 2017-07-21 6:05 ` Benjamin Herrenschmidt
0 siblings, 0 replies; 15+ messages in thread
From: Benjamin Herrenschmidt @ 2017-07-21 6:05 UTC (permalink / raw)
To: Santosh Sivaraj, Michael Ellerman; +Cc: linuxppc-dev, Srikar Dronamraju
On Fri, 2017-07-21 at 10:10 +0530, Santosh Sivaraj wrote:
> In case of a bogus clock id, the default case sets 'ret' to -1, which forces it
> to fallback to the syscall.
> > I think the solution is probably to do the syscall fallback in asm, and
> > everything else in C.
>
> Ok. That's what Sergey also sugested. I will send a v2.
Beware that the vDSO has no TOC. You need to be extremely careful that
the generated code by the compiler doesn't try to access/use a TOC.
I would rather continue doing things in asm at this stage.
Ben.
^ permalink raw reply [flat|nested] 15+ messages in thread
* [PATCH v2] powerpc/vdso64: Add support for CLOCK_{REALTIME/MONOTONIC}_COARSE
2017-07-20 9:58 [PATCH] powerpc/vdso64: Add support for CLOCK_{REALTIME/MONOTONIC}_COARSE Santosh Sivaraj
2017-07-20 13:18 ` Michael Ellerman
@ 2017-07-21 9:09 ` Santosh Sivaraj
2017-07-25 6:56 ` [PATCH v3] " Santosh Sivaraj
2017-08-11 8:23 ` [PATCH] " Santosh Sivaraj
2017-07-22 19:29 ` kbuild test robot
2017-07-23 15:12 ` kbuild test robot
3 siblings, 2 replies; 15+ messages in thread
From: Santosh Sivaraj @ 2017-07-21 9:09 UTC (permalink / raw)
To: linuxppc-dev
Cc: Michael Ellerman, Srikar Dronamraju, Anton Blanchard,
Segher Boessenkool, Benjamin Herrenschmidt
Current vDSO64 implementation does not have support for coarse
clocks (CLOCK_MONOTONIC_COARSE, CLOCK_REALTIME_COARSE), for which it falls
back to system call. Below is a benchmark of the difference in execution
time with and without vDSO support.
(Non-coarse clocks are also included just for completion)
Without vDSO support:
--------------------
clock-gettime-realtime: syscall: 1547 nsec/call
clock-gettime-realtime: libc: 258 nsec/call
clock-gettime-realtime: vdso: 180 nsec/call
clock-gettime-monotonic: syscall: 1399 nsec/call
clock-gettime-monotonic: libc: 317 nsec/call
clock-gettime-monotonic: vdso: 249 nsec/call
clock-gettime-realtime-coarse: syscall: 1228 nsec/call
clock-gettime-realtime-coarse: libc: 1320 nsec/call
clock-gettime-realtime-coarse: vdso: 1330 nsec/call
clock-gettime-monotonic-coarse: syscall: 1263 nsec/call
clock-gettime-monotonic-coarse: libc: 1368 nsec/call
clock-gettime-monotonic-coarse: vdso: 1258 nsec/call
With vDSO support:
------------------
clock-gettime-realtime: syscall: 1660 nsec/call
clock-gettime-realtime: libc: 251 nsec/call
clock-gettime-realtime: vdso: 180 nsec/call
clock-gettime-monotonic: syscall: 1514 nsec/call
clock-gettime-monotonic: libc: 309 nsec/call
clock-gettime-monotonic: vdso: 239 nsec/call
clock-gettime-realtime-coarse: syscall: 1228 nsec/call
clock-gettime-realtime-coarse: libc: 172 nsec/call
clock-gettime-realtime-coarse: vdso: 101 nsec/call
clock-gettime-monotonic-coarse: syscall: 1347 nsec/call
clock-gettime-monotonic-coarse: libc: 187 nsec/call
clock-gettime-monotonic-coarse: vdso: 125 nsec/call
Used https://github.com/nlynch-mentor/vdsotest.git for the benchmarks.
CC: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: Santosh Sivaraj <santosh@fossix.org>
---
V2 update: moved syscall fallback to assembly. Tested fallback by removing
REALTIME case from vDSO handler.
clock-gettime-realtime: syscall: 1467 nsec/call
clock-gettime-realtime: libc: 1678 nsec/call
clock-gettime-realtime: vdso: 1615 nsec/call
arch/powerpc/include/asm/vdso.h | 1 +
arch/powerpc/kernel/vdso64/Makefile | 2 +-
arch/powerpc/kernel/vdso64/gettime.c | 143 ++++++++++++++++++++++++++++++
arch/powerpc/kernel/vdso64/gettimeofday.S | 86 ++++--------------
4 files changed, 161 insertions(+), 71 deletions(-)
create mode 100644 arch/powerpc/kernel/vdso64/gettime.c
diff --git a/arch/powerpc/include/asm/vdso.h b/arch/powerpc/include/asm/vdso.h
index c53f5f6..721e4cf 100644
--- a/arch/powerpc/include/asm/vdso.h
+++ b/arch/powerpc/include/asm/vdso.h
@@ -23,6 +23,7 @@ extern unsigned long vdso32_sigtramp;
extern unsigned long vdso32_rt_sigtramp;
int vdso_getcpu_init(void);
+struct vdso_data *__get_datapage(void);
#else /* __ASSEMBLY__ */
diff --git a/arch/powerpc/kernel/vdso64/Makefile b/arch/powerpc/kernel/vdso64/Makefile
index 31107bf..8958d87 100644
--- a/arch/powerpc/kernel/vdso64/Makefile
+++ b/arch/powerpc/kernel/vdso64/Makefile
@@ -1,6 +1,6 @@
# List of files in the vdso, has to be asm only for now
-obj-vdso64 = sigtramp.o gettimeofday.o datapage.o cacheflush.o note.o getcpu.o
+obj-vdso64 = sigtramp.o gettimeofday.o datapage.o cacheflush.o note.o getcpu.o gettime.o
# Build rules
diff --git a/arch/powerpc/kernel/vdso64/gettime.c b/arch/powerpc/kernel/vdso64/gettime.c
new file mode 100644
index 0000000..ef8f75c
--- /dev/null
+++ b/arch/powerpc/kernel/vdso64/gettime.c
@@ -0,0 +1,143 @@
+/*
+ * Userland implementation of gettimeofday() for 64 bits processes in a
+ * ppc64 kernel for use in the vDSO
+ *
+ * Copyright (C) 2017 Santosh Sivaraj (santosh@fossix.org), IBM.
+ *
+ * Originally implemented in assembly by:
+ * Benjamin Herrenschmuidt (benh@kernel.crashing.org),
+ * IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/compiler.h>
+#include <linux/types.h>
+#include <asm/ppc_asm.h>
+#include <asm/vdso.h>
+#include <asm/vdso_datapage.h>
+#include <asm/time.h>
+
+static notrace void kernel_get_tspec(struct timespec *tp,
+ struct vdso_data *vdata, u32 *wtom_sec,
+ u32 *wtom_nsec)
+{
+ u64 tb;
+ u32 update_count;
+
+ do {
+ /* check for update count & load values */
+ update_count = vdata->tb_update_count;
+
+ /* Get TB, offset it and scale result */
+ tb = mulhdu((get_tb() - vdata->tb_orig_stamp) << 12,
+ vdata->tb_to_xs) + vdata->stamp_sec_fraction;
+ tp->tv_sec = vdata->stamp_xtime.tv_sec;
+ if (wtom_sec)
+ *wtom_sec = vdata->wtom_clock_sec;
+ if (wtom_nsec)
+ *wtom_nsec = vdata->wtom_clock_nsec;
+ } while (update_count != vdata->tb_update_count);
+
+ tp->tv_nsec = ((u64)mulhwu(tb, NSEC_PER_SEC) << 32) >> 32;
+ tp->tv_sec += (tb >> 32);
+}
+
+static notrace int clock_get_realtime(struct timespec *tp,
+ struct vdso_data *vdata)
+{
+ kernel_get_tspec(tp, vdata, NULL, NULL);
+
+ return 0;
+}
+
+static notrace int clock_get_monotonic(struct timespec *tp,
+ struct vdso_data *vdata)
+{
+ __s32 wtom_sec, wtom_nsec;
+ u64 nsec;
+
+ kernel_get_tspec(tp, vdata, &wtom_sec, &wtom_nsec);
+
+ tp->tv_sec += wtom_sec;
+
+ nsec = tp->tv_nsec;
+ tp->tv_nsec = 0;
+ timespec_add_ns(tp, nsec + wtom_nsec);
+
+ return 0;
+}
+
+static notrace int clock_realtime_coarse(struct timespec *tp,
+ struct vdso_data *vdata)
+{
+ u32 update_count;
+
+ do {
+ /* check for update count & load values */
+ update_count = vdata->tb_update_count;
+
+ tp->tv_sec = vdata->stamp_xtime.tv_sec;
+ tp->tv_nsec = vdata->stamp_xtime.tv_nsec;
+ } while (update_count != vdata->tb_update_count);
+
+ return 0;
+}
+
+static notrace int clock_monotonic_coarse(struct timespec *tp,
+ struct vdso_data *vdata)
+{
+ __s32 wtom_sec, wtom_nsec;
+ u64 nsec;
+ u32 update_count;
+
+ do {
+ /* check for update count & load values */
+ update_count = vdata->tb_update_count;
+
+ tp->tv_sec = vdata->stamp_xtime.tv_sec;
+ tp->tv_nsec = vdata->stamp_xtime.tv_nsec;
+ wtom_sec = vdata->wtom_clock_sec;
+ wtom_nsec = vdata->wtom_clock_nsec;
+ } while (update_count != vdata->tb_update_count);
+
+ tp->tv_sec += wtom_sec;
+ nsec = tp->tv_nsec;
+ tp->tv_nsec = 0;
+ timespec_add_ns(tp, nsec + wtom_nsec);
+
+ return 0;
+}
+
+notrace int kernel_clock_gettime(clockid_t clk_id, struct timespec *tp)
+{
+ int ret;
+ struct vdso_data *vdata = __get_datapage();
+
+ if (!tp || !vdata)
+ return -EBADR;
+
+ switch (clk_id) {
+ case CLOCK_REALTIME:
+ ret = clock_get_realtime(tp, vdata);
+ break;
+ case CLOCK_MONOTONIC:
+ ret = clock_get_monotonic(tp, vdata);
+ break;
+ case CLOCK_REALTIME_COARSE:
+ ret = clock_realtime_coarse(tp, vdata);
+ break;
+ case CLOCK_MONOTONIC_COARSE:
+ ret = clock_monotonic_coarse(tp, vdata);
+ break;
+ default:
+ /* fallback to syscall */
+ ret = -1;
+ break;
+ }
+
+ return ret;
+}
diff --git a/arch/powerpc/kernel/vdso64/gettimeofday.S b/arch/powerpc/kernel/vdso64/gettimeofday.S
index 3820213..ed5a88b 100644
--- a/arch/powerpc/kernel/vdso64/gettimeofday.S
+++ b/arch/powerpc/kernel/vdso64/gettimeofday.S
@@ -16,6 +16,8 @@
#include <asm/asm-offsets.h>
#include <asm/unistd.h>
+.global kernel_clock_gettime
+
.text
/*
* Exact prototype of gettimeofday
@@ -51,85 +53,29 @@ V_FUNCTION_BEGIN(__kernel_gettimeofday)
.cfi_endproc
V_FUNCTION_END(__kernel_gettimeofday)
-
-/*
- * Exact prototype of clock_gettime()
- *
- * int __kernel_clock_gettime(clockid_t clock_id, struct timespec *tp);
- *
- */
V_FUNCTION_BEGIN(__kernel_clock_gettime)
.cfi_startproc
- /* Check for supported clock IDs */
- cmpwi cr0,r3,CLOCK_REALTIME
- cmpwi cr1,r3,CLOCK_MONOTONIC
- cror cr0*4+eq,cr0*4+eq,cr1*4+eq
- bne cr0,99f
-
- mflr r12 /* r12 saves lr */
- .cfi_register lr,r12
- mr r11,r4 /* r11 saves tp */
- bl V_LOCAL_FUNC(__get_datapage) /* get data page */
- lis r7,NSEC_PER_SEC@h /* want nanoseconds */
- ori r7,r7,NSEC_PER_SEC@l
-50: bl V_LOCAL_FUNC(__do_get_tspec) /* get time from tb & kernel */
- bne cr1,80f /* if not monotonic, all done */
-
- /*
- * CLOCK_MONOTONIC
- */
-
- /* now we must fixup using wall to monotonic. We need to snapshot
- * that value and do the counter trick again. Fortunately, we still
- * have the counter value in r8 that was returned by __do_get_tspec.
- * At this point, r4,r5 contain our sec/nsec values.
- */
-
- lwa r6,WTOM_CLOCK_SEC(r3)
- lwa r9,WTOM_CLOCK_NSEC(r3)
-
- /* We now have our result in r6,r9. We create a fake dependency
- * on that result and re-check the counter
- */
- or r0,r6,r9
- xor r0,r0,r0
- add r3,r3,r0
- ld r0,CFG_TB_UPDATE_COUNT(r3)
- cmpld cr0,r0,r8 /* check if updated */
- bne- 50b
-
- /* Add wall->monotonic offset and check for overflow or underflow.
- */
- add r4,r4,r6
- add r5,r5,r9
- cmpd cr0,r5,r7
- cmpdi cr1,r5,0
- blt 1f
- subf r5,r7,r5
- addi r4,r4,1
-1: bge cr1,80f
- addi r4,r4,-1
- add r5,r5,r7
-
-80: std r4,TSPC64_TV_SEC(r11)
- std r5,TSPC64_TV_NSEC(r11)
-
- mtlr r12
+ mflr r6 /* r12 saves lr */
+ stwu r1,-112(r1)
+ .cfi_register lr,r6
+ std r6,24(r1)
+ std r3,32(r1)
+ std r4,40(r1)
crclr cr0*4+so
- li r3,0
- blr
-
- /*
- * syscall fallback
- */
-99:
+ bl V_LOCAL_FUNC(kernel_clock_gettime)
+ cmpwi r3,0
+ beq 77f
li r0,__NR_clock_gettime
+ ld r3,32(r1)
+ ld r4,40(r1)
sc
+77: ld r6,24(r1)
+ addi r1,r1,112
+ mtlr r6
blr
.cfi_endproc
V_FUNCTION_END(__kernel_clock_gettime)
-
/*
* Exact prototype of clock_getres()
*
--
2.9.4
^ permalink raw reply related [flat|nested] 15+ messages in thread
* [PATCH v3] powerpc/vdso64: Add support for CLOCK_{REALTIME/MONOTONIC}_COARSE
2017-07-21 9:09 ` [PATCH v2] " Santosh Sivaraj
@ 2017-07-25 6:56 ` Santosh Sivaraj
2017-07-25 10:07 ` Benjamin Herrenschmidt
2017-08-11 8:23 ` [PATCH] " Santosh Sivaraj
1 sibling, 1 reply; 15+ messages in thread
From: Santosh Sivaraj @ 2017-07-25 6:56 UTC (permalink / raw)
To: linuxppc-dev
Cc: Michael Ellerman, John Stultz, Thomas Gleixner,
Frederic Weisbecker, Benjamin Herrenschmidt
Current vDSO64 implementation does not have support for coarse clocks
(CLOCK_MONOTONIC_COARSE, CLOCK_REALTIME_COARSE), for which it falls back
to system call, increasing the response time, vDSO implementation reduces
the cycle time. Below is a benchmark of the difference in execution time
with and without vDSO support.
(Non-coarse clocks are also included just for completion)
Without vDSO support:
--------------------
clock-gettime-realtime: syscall: 1547 nsec/call
clock-gettime-realtime: libc: 258 nsec/call
clock-gettime-realtime: vdso: 180 nsec/call
clock-gettime-monotonic: syscall: 1399 nsec/call
clock-gettime-monotonic: libc: 317 nsec/call
clock-gettime-monotonic: vdso: 249 nsec/call
clock-gettime-realtime-coarse: syscall: 1228 nsec/call
clock-gettime-realtime-coarse: libc: 1320 nsec/call
clock-gettime-realtime-coarse: vdso: 1330 nsec/call
clock-gettime-monotonic-coarse: syscall: 1263 nsec/call
clock-gettime-monotonic-coarse: libc: 1368 nsec/call
clock-gettime-monotonic-coarse: vdso: 1258 nsec/call
With vDSO support:
------------------
clock-gettime-realtime: syscall: 1660 nsec/call
clock-gettime-realtime: libc: 251 nsec/call
clock-gettime-realtime: vdso: 180 nsec/call
clock-gettime-monotonic: syscall: 1514 nsec/call
clock-gettime-monotonic: libc: 309 nsec/call
clock-gettime-monotonic: vdso: 239 nsec/call
clock-gettime-realtime-coarse: syscall: 1228 nsec/call
clock-gettime-realtime-coarse: libc: 172 nsec/call
clock-gettime-realtime-coarse: vdso: 101 nsec/call
clock-gettime-monotonic-coarse: syscall: 1347 nsec/call
clock-gettime-monotonic-coarse: libc: 187 nsec/call
clock-gettime-monotonic-coarse: vdso: 125 nsec/call
Used https://github.com/nlynch-mentor/vdsotest.git for the benchmarks.
CC: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: Santosh Sivaraj <santosh@fossix.org>
---
V2 update:
- moved syscall fallback to assembly.
V3 update:
- Restored "exact prototype" comment for __kernel_clock_gettime
- Remove .hidden/.protected directives from __get_datapage to allow it to be called
from C.
arch/powerpc/include/asm/vdso.h | 1 +
arch/powerpc/kernel/vdso64/Makefile | 2 +-
arch/powerpc/kernel/vdso64/datapage.S | 6 --
arch/powerpc/kernel/vdso64/gettime.c | 143 ++++++++++++++++++++++++++++++
arch/powerpc/kernel/vdso64/gettimeofday.S | 78 ++++------------
5 files changed, 161 insertions(+), 69 deletions(-)
create mode 100644 arch/powerpc/kernel/vdso64/gettime.c
diff --git a/arch/powerpc/include/asm/vdso.h b/arch/powerpc/include/asm/vdso.h
index c53f5f6..721e4cf 100644
--- a/arch/powerpc/include/asm/vdso.h
+++ b/arch/powerpc/include/asm/vdso.h
@@ -23,6 +23,7 @@ extern unsigned long vdso32_sigtramp;
extern unsigned long vdso32_rt_sigtramp;
int vdso_getcpu_init(void);
+struct vdso_data *__get_datapage(void);
#else /* __ASSEMBLY__ */
diff --git a/arch/powerpc/kernel/vdso64/Makefile b/arch/powerpc/kernel/vdso64/Makefile
index 31107bf..8958d87 100644
--- a/arch/powerpc/kernel/vdso64/Makefile
+++ b/arch/powerpc/kernel/vdso64/Makefile
@@ -1,6 +1,6 @@
# List of files in the vdso, has to be asm only for now
-obj-vdso64 = sigtramp.o gettimeofday.o datapage.o cacheflush.o note.o getcpu.o
+obj-vdso64 = sigtramp.o gettimeofday.o datapage.o cacheflush.o note.o getcpu.o gettime.o
# Build rules
diff --git a/arch/powerpc/kernel/vdso64/datapage.S b/arch/powerpc/kernel/vdso64/datapage.S
index abf17fe..0a2ee63 100644
--- a/arch/powerpc/kernel/vdso64/datapage.S
+++ b/arch/powerpc/kernel/vdso64/datapage.S
@@ -22,12 +22,6 @@ __kernel_datapage_offset:
V_FUNCTION_BEGIN(__get_datapage)
.cfi_startproc
- /* We don't want that exposed or overridable as we want other objects
- * to be able to bl directly to here
- */
- .protected __get_datapage
- .hidden __get_datapage
-
mflr r0
.cfi_register lr,r0
diff --git a/arch/powerpc/kernel/vdso64/gettime.c b/arch/powerpc/kernel/vdso64/gettime.c
new file mode 100644
index 0000000..ef8f75c
--- /dev/null
+++ b/arch/powerpc/kernel/vdso64/gettime.c
@@ -0,0 +1,143 @@
+/*
+ * Userland implementation of gettimeofday() for 64 bits processes in a
+ * ppc64 kernel for use in the vDSO
+ *
+ * Copyright (C) 2017 Santosh Sivaraj (santosh@fossix.org), IBM.
+ *
+ * Originally implemented in assembly by:
+ * Benjamin Herrenschmuidt (benh@kernel.crashing.org),
+ * IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/compiler.h>
+#include <linux/types.h>
+#include <asm/ppc_asm.h>
+#include <asm/vdso.h>
+#include <asm/vdso_datapage.h>
+#include <asm/time.h>
+
+static notrace void kernel_get_tspec(struct timespec *tp,
+ struct vdso_data *vdata, u32 *wtom_sec,
+ u32 *wtom_nsec)
+{
+ u64 tb;
+ u32 update_count;
+
+ do {
+ /* check for update count & load values */
+ update_count = vdata->tb_update_count;
+
+ /* Get TB, offset it and scale result */
+ tb = mulhdu((get_tb() - vdata->tb_orig_stamp) << 12,
+ vdata->tb_to_xs) + vdata->stamp_sec_fraction;
+ tp->tv_sec = vdata->stamp_xtime.tv_sec;
+ if (wtom_sec)
+ *wtom_sec = vdata->wtom_clock_sec;
+ if (wtom_nsec)
+ *wtom_nsec = vdata->wtom_clock_nsec;
+ } while (update_count != vdata->tb_update_count);
+
+ tp->tv_nsec = ((u64)mulhwu(tb, NSEC_PER_SEC) << 32) >> 32;
+ tp->tv_sec += (tb >> 32);
+}
+
+static notrace int clock_get_realtime(struct timespec *tp,
+ struct vdso_data *vdata)
+{
+ kernel_get_tspec(tp, vdata, NULL, NULL);
+
+ return 0;
+}
+
+static notrace int clock_get_monotonic(struct timespec *tp,
+ struct vdso_data *vdata)
+{
+ __s32 wtom_sec, wtom_nsec;
+ u64 nsec;
+
+ kernel_get_tspec(tp, vdata, &wtom_sec, &wtom_nsec);
+
+ tp->tv_sec += wtom_sec;
+
+ nsec = tp->tv_nsec;
+ tp->tv_nsec = 0;
+ timespec_add_ns(tp, nsec + wtom_nsec);
+
+ return 0;
+}
+
+static notrace int clock_realtime_coarse(struct timespec *tp,
+ struct vdso_data *vdata)
+{
+ u32 update_count;
+
+ do {
+ /* check for update count & load values */
+ update_count = vdata->tb_update_count;
+
+ tp->tv_sec = vdata->stamp_xtime.tv_sec;
+ tp->tv_nsec = vdata->stamp_xtime.tv_nsec;
+ } while (update_count != vdata->tb_update_count);
+
+ return 0;
+}
+
+static notrace int clock_monotonic_coarse(struct timespec *tp,
+ struct vdso_data *vdata)
+{
+ __s32 wtom_sec, wtom_nsec;
+ u64 nsec;
+ u32 update_count;
+
+ do {
+ /* check for update count & load values */
+ update_count = vdata->tb_update_count;
+
+ tp->tv_sec = vdata->stamp_xtime.tv_sec;
+ tp->tv_nsec = vdata->stamp_xtime.tv_nsec;
+ wtom_sec = vdata->wtom_clock_sec;
+ wtom_nsec = vdata->wtom_clock_nsec;
+ } while (update_count != vdata->tb_update_count);
+
+ tp->tv_sec += wtom_sec;
+ nsec = tp->tv_nsec;
+ tp->tv_nsec = 0;
+ timespec_add_ns(tp, nsec + wtom_nsec);
+
+ return 0;
+}
+
+notrace int kernel_clock_gettime(clockid_t clk_id, struct timespec *tp)
+{
+ int ret;
+ struct vdso_data *vdata = __get_datapage();
+
+ if (!tp || !vdata)
+ return -EBADR;
+
+ switch (clk_id) {
+ case CLOCK_REALTIME:
+ ret = clock_get_realtime(tp, vdata);
+ break;
+ case CLOCK_MONOTONIC:
+ ret = clock_get_monotonic(tp, vdata);
+ break;
+ case CLOCK_REALTIME_COARSE:
+ ret = clock_realtime_coarse(tp, vdata);
+ break;
+ case CLOCK_MONOTONIC_COARSE:
+ ret = clock_monotonic_coarse(tp, vdata);
+ break;
+ default:
+ /* fallback to syscall */
+ ret = -1;
+ break;
+ }
+
+ return ret;
+}
diff --git a/arch/powerpc/kernel/vdso64/gettimeofday.S b/arch/powerpc/kernel/vdso64/gettimeofday.S
index 3820213..c3f6b24 100644
--- a/arch/powerpc/kernel/vdso64/gettimeofday.S
+++ b/arch/powerpc/kernel/vdso64/gettimeofday.S
@@ -16,6 +16,8 @@
#include <asm/asm-offsets.h>
#include <asm/unistd.h>
+.global kernel_clock_gettime
+
.text
/*
* Exact prototype of gettimeofday
@@ -60,71 +62,23 @@ V_FUNCTION_END(__kernel_gettimeofday)
*/
V_FUNCTION_BEGIN(__kernel_clock_gettime)
.cfi_startproc
- /* Check for supported clock IDs */
- cmpwi cr0,r3,CLOCK_REALTIME
- cmpwi cr1,r3,CLOCK_MONOTONIC
- cror cr0*4+eq,cr0*4+eq,cr1*4+eq
- bne cr0,99f
-
- mflr r12 /* r12 saves lr */
- .cfi_register lr,r12
- mr r11,r4 /* r11 saves tp */
- bl V_LOCAL_FUNC(__get_datapage) /* get data page */
- lis r7,NSEC_PER_SEC@h /* want nanoseconds */
- ori r7,r7,NSEC_PER_SEC@l
-50: bl V_LOCAL_FUNC(__do_get_tspec) /* get time from tb & kernel */
- bne cr1,80f /* if not monotonic, all done */
-
- /*
- * CLOCK_MONOTONIC
- */
-
- /* now we must fixup using wall to monotonic. We need to snapshot
- * that value and do the counter trick again. Fortunately, we still
- * have the counter value in r8 that was returned by __do_get_tspec.
- * At this point, r4,r5 contain our sec/nsec values.
- */
-
- lwa r6,WTOM_CLOCK_SEC(r3)
- lwa r9,WTOM_CLOCK_NSEC(r3)
-
- /* We now have our result in r6,r9. We create a fake dependency
- * on that result and re-check the counter
- */
- or r0,r6,r9
- xor r0,r0,r0
- add r3,r3,r0
- ld r0,CFG_TB_UPDATE_COUNT(r3)
- cmpld cr0,r0,r8 /* check if updated */
- bne- 50b
-
- /* Add wall->monotonic offset and check for overflow or underflow.
- */
- add r4,r4,r6
- add r5,r5,r9
- cmpd cr0,r5,r7
- cmpdi cr1,r5,0
- blt 1f
- subf r5,r7,r5
- addi r4,r4,1
-1: bge cr1,80f
- addi r4,r4,-1
- add r5,r5,r7
-
-80: std r4,TSPC64_TV_SEC(r11)
- std r5,TSPC64_TV_NSEC(r11)
-
- mtlr r12
+ mflr r6 /* r12 saves lr */
+ stwu r1,-112(r1)
+ .cfi_register lr,r6
+ std r6,24(r1)
+ std r3,32(r1)
+ std r4,40(r1)
crclr cr0*4+so
- li r3,0
- blr
-
- /*
- * syscall fallback
- */
-99:
+ bl V_LOCAL_FUNC(kernel_clock_gettime)
+ cmpwi r3,0
+ beq 77f
li r0,__NR_clock_gettime
+ ld r3,32(r1)
+ ld r4,40(r1)
sc
+77: ld r6,24(r1)
+ addi r1,r1,112
+ mtlr r6
blr
.cfi_endproc
V_FUNCTION_END(__kernel_clock_gettime)
--
2.9.4
^ permalink raw reply related [flat|nested] 15+ messages in thread
* Re: [PATCH v3] powerpc/vdso64: Add support for CLOCK_{REALTIME/MONOTONIC}_COARSE
2017-07-25 6:56 ` [PATCH v3] " Santosh Sivaraj
@ 2017-07-25 10:07 ` Benjamin Herrenschmidt
2017-07-25 13:47 ` Santosh Sivaraj
0 siblings, 1 reply; 15+ messages in thread
From: Benjamin Herrenschmidt @ 2017-07-25 10:07 UTC (permalink / raw)
To: Santosh Sivaraj, linuxppc-dev
Cc: Michael Ellerman, John Stultz, Thomas Gleixner,
Frederic Weisbecker
On Tue, 2017-07-25 at 12:26 +0530, Santosh Sivaraj wrote:
> +static notrace void kernel_get_tspec(struct timespec *tp,
> + struct vdso_data *vdata, u32 *wtom_sec,
> + u32 *wtom_nsec)
> +{
> + u64 tb;
> + u32 update_count;
This is broken:
> + do {
> + /* check for update count & load values */
> + update_count = vdata->tb_update_count;
> +
> + /* Get TB, offset it and scale result */
> + tb = mulhdu((get_tb() - vdata->tb_orig_stamp) << 12,
> + vdata->tb_to_xs) + vdata->stamp_sec_fraction;
> + tp->tv_sec = vdata->stamp_xtime.tv_sec;
> + if (wtom_sec)
> + *wtom_sec = vdata->wtom_clock_sec;
> + if (wtom_nsec)
> + *wtom_nsec = vdata->wtom_clock_nsec;
> + } while (update_count != vdata->tb_update_count);
The assembly code is carefuly crafted to create a chain of data
dependencies in order to enforce the ordering in this function,
you completely broke it.
IE. the pointer used to access tb_orig_stamp etc... depends on the
initial update count, and the final read of the update count depends
on all the previously read values (or should), thus ordering those
loads. Withtout that you'll need more expensive lwsync's.
Additionally, you broke another semantic of the seqlock which is
to spin on the first update count if it has an odd value.
The same problem exist in all your other implementations.
I am really NOT a fan of that attempt at converting to C. The code is
hand crafted assembly for a number of reasons, including the above ones
and maximum performance.
As it is, it's deeply broken.
> +
> + tp->tv_nsec = ((u64)mulhwu(tb, NSEC_PER_SEC) << 32) >> 32;
> + tp->tv_sec += (tb >> 32);
> +}
> +
> +static notrace int clock_get_realtime(struct timespec *tp,
> + struct vdso_data *vdata)
> +{
> + kernel_get_tspec(tp, vdata, NULL, NULL);
> +
> + return 0;
> +}
> +
> +static notrace int clock_get_monotonic(struct timespec *tp,
> + struct vdso_data *vdata)
> +{
> + __s32 wtom_sec, wtom_nsec;
> + u64 nsec;
> +
> + kernel_get_tspec(tp, vdata, &wtom_sec, &wtom_nsec);
> +
> + tp->tv_sec += wtom_sec;
> +
> + nsec = tp->tv_nsec;
> + tp->tv_nsec = 0;
> + timespec_add_ns(tp, nsec + wtom_nsec);
> +
> + return 0;
> +}
> +
> +static notrace int clock_realtime_coarse(struct timespec *tp,
> + struct vdso_data *vdata)
> +{
> + u32 update_count;
> +
> + do {
> + /* check for update count & load values */
> + update_count = vdata->tb_update_count;
> +
> + tp->tv_sec = vdata->stamp_xtime.tv_sec;
> + tp->tv_nsec = vdata->stamp_xtime.tv_nsec;
> + } while (update_count != vdata->tb_update_count);
> +
> + return 0;
> +}
> +
> +static notrace int clock_monotonic_coarse(struct timespec *tp,
> + struct vdso_data *vdata)
> +{
> + __s32 wtom_sec, wtom_nsec;
> + u64 nsec;
> + u32 update_count;
> +
> + do {
> + /* check for update count & load values */
> + update_count = vdata->tb_update_count;
> +
> + tp->tv_sec = vdata->stamp_xtime.tv_sec;
> + tp->tv_nsec = vdata->stamp_xtime.tv_nsec;
> + wtom_sec = vdata->wtom_clock_sec;
> + wtom_nsec = vdata->wtom_clock_nsec;
> + } while (update_count != vdata->tb_update_count);
> +
> + tp->tv_sec += wtom_sec;
> + nsec = tp->tv_nsec;
> + tp->tv_nsec = 0;
> + timespec_add_ns(tp, nsec + wtom_nsec);
> +
> + return 0;
> +}
> +
> +notrace int kernel_clock_gettime(clockid_t clk_id, struct timespec *tp)
> +{
> + int ret;
> + struct vdso_data *vdata = __get_datapage();
> +
> + if (!tp || !vdata)
> + return -EBADR;
> +
> + switch (clk_id) {
> + case CLOCK_REALTIME:
> + ret = clock_get_realtime(tp, vdata);
> + break;
> + case CLOCK_MONOTONIC:
> + ret = clock_get_monotonic(tp, vdata);
> + break;
> + case CLOCK_REALTIME_COARSE:
> + ret = clock_realtime_coarse(tp, vdata);
> + break;
> + case CLOCK_MONOTONIC_COARSE:
> + ret = clock_monotonic_coarse(tp, vdata);
> + break;
> + default:
> + /* fallback to syscall */
> + ret = -1;
> + break;
> + }
> +
> + return ret;
> +}
> diff --git a/arch/powerpc/kernel/vdso64/gettimeofday.S b/arch/powerpc/kernel/vdso64/gettimeofday.S
> index 3820213..c3f6b24 100644
> --- a/arch/powerpc/kernel/vdso64/gettimeofday.S
> +++ b/arch/powerpc/kernel/vdso64/gettimeofday.S
> @@ -16,6 +16,8 @@
> #include <asm/asm-offsets.h>
> #include <asm/unistd.h>
>
> +.global kernel_clock_gettime
> +
> .text
> /*
> * Exact prototype of gettimeofday
> @@ -60,71 +62,23 @@ V_FUNCTION_END(__kernel_gettimeofday)
> */
> V_FUNCTION_BEGIN(__kernel_clock_gettime)
> .cfi_startproc
> - /* Check for supported clock IDs */
> - cmpwi cr0,r3,CLOCK_REALTIME
> - cmpwi cr1,r3,CLOCK_MONOTONIC
> - cror cr0*4+eq,cr0*4+eq,cr1*4+eq
> - bne cr0,99f
> -
> - mflr r12 /* r12 saves lr */
> - .cfi_register lr,r12
> - mr r11,r4 /* r11 saves tp */
> - bl V_LOCAL_FUNC(__get_datapage) /* get data page */
> - lis r7,NSEC_PER_SEC@h /* want nanoseconds */
> - ori r7,r7,NSEC_PER_SEC@l
> -50: bl V_LOCAL_FUNC(__do_get_tspec) /* get time from tb & kernel */
> - bne cr1,80f /* if not monotonic, all done */
> -
> - /*
> - * CLOCK_MONOTONIC
> - */
> -
> - /* now we must fixup using wall to monotonic. We need to snapshot
> - * that value and do the counter trick again. Fortunately, we still
> - * have the counter value in r8 that was returned by __do_get_tspec.
> - * At this point, r4,r5 contain our sec/nsec values.
> - */
> -
> - lwa r6,WTOM_CLOCK_SEC(r3)
> - lwa r9,WTOM_CLOCK_NSEC(r3)
> -
> - /* We now have our result in r6,r9. We create a fake dependency
> - * on that result and re-check the counter
> - */
> - or r0,r6,r9
> - xor r0,r0,r0
> - add r3,r3,r0
> - ld r0,CFG_TB_UPDATE_COUNT(r3)
> - cmpld cr0,r0,r8 /* check if updated */
> - bne- 50b
> -
> - /* Add wall->monotonic offset and check for overflow or underflow.
> - */
> - add r4,r4,r6
> - add r5,r5,r9
> - cmpd cr0,r5,r7
> - cmpdi cr1,r5,0
> - blt 1f
> - subf r5,r7,r5
> - addi r4,r4,1
> -1: bge cr1,80f
> - addi r4,r4,-1
> - add r5,r5,r7
> -
> -80: std r4,TSPC64_TV_SEC(r11)
> - std r5,TSPC64_TV_NSEC(r11)
> -
> - mtlr r12
> + mflr r6 /* r12 saves lr */
> + stwu r1,-112(r1)
> + .cfi_register lr,r6
> + std r6,24(r1)
> + std r3,32(r1)
> + std r4,40(r1)
> crclr cr0*4+so
> - li r3,0
> - blr
> -
> - /*
> - * syscall fallback
> - */
> -99:
> + bl V_LOCAL_FUNC(kernel_clock_gettime)
> + cmpwi r3,0
> + beq 77f
> li r0,__NR_clock_gettime
> + ld r3,32(r1)
> + ld r4,40(r1)
> sc
> +77: ld r6,24(r1)
> + addi r1,r1,112
> + mtlr r6
> blr
> .cfi_endproc
> V_FUNCTION_END(__kernel_clock_gettime)
^ permalink raw reply [flat|nested] 15+ messages in thread
* Re: [PATCH v3] powerpc/vdso64: Add support for CLOCK_{REALTIME/MONOTONIC}_COARSE
2017-07-25 10:07 ` Benjamin Herrenschmidt
@ 2017-07-25 13:47 ` Santosh Sivaraj
2017-07-26 2:02 ` Benjamin Herrenschmidt
0 siblings, 1 reply; 15+ messages in thread
From: Santosh Sivaraj @ 2017-07-25 13:47 UTC (permalink / raw)
To: Benjamin Herrenschmidt
Cc: linuxppc-dev, Michael Ellerman, John Stultz, Thomas Gleixner,
Frederic Weisbecker
* Benjamin Herrenschmidt <benh@kernel.crashing.org> wrote (on 2017-07-25 20:07:28 +1000):
> On Tue, 2017-07-25 at 12:26 +0530, Santosh Sivaraj wrote:
>
> > +static notrace void kernel_get_tspec(struct timespec *tp,
> > + struct vdso_data *vdata, u32 *wtom_sec,
> > + u32 *wtom_nsec)
> > +{
> > + u64 tb;
> > + u32 update_count;
>
> This is broken:
>
> > + do {
> > + /* check for update count & load values */
> > + update_count = vdata->tb_update_count;
> > +
> > + /* Get TB, offset it and scale result */
> > + tb = mulhdu((get_tb() - vdata->tb_orig_stamp) << 12,
> > + vdata->tb_to_xs) + vdata->stamp_sec_fraction;
> > + tp->tv_sec = vdata->stamp_xtime.tv_sec;
> > + if (wtom_sec)
> > + *wtom_sec = vdata->wtom_clock_sec;
> > + if (wtom_nsec)
> > + *wtom_nsec = vdata->wtom_clock_nsec;
> > + } while (update_count != vdata->tb_update_count);
>
> The assembly code is carefuly crafted to create a chain of data
> dependencies in order to enforce the ordering in this function,
> you completely broke it.
>
> IE. the pointer used to access tb_orig_stamp etc... depends on the
> initial update count, and the final read of the update count depends
> on all the previously read values (or should), thus ordering those
> loads. Withtout that you'll need more expensive lwsync's.
>
> Additionally, you broke another semantic of the seqlock which is
> to spin on the first update count if it has an odd value.
>
> The same problem exist in all your other implementations.
>
> I am really NOT a fan of that attempt at converting to C. The code is
> hand crafted assembly for a number of reasons, including the above ones
> and maximum performance.
>
> As it is, it's deeply broken.
I get the point. I looked at the generated assembly a bit closer, the update
count is optimized out. Will send the alternative asm only patch.
Thanks,
Santosh
>
> > +
> > + tp->tv_nsec = ((u64)mulhwu(tb, NSEC_PER_SEC) << 32) >> 32;
> > + tp->tv_sec += (tb >> 32);
> > +}
> > +
> > +static notrace int clock_get_realtime(struct timespec *tp,
> > + struct vdso_data *vdata)
> > +{
> > + kernel_get_tspec(tp, vdata, NULL, NULL);
> > +
> > + return 0;
> > +}
> > +
> > +static notrace int clock_get_monotonic(struct timespec *tp,
> > + struct vdso_data *vdata)
> > +{
> > + __s32 wtom_sec, wtom_nsec;
> > + u64 nsec;
> > +
> > + kernel_get_tspec(tp, vdata, &wtom_sec, &wtom_nsec);
> > +
> > + tp->tv_sec += wtom_sec;
> > +
> > + nsec = tp->tv_nsec;
> > + tp->tv_nsec = 0;
> > + timespec_add_ns(tp, nsec + wtom_nsec);
> > +
> > + return 0;
> > +}
> > +
> > +static notrace int clock_realtime_coarse(struct timespec *tp,
> > + struct vdso_data *vdata)
> > +{
> > + u32 update_count;
> > +
> > + do {
> > + /* check for update count & load values */
> > + update_count = vdata->tb_update_count;
> > +
> > + tp->tv_sec = vdata->stamp_xtime.tv_sec;
> > + tp->tv_nsec = vdata->stamp_xtime.tv_nsec;
> > + } while (update_count != vdata->tb_update_count);
> > +
> > + return 0;
> > +}
> > +
> > +static notrace int clock_monotonic_coarse(struct timespec *tp,
> > + struct vdso_data *vdata)
> > +{
> > + __s32 wtom_sec, wtom_nsec;
> > + u64 nsec;
> > + u32 update_count;
> > +
> > + do {
> > + /* check for update count & load values */
> > + update_count = vdata->tb_update_count;
> > +
> > + tp->tv_sec = vdata->stamp_xtime.tv_sec;
> > + tp->tv_nsec = vdata->stamp_xtime.tv_nsec;
> > + wtom_sec = vdata->wtom_clock_sec;
> > + wtom_nsec = vdata->wtom_clock_nsec;
> > + } while (update_count != vdata->tb_update_count);
> > +
> > + tp->tv_sec += wtom_sec;
> > + nsec = tp->tv_nsec;
> > + tp->tv_nsec = 0;
> > + timespec_add_ns(tp, nsec + wtom_nsec);
> > +
> > + return 0;
> > +}
> > +
> > +notrace int kernel_clock_gettime(clockid_t clk_id, struct timespec *tp)
> > +{
> > + int ret;
> > + struct vdso_data *vdata = __get_datapage();
> > +
> > + if (!tp || !vdata)
> > + return -EBADR;
> > +
> > + switch (clk_id) {
> > + case CLOCK_REALTIME:
> > + ret = clock_get_realtime(tp, vdata);
> > + break;
> > + case CLOCK_MONOTONIC:
> > + ret = clock_get_monotonic(tp, vdata);
> > + break;
> > + case CLOCK_REALTIME_COARSE:
> > + ret = clock_realtime_coarse(tp, vdata);
> > + break;
> > + case CLOCK_MONOTONIC_COARSE:
> > + ret = clock_monotonic_coarse(tp, vdata);
> > + break;
> > + default:
> > + /* fallback to syscall */
> > + ret = -1;
> > + break;
> > + }
> > +
> > + return ret;
> > +}
> > diff --git a/arch/powerpc/kernel/vdso64/gettimeofday.S b/arch/powerpc/kernel/vdso64/gettimeofday.S
> > index 3820213..c3f6b24 100644
> > --- a/arch/powerpc/kernel/vdso64/gettimeofday.S
> > +++ b/arch/powerpc/kernel/vdso64/gettimeofday.S
> > @@ -16,6 +16,8 @@
> > #include <asm/asm-offsets.h>
> > #include <asm/unistd.h>
> >
> > +.global kernel_clock_gettime
> > +
> > .text
> > /*
> > * Exact prototype of gettimeofday
> > @@ -60,71 +62,23 @@ V_FUNCTION_END(__kernel_gettimeofday)
> > */
> > V_FUNCTION_BEGIN(__kernel_clock_gettime)
> > .cfi_startproc
> > - /* Check for supported clock IDs */
> > - cmpwi cr0,r3,CLOCK_REALTIME
> > - cmpwi cr1,r3,CLOCK_MONOTONIC
> > - cror cr0*4+eq,cr0*4+eq,cr1*4+eq
> > - bne cr0,99f
> > -
> > - mflr r12 /* r12 saves lr */
> > - .cfi_register lr,r12
> > - mr r11,r4 /* r11 saves tp */
> > - bl V_LOCAL_FUNC(__get_datapage) /* get data page */
> > - lis r7,NSEC_PER_SEC@h /* want nanoseconds */
> > - ori r7,r7,NSEC_PER_SEC@l
> > -50: bl V_LOCAL_FUNC(__do_get_tspec) /* get time from tb & kernel */
> > - bne cr1,80f /* if not monotonic, all done */
> > -
> > - /*
> > - * CLOCK_MONOTONIC
> > - */
> > -
> > - /* now we must fixup using wall to monotonic. We need to snapshot
> > - * that value and do the counter trick again. Fortunately, we still
> > - * have the counter value in r8 that was returned by __do_get_tspec.
> > - * At this point, r4,r5 contain our sec/nsec values.
> > - */
> > -
> > - lwa r6,WTOM_CLOCK_SEC(r3)
> > - lwa r9,WTOM_CLOCK_NSEC(r3)
> > -
> > - /* We now have our result in r6,r9. We create a fake dependency
> > - * on that result and re-check the counter
> > - */
> > - or r0,r6,r9
> > - xor r0,r0,r0
> > - add r3,r3,r0
> > - ld r0,CFG_TB_UPDATE_COUNT(r3)
> > - cmpld cr0,r0,r8 /* check if updated */
> > - bne- 50b
> > -
> > - /* Add wall->monotonic offset and check for overflow or underflow.
> > - */
> > - add r4,r4,r6
> > - add r5,r5,r9
> > - cmpd cr0,r5,r7
> > - cmpdi cr1,r5,0
> > - blt 1f
> > - subf r5,r7,r5
> > - addi r4,r4,1
> > -1: bge cr1,80f
> > - addi r4,r4,-1
> > - add r5,r5,r7
> > -
> > -80: std r4,TSPC64_TV_SEC(r11)
> > - std r5,TSPC64_TV_NSEC(r11)
> > -
> > - mtlr r12
> > + mflr r6 /* r12 saves lr */
> > + stwu r1,-112(r1)
> > + .cfi_register lr,r6
> > + std r6,24(r1)
> > + std r3,32(r1)
> > + std r4,40(r1)
> > crclr cr0*4+so
> > - li r3,0
> > - blr
> > -
> > - /*
> > - * syscall fallback
> > - */
> > -99:
> > + bl V_LOCAL_FUNC(kernel_clock_gettime)
> > + cmpwi r3,0
> > + beq 77f
> > li r0,__NR_clock_gettime
> > + ld r3,32(r1)
> > + ld r4,40(r1)
> > sc
> > +77: ld r6,24(r1)
> > + addi r1,r1,112
> > + mtlr r6
> > blr
> > .cfi_endproc
> > V_FUNCTION_END(__kernel_clock_gettime)
--
^ permalink raw reply [flat|nested] 15+ messages in thread
* Re: [PATCH v3] powerpc/vdso64: Add support for CLOCK_{REALTIME/MONOTONIC}_COARSE
2017-07-25 13:47 ` Santosh Sivaraj
@ 2017-07-26 2:02 ` Benjamin Herrenschmidt
0 siblings, 0 replies; 15+ messages in thread
From: Benjamin Herrenschmidt @ 2017-07-26 2:02 UTC (permalink / raw)
To: Santosh Sivaraj
Cc: linuxppc-dev, Michael Ellerman, John Stultz, Thomas Gleixner,
Frederic Weisbecker
On Tue, 2017-07-25 at 19:17 +0530, Santosh Sivaraj wrote:
> I get the point. I looked at the generated assembly a bit closer, the update
> count is optimized out. Will send the alternative asm only patch.
We could do it in C the way x86 does it, using some helpers for
begin/end and have either an lwsync (but that would be slower than
the data dependency I think) or carefully crafting the helpers to
create one (make them return the pointer).
If you go down that path though, you need to make sure we do not
generate any TOC reference as the vDSO doesn't have a TOC.
Cheers,
Ben.
^ permalink raw reply [flat|nested] 15+ messages in thread
* [PATCH] powerpc/vdso64: Add support for CLOCK_{REALTIME/MONOTONIC}_COARSE
2017-07-21 9:09 ` [PATCH v2] " Santosh Sivaraj
2017-07-25 6:56 ` [PATCH v3] " Santosh Sivaraj
@ 2017-08-11 8:23 ` Santosh Sivaraj
1 sibling, 0 replies; 15+ messages in thread
From: Santosh Sivaraj @ 2017-08-11 8:23 UTC (permalink / raw)
To: linuxppc-dev
Cc: Michael Ellerman, John Stultz, Thomas Gleixner,
Frederic Weisbecker, Benjamin Herrenschmidt
Current vDSO64 implementation does not have support for coarse clocks
(CLOCK_MONOTONIC_COARSE, CLOCK_REALTIME_COARSE), for which it falls back
to system call, increasing the response time, vDSO implementation reduces
the cycle time. Below is a benchmark of the difference in execution time
with and without vDSO support.
(Non-coarse clocks are also included just for completion)
Without vDSO support:
--------------------
clock-gettime-realtime: syscall: 172 nsec/call
clock-gettime-realtime: libc: 26 nsec/call
clock-gettime-realtime: vdso: 21 nsec/call
clock-gettime-monotonic: syscall: 170 nsec/call
clock-gettime-monotonic: libc: 30 nsec/call
clock-gettime-monotonic: vdso: 24 nsec/call
clock-gettime-realtime-coarse: syscall: 153 nsec/call
clock-gettime-realtime-coarse: libc: 15 nsec/call
clock-gettime-realtime-coarse: vdso: 9 nsec/call
clock-gettime-monotonic-coarse: syscall: 167 nsec/call
clock-gettime-monotonic-coarse: libc: 15 nsec/call
clock-gettime-monotonic-coarse: vdso: 11 nsec/call
CC: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: Santosh Sivaraj <santosh@fossix.org>
---
arch/powerpc/kernel/asm-offsets.c | 2 +
arch/powerpc/kernel/vdso64/gettimeofday.S | 73 ++++++++++++++++++++++++++++---
2 files changed, 68 insertions(+), 7 deletions(-)
diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
index 6e95c2c..c6acaa5 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -396,6 +396,8 @@ int main(void)
/* Other bits used by the vdso */
DEFINE(CLOCK_REALTIME, CLOCK_REALTIME);
DEFINE(CLOCK_MONOTONIC, CLOCK_MONOTONIC);
+ DEFINE(CLOCK_REALTIME_COARSE, CLOCK_REALTIME_COARSE);
+ DEFINE(CLOCK_MONOTONIC_COARSE, CLOCK_MONOTONIC_COARSE);
DEFINE(NSEC_PER_SEC, NSEC_PER_SEC);
DEFINE(CLOCK_REALTIME_RES, MONOTONIC_RES_NSEC);
diff --git a/arch/powerpc/kernel/vdso64/gettimeofday.S b/arch/powerpc/kernel/vdso64/gettimeofday.S
index 3820213..5229d1e 100644
--- a/arch/powerpc/kernel/vdso64/gettimeofday.S
+++ b/arch/powerpc/kernel/vdso64/gettimeofday.S
@@ -60,19 +60,26 @@ V_FUNCTION_END(__kernel_gettimeofday)
*/
V_FUNCTION_BEGIN(__kernel_clock_gettime)
.cfi_startproc
+ mr r11,r4 /* r11 saves tp */
+ mflr r12 /* r12 saves lr */
+ lis r7,NSEC_PER_SEC@h /* want nanoseconds */
+ ori r7,r7,NSEC_PER_SEC@l
+
/* Check for supported clock IDs */
cmpwi cr0,r3,CLOCK_REALTIME
cmpwi cr1,r3,CLOCK_MONOTONIC
cror cr0*4+eq,cr0*4+eq,cr1*4+eq
- bne cr0,99f
+ beq cr0,50f
- mflr r12 /* r12 saves lr */
+ cmpwi cr0,r3,CLOCK_REALTIME_COARSE
+ cmpwi cr1,r3,CLOCK_MONOTONIC_COARSE
+ cror cr0*4+eq,cr0*4+eq,cr1*4+eq
+ beq cr0,65f
+
+ b 99f /* Fallback to syscall */
.cfi_register lr,r12
- mr r11,r4 /* r11 saves tp */
- bl V_LOCAL_FUNC(__get_datapage) /* get data page */
- lis r7,NSEC_PER_SEC@h /* want nanoseconds */
- ori r7,r7,NSEC_PER_SEC@l
-50: bl V_LOCAL_FUNC(__do_get_tspec) /* get time from tb & kernel */
+50: bl V_LOCAL_FUNC(__get_datapage) /* get data page */
+ bl V_LOCAL_FUNC(__do_get_tspec) /* get time from tb & kernel */
bne cr1,80f /* if not monotonic, all done */
/*
@@ -110,6 +117,58 @@ V_FUNCTION_BEGIN(__kernel_clock_gettime)
1: bge cr1,80f
addi r4,r4,-1
add r5,r5,r7
+ b 80f
+
+ /*
+ * For coarse clocks we get data directly from the vdso data page, so
+ * we don't need to call __do_get_tspec, but we still need to do the
+ * counter trick.
+ */
+65: bl V_LOCAL_FUNC(__get_datapage) /* get data page */
+70: ld r8,CFG_TB_UPDATE_COUNT(r3)
+ andi. r0,r8,1 /* pending update ? loop */
+ bne- 70b
+ xor r0,r8,r8 /* create dependency */
+ add r3,r3,r0
+
+ /*
+ * CLOCK_REALTIME_COARSE, below values are needed for MONOTONIC_COARSE
+ * too
+ */
+ ld r4,STAMP_XTIME+TSPC64_TV_SEC(r3)
+ ld r5,STAMP_XTIME+TSPC64_TV_NSEC(r3)
+ bne cr1,78f
+
+ /* CLOCK_MONOTONIC_COARSE */
+ lwa r6,WTOM_CLOCK_SEC(r3)
+ lwa r9,WTOM_CLOCK_NSEC(r3)
+
+ /* check if counter has updated */
+78: or r0,r6,r9
+ xor r0,r0,r0
+ add r3,r3,r0
+ ld r0,CFG_TB_UPDATE_COUNT(r3)
+ cmpld cr0,r0,r8 /* check if updated */
+ bne- 70b
+
+ /* Counter has not updated, so continue calculating proper values for
+ * sec and nsec if monotonic coarse, or just return with the proper
+ * values for realtime.
+ */
+ bne cr1,80f
+
+ /* Add wall->monotonic offset and check for overflow or underflow.
+ */
+ add r4,r4,r6
+ add r5,r5,r9
+ cmpd cr0,r5,r7
+ cmpdi cr1,r5,0
+ blt 79f
+ subf r5,r7,r5
+ addi r4,r4,1
+79: bge cr1,80f
+ addi r4,r4,-1
+ add r5,r5,r7
80: std r4,TSPC64_TV_SEC(r11)
std r5,TSPC64_TV_NSEC(r11)
--
2.9.4
^ permalink raw reply related [flat|nested] 15+ messages in thread
* Re: [PATCH] powerpc/vdso64: Add support for CLOCK_{REALTIME/MONOTONIC}_COARSE
2017-07-20 9:58 [PATCH] powerpc/vdso64: Add support for CLOCK_{REALTIME/MONOTONIC}_COARSE Santosh Sivaraj
2017-07-20 13:18 ` Michael Ellerman
2017-07-21 9:09 ` [PATCH v2] " Santosh Sivaraj
@ 2017-07-22 19:29 ` kbuild test robot
2017-07-23 15:12 ` kbuild test robot
3 siblings, 0 replies; 15+ messages in thread
From: kbuild test robot @ 2017-07-22 19:29 UTC (permalink / raw)
To: Santosh Sivaraj; +Cc: kbuild-all, linuxppc-dev, Srikar Dronamraju
[-- Attachment #1: Type: text/plain, Size: 1176 bytes --]
Hi Santosh,
[auto build test ERROR on powerpc/next]
[also build test ERROR on v4.13-rc1 next-20170721]
[if your patch is applied to the wrong git tree, please drop us a note to help improve the system]
url: https://github.com/0day-ci/linux/commits/Santosh-Sivaraj/powerpc-vdso64-Add-support-for-CLOCK_-REALTIME-MONOTONIC-_COARSE/20170722-235025
base: https://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux.git next
config: powerpc-defconfig (attached as .config)
compiler: powerpc64-linux-gnu-gcc (Debian 6.1.1-9) 6.1.1 20160705
reproduce:
wget https://raw.githubusercontent.com/01org/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
chmod +x ~/bin/make.cross
# save the attached .config to linux build tree
make.cross ARCH=powerpc
All errors (new ones prefixed by >>):
arch/powerpc/kernel/vdso64/gettime.o: In function `.kernel_clock_gettime':
>> gettime.c:(.text+0xac): undefined reference to `.__get_datapage'
collect2: error: ld returned 1 exit status
---
0-DAY kernel test infrastructure Open Source Technology Center
https://lists.01.org/pipermail/kbuild-all Intel Corporation
[-- Attachment #2: .config.gz --]
[-- Type: application/gzip, Size: 23382 bytes --]
^ permalink raw reply [flat|nested] 15+ messages in thread
* Re: [PATCH] powerpc/vdso64: Add support for CLOCK_{REALTIME/MONOTONIC}_COARSE
2017-07-20 9:58 [PATCH] powerpc/vdso64: Add support for CLOCK_{REALTIME/MONOTONIC}_COARSE Santosh Sivaraj
` (2 preceding siblings ...)
2017-07-22 19:29 ` kbuild test robot
@ 2017-07-23 15:12 ` kbuild test robot
3 siblings, 0 replies; 15+ messages in thread
From: kbuild test robot @ 2017-07-23 15:12 UTC (permalink / raw)
To: Santosh Sivaraj; +Cc: kbuild-all, linuxppc-dev, Srikar Dronamraju
[-- Attachment #1: Type: text/plain, Size: 1410 bytes --]
Hi Santosh,
[auto build test ERROR on powerpc/next]
[also build test ERROR on v4.13-rc1 next-20170721]
[if your patch is applied to the wrong git tree, please drop us a note to help improve the system]
url: https://github.com/0day-ci/linux/commits/Santosh-Sivaraj/powerpc-vdso64-Add-support-for-CLOCK_-REALTIME-MONOTONIC-_COARSE/20170722-235025
base: https://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux.git next
config: powerpc-ps3_defconfig (attached as .config)
compiler: powerpc64-linux-gnu-gcc (Debian 6.1.1-9) 6.1.1 20160705
reproduce:
wget https://raw.githubusercontent.com/01org/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
chmod +x ~/bin/make.cross
# save the attached .config to linux build tree
make.cross ARCH=powerpc
All errors (new ones prefixed by >>):
arch/powerpc/kernel/vdso64/gettime.o: In function `kernel_clock_gettime':
>> arch/powerpc/kernel/vdso64/gettime.c:134: undefined reference to `.__get_datapage'
collect2: error: ld returned 1 exit status
vim +134 arch/powerpc/kernel/vdso64/gettime.c
130
131 notrace int kernel_clock_gettime(clockid_t clk_id, struct timespec *tp)
132 {
133 int ret;
> 134 struct vdso_data *vdata = __get_datapage();
---
0-DAY kernel test infrastructure Open Source Technology Center
https://lists.01.org/pipermail/kbuild-all Intel Corporation
[-- Attachment #2: .config.gz --]
[-- Type: application/gzip, Size: 14817 bytes --]
^ permalink raw reply [flat|nested] 15+ messages in thread
end of thread, other threads:[~2017-08-11 8:23 UTC | newest]
Thread overview: 15+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2017-07-20 9:58 [PATCH] powerpc/vdso64: Add support for CLOCK_{REALTIME/MONOTONIC}_COARSE Santosh Sivaraj
2017-07-20 13:18 ` Michael Ellerman
2017-07-20 21:17 ` Benjamin Herrenschmidt
2017-07-20 22:03 ` Segher Boessenkool
2017-07-21 3:05 ` Anton Blanchard
2017-07-21 4:40 ` Santosh Sivaraj
2017-07-21 6:05 ` Benjamin Herrenschmidt
2017-07-21 9:09 ` [PATCH v2] " Santosh Sivaraj
2017-07-25 6:56 ` [PATCH v3] " Santosh Sivaraj
2017-07-25 10:07 ` Benjamin Herrenschmidt
2017-07-25 13:47 ` Santosh Sivaraj
2017-07-26 2:02 ` Benjamin Herrenschmidt
2017-08-11 8:23 ` [PATCH] " Santosh Sivaraj
2017-07-22 19:29 ` kbuild test robot
2017-07-23 15:12 ` kbuild test robot
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).