linuxppc-dev.lists.ozlabs.org archive mirror
 help / color / mirror / Atom feed
From: Santosh Sivaraj <santosh@fossix.org>
To: Christophe Leroy <christophe.leroy@c-s.fr>,
	Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: linuxppc-dev <linuxppc-dev@lists.ozlabs.org>
Subject: Re: [PATCH] powerpc/vdso64: inline __get_datapage()
Date: Wed, 21 Aug 2019 16:50:58 +0530	[thread overview]
Message-ID: <874l2apxwl.fsf@santosiv.in.ibm.com> (raw)
In-Reply-To: <3724e1ba-d0f8-7247-73c2-6d83a3dbd040@c-s.fr>

Christophe Leroy <christophe.leroy@c-s.fr> writes:

> Le 21/08/2019 à 11:29, Santosh Sivaraj a écrit :
>> __get_datapage() is only a few instructions to retrieve the
>> address of the page where the kernel stores data for the VDSO.
>> 
>> By inlining this function into its users, a bl/blr pair and
>> a mflr/mtlr pair are avoided, plus a few register moves.
>> 
>> clock-gettime-monotonic: syscall: 514 nsec/call  396 nsec/call
>> clock-gettime-monotonic:    libc: 25 nsec/call   24 nsec/call
>> clock-gettime-monotonic:    vdso: 20 nsec/call   20 nsec/call
>> clock-getres-monotonic: syscall: 347 nsec/call   372 nsec/call
>> clock-getres-monotonic:    libc: 19 nsec/call    19 nsec/call
>> clock-getres-monotonic:    vdso: 10 nsec/call    10 nsec/call
>> clock-gettime-monotonic-coarse: syscall: 511 nsec/call   396 nsec/call
>> clock-gettime-monotonic-coarse:    libc: 23 nsec/call    21 nsec/call
>> clock-gettime-monotonic-coarse:    vdso: 15 nsec/call    13 nsec/call
>> clock-gettime-realtime: syscall: 526 nsec/call   405 nsec/call
>> clock-gettime-realtime:    libc: 24 nsec/call    23 nsec/call
>> clock-gettime-realtime:    vdso: 18 nsec/call    18 nsec/call
>> clock-getres-realtime: syscall: 342 nsec/call    372 nsec/call
>> clock-getres-realtime:    libc: 19 nsec/call     19 nsec/call
>> clock-getres-realtime:    vdso: 10 nsec/call     10 nsec/call
>> clock-gettime-realtime-coarse: syscall: 515 nsec/call    373 nsec/call
>> clock-gettime-realtime-coarse:    libc: 23 nsec/call     22 nsec/call
>> clock-gettime-realtime-coarse:    vdso: 14 nsec/call     13 nsec/call
>
> I think you should only put the measurements on vdso calls, and only the
> ones that are impacted by the change. For example, the getres function
> doesn't use __get_datapage(), so showing it here is pointless.
>
> gettimeofday should be shown there as it uses __get_datapage()
>
>
>> 
>> Based on the patch by Christophe Leroy <christophe.leroy@c-s.fr> for vdso32.
>> 
>> Signed-off-by: Santosh Sivaraj <santosh@fossix.org>
>> ---
>> 
>> Except for a couple of calls (1 or 2 nsec reduction), there are no
>> improvements in the call times. Or is 10 nsec the minimum granularity?
>
> Maybe the ones that show no improvements are the ones that don't use 
> __get_datapage() at all ...

Yes, that makes sense.

>
>> 
>> So I don't know if it's even worth updating vdso64 except to keep vdso32 and
>> vdso64 equal.
>
> 2ns on a 15ns call is 13% so it is worth it I think.

True. Since datapage.h is the same for both vdso32 and vdso64, maybe we
should put it in include/asm.

Thanks,
Santosh
>
> Christophe
>
>
>> 
>> 
>>   arch/powerpc/kernel/vdso64/cacheflush.S   | 10 ++++----
>>   arch/powerpc/kernel/vdso64/datapage.S     | 29 ++++-------------------
>>   arch/powerpc/kernel/vdso64/datapage.h     | 10 ++++++++
>>   arch/powerpc/kernel/vdso64/gettimeofday.S |  8 ++++---
>>   4 files changed, 24 insertions(+), 33 deletions(-)
>>   create mode 100644 arch/powerpc/kernel/vdso64/datapage.h
>> 
>> diff --git a/arch/powerpc/kernel/vdso64/cacheflush.S b/arch/powerpc/kernel/vdso64/cacheflush.S
>> index 3f92561a64c4..30e8b0d29bea 100644
>> --- a/arch/powerpc/kernel/vdso64/cacheflush.S
>> +++ b/arch/powerpc/kernel/vdso64/cacheflush.S
>> @@ -10,6 +10,8 @@
>>   #include <asm/vdso.h>
>>   #include <asm/asm-offsets.h>
>>   
>> +#include "datapage.h"
>> +
>>   	.text
>>   
>>   /*
>> @@ -24,14 +26,12 @@ V_FUNCTION_BEGIN(__kernel_sync_dicache)
>>     .cfi_startproc
>>   	mflr	r12
>>     .cfi_register lr,r12
>> -	mr	r11,r3
>> -	bl	V_LOCAL_FUNC(__get_datapage)
>> +	get_datapage	r11, r0
>>   	mtlr	r12
>> -	mr	r10,r3
>>   
>>   	lwz	r7,CFG_DCACHE_BLOCKSZ(r10)
>>   	addi	r5,r7,-1
>> -	andc	r6,r11,r5		/* round low to line bdy */
>> +	andc	r6,r3,r5		/* round low to line bdy */
>>   	subf	r8,r6,r4		/* compute length */
>>   	add	r8,r8,r5		/* ensure we get enough */
>>   	lwz	r9,CFG_DCACHE_LOGBLOCKSZ(r10)
>> @@ -48,7 +48,7 @@ V_FUNCTION_BEGIN(__kernel_sync_dicache)
>>   
>>   	lwz	r7,CFG_ICACHE_BLOCKSZ(r10)
>>   	addi	r5,r7,-1
>> -	andc	r6,r11,r5		/* round low to line bdy */
>> +	andc	r6,r3,r5		/* round low to line bdy */
>>   	subf	r8,r6,r4		/* compute length */
>>   	add	r8,r8,r5
>>   	lwz	r9,CFG_ICACHE_LOGBLOCKSZ(r10)
>> diff --git a/arch/powerpc/kernel/vdso64/datapage.S b/arch/powerpc/kernel/vdso64/datapage.S
>> index dc84f5ae3802..8712f57c931c 100644
>> --- a/arch/powerpc/kernel/vdso64/datapage.S
>> +++ b/arch/powerpc/kernel/vdso64/datapage.S
>> @@ -11,34 +11,13 @@
>>   #include <asm/unistd.h>
>>   #include <asm/vdso.h>
>>   
>> +#include "datapage.h"
>> +
>>   	.text
>>   .global	__kernel_datapage_offset;
>>   __kernel_datapage_offset:
>>   	.long	0
>>   
>> -V_FUNCTION_BEGIN(__get_datapage)
>> -  .cfi_startproc
>> -	/* We don't want that exposed or overridable as we want other objects
>> -	 * to be able to bl directly to here
>> -	 */
>> -	.protected __get_datapage
>> -	.hidden __get_datapage
>> -
>> -	mflr	r0
>> -  .cfi_register lr,r0
>> -
>> -	bcl	20,31,data_page_branch
>> -data_page_branch:
>> -	mflr	r3
>> -	mtlr	r0
>> -	addi	r3, r3, __kernel_datapage_offset-data_page_branch
>> -	lwz	r0,0(r3)
>> -  .cfi_restore lr
>> -	add	r3,r0,r3
>> -	blr
>> -  .cfi_endproc
>> -V_FUNCTION_END(__get_datapage)
>> -
>>   /*
>>    * void *__kernel_get_syscall_map(unsigned int *syscall_count) ;
>>    *
>> @@ -53,7 +32,7 @@ V_FUNCTION_BEGIN(__kernel_get_syscall_map)
>>   	mflr	r12
>>     .cfi_register lr,r12
>>   	mr	r4,r3
>> -	bl	V_LOCAL_FUNC(__get_datapage)
>> +	get_datapage	r3, r0
>>   	mtlr	r12
>>   	addi	r3,r3,CFG_SYSCALL_MAP64
>>   	cmpldi	cr0,r4,0
>> @@ -75,7 +54,7 @@ V_FUNCTION_BEGIN(__kernel_get_tbfreq)
>>     .cfi_startproc
>>   	mflr	r12
>>     .cfi_register lr,r12
>> -	bl	V_LOCAL_FUNC(__get_datapage)
>> +	get_datapage	r3, r0
>>   	ld	r3,CFG_TB_TICKS_PER_SEC(r3)
>>   	mtlr	r12
>>   	crclr	cr0*4+so
>> diff --git a/arch/powerpc/kernel/vdso64/datapage.h b/arch/powerpc/kernel/vdso64/datapage.h
>> new file mode 100644
>> index 000000000000..f2f0da0f65f3
>> --- /dev/null
>> +++ b/arch/powerpc/kernel/vdso64/datapage.h
>> @@ -0,0 +1,10 @@
>> +/* SPDX-License-Identifier: GPL-2.0-or-later */
>> +
>> +.macro get_datapage ptr, tmp
>> +	bcl	20,31,888f
>> +888:
>> +	mflr	\ptr
>> +	addi	\ptr, \ptr, __kernel_datapage_offset - 888b
>> +	lwz	\tmp, 0(\ptr)
>> +	add	\ptr, \tmp, \ptr
>> +.endm
>> diff --git a/arch/powerpc/kernel/vdso64/gettimeofday.S b/arch/powerpc/kernel/vdso64/gettimeofday.S
>> index 07bfe33fe874..7bcc879392cc 100644
>> --- a/arch/powerpc/kernel/vdso64/gettimeofday.S
>> +++ b/arch/powerpc/kernel/vdso64/gettimeofday.S
>> @@ -12,6 +12,8 @@
>>   #include <asm/asm-offsets.h>
>>   #include <asm/unistd.h>
>>   
>> +#include "datapage.h"
>> +
>>   	.text
>>   /*
>>    * Exact prototype of gettimeofday
>> @@ -26,7 +28,7 @@ V_FUNCTION_BEGIN(__kernel_gettimeofday)
>>   
>>   	mr	r11,r3			/* r11 holds tv */
>>   	mr	r10,r4			/* r10 holds tz */
>> -	bl	V_LOCAL_FUNC(__get_datapage)	/* get data page */
>> +	get_datapage	r3, r0
>>   	cmpldi	r11,0			/* check if tv is NULL */
>>   	beq	2f
>>   	lis	r7,1000000@ha		/* load up USEC_PER_SEC */
>> @@ -71,7 +73,7 @@ V_FUNCTION_BEGIN(__kernel_clock_gettime)
>>   	mflr	r12			/* r12 saves lr */
>>     .cfi_register lr,r12
>>   	mr	r11,r4			/* r11 saves tp */
>> -	bl	V_LOCAL_FUNC(__get_datapage)	/* get data page */
>> +	get_datapage	r3, r0		/* get data page */
>>   	lis	r7,NSEC_PER_SEC@h	/* want nanoseconds */
>>   	ori	r7,r7,NSEC_PER_SEC@l
>>   	beq	cr5,70f
>> @@ -218,7 +220,7 @@ V_FUNCTION_BEGIN(__kernel_time)
>>     .cfi_register lr,r12
>>   
>>   	mr	r11,r3			/* r11 holds t */
>> -	bl	V_LOCAL_FUNC(__get_datapage)
>> +	get_datapage	r3, r0
>>   
>>   	ld	r4,STAMP_XTIME+TSPC64_TV_SEC(r3)
>>   
>> 

  reply	other threads:[~2019-08-21 11:23 UTC|newest]

Thread overview: 11+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-08-16 14:48 [PATCH] powerpc/vdso32: inline __get_datapage() Christophe Leroy
2019-08-21  4:37 ` Benjamin Herrenschmidt
2019-08-21  9:29   ` [PATCH] powerpc/vdso64: " Santosh Sivaraj
2019-08-21  9:46     ` Christophe Leroy
2019-08-21 11:20       ` Santosh Sivaraj [this message]
2019-08-21 15:58       ` Nathan Lynch
2019-08-21 11:44     ` Segher Boessenkool
2019-08-21 11:50       ` Christophe Leroy
2019-08-21 12:15         ` Segher Boessenkool
2019-08-21 16:23           ` Christophe Leroy
2019-08-22 16:18             ` Santosh Sivaraj

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=874l2apxwl.fsf@santosiv.in.ibm.com \
    --to=santosh@fossix.org \
    --cc=benh@kernel.crashing.org \
    --cc=christophe.leroy@c-s.fr \
    --cc=linuxppc-dev@lists.ozlabs.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).