All of lore.kernel.org
 help / color / mirror / Atom feed
From: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>
To: Jiri Olsa <jolsa@redhat.com>
Cc: mingo@elte.hu, acme@redhat.com, fweisbec@gmail.com,
	hpa@zytor.com, ananth@in.ibm.com, davem@davemloft.net,
	linux-kernel@vger.kernel.org, tglx@linutronix.de,
	a.p.zijlstra@chello.nl, eric.dumazet@gmail.com,
	2nddept-manager@sdl.hitachi.co.jp,
	"2nddept-manager@sdl.hitachi.co.jp" 
	<2nddept-manager@sdl.hitachi.co.jp>
Subject: Re: [PATCH 1/2] x86: separating entry text section
Date: Tue, 22 Feb 2011 12:22:47 +0900	[thread overview]
Message-ID: <4D632C07.6090205@hitachi.com> (raw)
In-Reply-To: <1298298313-5980-2-git-send-email-jolsa@redhat.com>

(2011/02/21 23:25), Jiri Olsa wrote:
> Put the x86 entry code into a separate section: .entry.text.
> 
> Separating the entry text section seems to have performance
> benefits with regard to instruction cache usage.
> 
> Running hackbench showed that the change compresses the icache
> footprint. The number of icache load misses went down by about 8%:
> 
> before patch:
>      26282174  L1-icache-load-misses      ( +-   0.099% )  (scaled from 81.00%)
> 
> after patch:
>      24237651  L1-icache-load-misses      ( +-   0.117% )  (scaled from 80.96%)
> 
> 
> Whole perf output follows.
> 
> - results for current tip tree:
> 
>  Performance counter stats for './hackbench/hackbench 10' (500 runs):
> 
>     817646684  L1-icache-loads            ( +-   0.150% )  (scaled from 80.99%)
>      26282174  L1-icache-load-misses      ( +-   0.099% )  (scaled from 81.00%)
>        211864  L1-icache-prefetches       ( +-   0.616% )  (scaled from 80.99%)
> <not counted>  L1-icache-prefetch-misses
>     817646737  iTLB-loads                 ( +-   0.151% )  (scaled from 80.98%)
>         82368  iTLB-load-misses           ( +-   0.451% )  (scaled from 80.98%)
> 
>   0.206651959  seconds time elapsed   ( +-   0.152% )
> 
> 
> - results for current tip tree with the patch applied are:
> 
>  Performance counter stats for './hackbench/hackbench 10' (500 runs):
> 
>     960162049  L1-icache-loads            ( +-   0.114% )  (scaled from 80.95%)
>      24237651  L1-icache-load-misses      ( +-   0.117% )  (scaled from 80.96%)
>        179800  L1-icache-prefetches       ( +-   0.530% )  (scaled from 80.95%)
> <not counted>  L1-icache-prefetch-misses
>     960352725  iTLB-loads                 ( +-   0.114% )  (scaled from 80.93%)
>         84410  iTLB-load-misses           ( +-   0.491% )  (scaled from 80.92%)
> 
>   0.210509948  seconds time elapsed   ( +-   0.140% )
> 
> 
> wbr,
> jirka
> 
> 
> Signed-off-by: Jiri Olsa <jolsa@redhat.com>

Reviewed-by: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>

Thanks!

> ---
>  arch/x86/ia32/ia32entry.S         |    2 ++
>  arch/x86/kernel/entry_32.S        |    6 ++++--
>  arch/x86/kernel/entry_64.S        |    6 ++++--
>  arch/x86/kernel/vmlinux.lds.S     |    1 +
>  include/asm-generic/sections.h    |    1 +
>  include/asm-generic/vmlinux.lds.h |    6 ++++++
>  6 files changed, 18 insertions(+), 4 deletions(-)
> 
> diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S
> index 0ed7896..50f1630 100644
> --- a/arch/x86/ia32/ia32entry.S
> +++ b/arch/x86/ia32/ia32entry.S
> @@ -25,6 +25,8 @@
>  #define sysretl_audit ia32_ret_from_sys_call
>  #endif
>  
> +	.section .entry.text, "ax"
> +
>  #define IA32_NR_syscalls ((ia32_syscall_end - ia32_sys_call_table)/8)
>  
>  	.macro IA32_ARG_FIXUP noebp=0
> diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S
> index c8b4efa..f5accf8 100644
> --- a/arch/x86/kernel/entry_32.S
> +++ b/arch/x86/kernel/entry_32.S
> @@ -65,6 +65,8 @@
>  #define sysexit_audit	syscall_exit_work
>  #endif
>  
> +	.section .entry.text, "ax"
> +
>  /*
>   * We use macros for low-level operations which need to be overridden
>   * for paravirtualization.  The following will never clobber any registers:
> @@ -788,7 +790,7 @@ ENDPROC(ptregs_clone)
>   */
>  .section .init.rodata,"a"
>  ENTRY(interrupt)
> -.text
> +.section .entry.text, "ax"
>  	.p2align 5
>  	.p2align CONFIG_X86_L1_CACHE_SHIFT
>  ENTRY(irq_entries_start)
> @@ -807,7 +809,7 @@ vector=FIRST_EXTERNAL_VECTOR
>        .endif
>        .previous
>  	.long 1b
> -      .text
> +      .section .entry.text, "ax"
>  vector=vector+1
>      .endif
>    .endr
> diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
> index 891268c..39f8d21 100644
> --- a/arch/x86/kernel/entry_64.S
> +++ b/arch/x86/kernel/entry_64.S
> @@ -61,6 +61,8 @@
>  #define __AUDIT_ARCH_LE	   0x40000000
>  
>  	.code64
> +	.section .entry.text, "ax"
> +
>  #ifdef CONFIG_FUNCTION_TRACER
>  #ifdef CONFIG_DYNAMIC_FTRACE
>  ENTRY(mcount)
> @@ -744,7 +746,7 @@ END(stub_rt_sigreturn)
>   */
>  	.section .init.rodata,"a"
>  ENTRY(interrupt)
> -	.text
> +	.section .entry.text
>  	.p2align 5
>  	.p2align CONFIG_X86_L1_CACHE_SHIFT
>  ENTRY(irq_entries_start)
> @@ -763,7 +765,7 @@ vector=FIRST_EXTERNAL_VECTOR
>        .endif
>        .previous
>  	.quad 1b
> -      .text
> +      .section .entry.text
>  vector=vector+1
>      .endif
>    .endr
> diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S
> index e70cc3d..459dce2 100644
> --- a/arch/x86/kernel/vmlinux.lds.S
> +++ b/arch/x86/kernel/vmlinux.lds.S
> @@ -105,6 +105,7 @@ SECTIONS
>  		SCHED_TEXT
>  		LOCK_TEXT
>  		KPROBES_TEXT
> +		ENTRY_TEXT
>  		IRQENTRY_TEXT
>  		*(.fixup)
>  		*(.gnu.warning)
> diff --git a/include/asm-generic/sections.h b/include/asm-generic/sections.h
> index b3bfabc..c1a1216 100644
> --- a/include/asm-generic/sections.h
> +++ b/include/asm-generic/sections.h
> @@ -11,6 +11,7 @@ extern char _sinittext[], _einittext[];
>  extern char _end[];
>  extern char __per_cpu_load[], __per_cpu_start[], __per_cpu_end[];
>  extern char __kprobes_text_start[], __kprobes_text_end[];
> +extern char __entry_text_start[], __entry_text_end[];
>  extern char __initdata_begin[], __initdata_end[];
>  extern char __start_rodata[], __end_rodata[];
>  
> diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h
> index fe77e33..906c3ce 100644
> --- a/include/asm-generic/vmlinux.lds.h
> +++ b/include/asm-generic/vmlinux.lds.h
> @@ -424,6 +424,12 @@
>  		*(.kprobes.text)					\
>  		VMLINUX_SYMBOL(__kprobes_text_end) = .;
>  
> +#define ENTRY_TEXT							\
> +		ALIGN_FUNCTION();					\
> +		VMLINUX_SYMBOL(__entry_text_start) = .;			\
> +		*(.entry.text)						\
> +		VMLINUX_SYMBOL(__entry_text_end) = .;
> +
>  #ifdef CONFIG_FUNCTION_GRAPH_TRACER
>  #define IRQENTRY_TEXT							\
>  		ALIGN_FUNCTION();					\


-- 
Masami HIRAMATSU
2nd Dept. Linux Technology Center
Hitachi, Ltd., Systems Development Laboratory
E-mail: masami.hiramatsu.pt@hitachi.com

  reply	other threads:[~2011-02-22  3:22 UTC|newest]

Thread overview: 26+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2011-02-14 15:12 [RFC,PATCH] kprobes - optimized kprobes might crash before setting kernel stack Jiri Olsa
2011-02-15  9:41 ` Masami Hiramatsu
2011-02-15 12:30   ` Jiri Olsa
2011-02-15 15:55     ` Masami Hiramatsu
2011-02-15 16:54       ` Jiri Olsa
2011-02-15 17:05       ` [PATCH] kprobes - do not allow optimized kprobes in entry code Jiri Olsa
2011-02-16  3:36         ` Masami Hiramatsu
2011-02-17 15:11           ` Ingo Molnar
2011-02-17 15:20             ` Jiri Olsa
2011-02-18 16:26             ` Jiri Olsa
2011-02-19 14:14               ` Masami Hiramatsu
2011-02-20 12:59                 ` Ingo Molnar
2011-02-21 11:54                   ` Jiri Olsa
2011-02-21 14:25                   ` [PATCH 0/2] x86: separating entry text section + kprobes fix Jiri Olsa
2011-02-21 14:25                     ` [PATCH 1/2] x86: separating entry text section Jiri Olsa
2011-02-22  3:22                       ` Masami Hiramatsu [this message]
2011-02-22  8:09                       ` Ingo Molnar
2011-02-22 12:52                         ` Jiri Olsa
2011-03-07 10:44                           ` Jiri Olsa
2011-03-07 15:29                             ` Ingo Molnar
2011-03-07 18:10                               ` Jiri Olsa
2011-03-08 16:15                                 ` Ingo Molnar
2011-03-08 20:15                                 ` [tip:perf/core] x86: Separate out " tip-bot for Jiri Olsa
2011-02-21 14:25                     ` [PATCH 2/2] kprobes: disabling optimized kprobes for " Jiri Olsa
2011-02-22  3:22                       ` Masami Hiramatsu
2011-03-08 20:16                       ` [tip:perf/core] kprobes: Disabling " tip-bot for Jiri Olsa

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=4D632C07.6090205@hitachi.com \
    --to=masami.hiramatsu.pt@hitachi.com \
    --cc=2nddept-manager@sdl.hitachi.co.jp \
    --cc=a.p.zijlstra@chello.nl \
    --cc=acme@redhat.com \
    --cc=ananth@in.ibm.com \
    --cc=davem@davemloft.net \
    --cc=eric.dumazet@gmail.com \
    --cc=fweisbec@gmail.com \
    --cc=hpa@zytor.com \
    --cc=jolsa@redhat.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mingo@elte.hu \
    --cc=tglx@linutronix.de \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.