From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1751209AbaHJGLK (ORCPT ); Sun, 10 Aug 2014 02:11:10 -0400 Received: from mail-we0-f181.google.com ([74.125.82.181]:57444 "EHLO mail-we0-f181.google.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1751022AbaHJGLJ (ORCPT ); Sun, 10 Aug 2014 02:11:09 -0400 Date: Sun, 10 Aug 2014 08:11:03 +0200 From: Ingo Molnar To: Borislav Petkov Cc: x86-ml , lkml , Peter Zijlstra , Steven Rostedt , Thomas Gleixner , Linus Torvalds , Jason Baron , Andrew Morton Subject: Re: [RFC PATCH] Flipped jump labels Message-ID: <20140810061103.GA13968@gmail.com> References: <20140809105742.GA5910@pd.tnic> MIME-Version: 1.0 Content-Type: text/plain; charset=us-ascii Content-Disposition: inline In-Reply-To: <20140809105742.GA5910@pd.tnic> User-Agent: Mutt/1.5.23 (2014-03-12) Sender: linux-kernel-owner@vger.kernel.org List-ID: X-Mailing-List: linux-kernel@vger.kernel.org * Borislav Petkov wrote: > Hi dudes, > > with the current impl. of jump labels, people can't really do the > following: > > --- > JMP unlikely_code > likely_code > > unlikely_code: > unlikely code > --- > > and after some initialization queries overwrite the JMP with a NOP so > that the likely code gets executed at 0 cost. > > The issue is that jump labels unconditionally add NOPs by default > (see arch_static_branch). For example, native_sched_clock() gets the > following code layout here: > > -- > NOP > unlikely code (which computes time in ns from jiffies) > likely code (which does RDTSC) > -- > > Yes, unlikely code comes first. > > when the jump labels get initialized and all checks done, at runtime we > have this: > > 0xffffffff8100ce40 : push %rbp > 0xffffffff8100ce41 : mov %rsp,%rbp > 0xffffffff8100ce44 : and $0xfffffffffffffff0,%rsp > > unconditional JMP!!! > > 0xffffffff8100ce48 : jmpq 0xffffffff8100ce70 > > unlikely code using jiffies > > 0xffffffff8100ce4d : mov 0x9a71ac(%rip),%r8 # 0xffffffff819b4000 > 0xffffffff8100ce54 : movabs $0xffc2f745d964b800,%rax > 0xffffffff8100ce5e : leaveq > 0xffffffff8100ce5f : imul $0x3d0900,%r8,%rdx > 0xffffffff8100ce66 : add %rdx,%rax > 0xffffffff8100ce69 : retq > 0xffffffff8100ce6a : nopw 0x0(%rax,%rax,1) > > likely code using RDTSC, see JMP target address. > > 0xffffffff8100ce70 : rdtsc > > > so what we end up getting is not really helping because we always get to > pay for that JMP on all modern systems which sport RDTSC even though we > shouldn't really. > > And remember this is not something cheap: sched_clock uses the TSC > even if it is unstable so we're always jumping like insane and > unconditionally. > > So, long story short, we want this, instead: > > 0xffffffff8100cf10 : push %rbp > 0xffffffff8100cf11 : mov %rsp,%rbp > 0xffffffff8100cf14 : and $0xfffffffffffffff0,%rsp > > unconditional JMP is nopped out > > 0xffffffff8100cf18 : data32 data32 data32 xchg %ax,%ax > > likely code which comes first in the function so all the advantages from > it to front end, branch pred, yadda yadda, get to be enjoyed :) > > 0xffffffff8100cf1d : rdtsc > 0xffffffff8100cf1f : mov %eax,%esi > 0xffffffff8100cf21 : mov %rdx,%rax > 0xffffffff8100cf24 : shl $0x20,%rax > 0xffffffff8100cf28 : or %rsi,%rax > 0xffffffff8100cf2b : mov %rax,%rcx > 0xffffffff8100cf2e : incl %gs:0xb8e0 > 0xffffffff8100cf36 : mov %gs:0x1d0c30,%rsi > 0xffffffff8100cf3f : mov %gs:0x1d0c38,%rax > 0xffffffff8100cf48 : cmp %rax,%rsi > 0xffffffff8100cf4b : jne 0xffffffff8100cf90 > 0xffffffff8100cf4d : mov (%rsi),%eax > 0xffffffff8100cf4f : mul %rcx > 0xffffffff8100cf52 : shrd $0xa,%rdx,%rax > 0xffffffff8100cf57 : add 0x8(%rsi),%rax > 0xffffffff8100cf5b : decl %gs:0xb8e0 > 0xffffffff8100cf63 : je 0xffffffff8100cf88 > 0xffffffff8100cf65 : leaveq > 0xffffffff8100cf66 : retq > > Done, we return here. > > 0xffffffff8100cf67 : nop > > unlikely code which does the jiffies math. > > 0xffffffff8100cf68 : mov 0x9a7091(%rip),%rax # 0xffffffff819b4000 > 0xffffffff8100cf6f : leaveq > 0xffffffff8100cf70 : imul $0x3d0900,%rax,%rdx > ... > > > So basically what I'm proposing is a jump label type which is > initialized by default to jump to the unlikely code and once > initialization has happened, JMP gets overwritten. > > The things to pay attention here is > > * this label should be used in places where it is very likely for the > jump to get overwritten. Basically the opposite to tracepoints for which > the jump labels were created initially, to be mostly off. > > * It must be used in places where JMP gets overwritten only after some > initialization done first. > > Anyway, below is a concept patch for myself to try the idea first - it > seems to work here. Constructive ideas and suggestions are welcome, as > always. > > --- > diff --git a/arch/x86/include/asm/jump_label.h b/arch/x86/include/asm/jump_label.h > index 6a2cefb4395a..2d963c6489a8 100644 > --- a/arch/x86/include/asm/jump_label.h > +++ b/arch/x86/include/asm/jump_label.h > @@ -30,6 +30,22 @@ l_yes: > return true; > } > > +static __always_inline bool arch_static_branch_active(struct static_key *key) > +{ > + asm_volatile_goto("1:" > + "jmp %l[l_yes]\n\t" > + ".byte " __stringify(GENERIC_NOP3) "\n\t" > + ".pushsection __jump_table, \"aw\" \n\t" > + _ASM_ALIGN "\n\t" > + _ASM_PTR "1b, %l[l_yes], %c0 \n\t" > + ".popsection \n\t" > + : : "i" (key) : : l_yes); > + return false; > +l_yes: > + return true; > +} > + > + > #endif /* __KERNEL__ */ > > #ifdef CONFIG_X86_64 > diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c > index 4ca327e900ae..81bc2c4a7eab 100644 > --- a/arch/x86/kernel/tsc.c > +++ b/arch/x86/kernel/tsc.c > @@ -33,7 +33,7 @@ EXPORT_SYMBOL(tsc_khz); > */ > static int __read_mostly tsc_unstable; > > -static struct static_key __use_tsc = STATIC_KEY_INIT; > +static struct static_key __use_tsc = STATIC_KEY_INIT_FALSE; > > int tsc_clocksource_reliable; > > @@ -280,7 +280,7 @@ u64 native_sched_clock(void) > * very important for it to be as fast as the platform > * can achieve it. ) > */ > - if (!static_key_false(&__use_tsc)) { > + if (arch_static_branch_active(&__use_tsc)) { > /* No locking but a rare wrong value is not a big deal: */ > return (jiffies_64 - INITIAL_JIFFIES) * (1000000000 / HZ); > } Wouldn't using STATIC_KEY_INIT_TRUE and static_key_true() [instead of !static_key_false()] result in the same good code placement effects? Thanks, Ingo