From mboxrd@z Thu Jan  1 00:00:00 1970
From: Vineet Gupta
Subject: Re: [RFC PATCH REBASED 2/3] x86: Move fpu_counter into ARCH specific thread_struct
Date: Mon, 23 Sep 2013 12:28:31 +0530
Message-ID: <523FE697.7000709@synopsys.com>
References: <1379398639-27403-1-git-send-email-vgupta@synopsys.com>
 <1379398639-27403-3-git-send-email-vgupta@synopsys.com>
 <52383020.1090801@synopsys.com>
Mime-Version: 1.0
Content-Type: text/plain; charset="UTF-8"
Content-Transfer-Encoding: 7bit
Return-path:
Received: from us01smtp3.synopsys.com ([198.182.44.81]:61019 "EHLO
 hermes.synopsys.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP
 id S1751944Ab3IWG7Q (ORCPT ); Mon, 23 Sep 2013 02:59:16 -0400
In-Reply-To: <52383020.1090801@synopsys.com>
Sender: linux-arch-owner@vger.kernel.org
List-ID:
To: "H. Peter Anvin" , Ingo Molnar , Peter Zijlstra , Andrew Morton ,
 Thomas Gleixner
Cc: linux-arch@vger.kernel.org, x86@kernel.org, Borislav Petkov ,
 Frederic Weisbecker

On 09/17/2013 04:04 PM, Vineet Gupta wrote:
> [+cc: x86 folks] as the mailer skipped the CC list.
>
> On 09/17/2013 11:48 AM, Vineet Gupta wrote:
>> Only a couple of arches (sh/x86) use fpu_counter in task_struct so it
>> can be moved out into ARCH specific thread_struct, reducing the size of
>> task_struct for other arches.
>>
>> Compile tested i386_defconfig + gcc 4.7.3
>>
>> Signed-off-by: Vineet Gupta
>> Cc: Thomas Gleixner
>> Cc: Ingo Molnar
>> Cc: Peter Zijlstra
>> Cc: "H. Peter Anvin"
>> Cc: x86@kernel.org
>> Cc: Suresh Siddha
>> Cc: Borislav Petkov
>> Cc: Vincent Palatin
>> Cc: Len Brown
>> Cc: Al Viro
>> Cc: Paul Gortmaker
>> Cc: Pekka Riikonen
>> Cc: Andrew Morton
>> Cc: Dave Jones
>> Cc: Frederic Weisbecker
>> Cc: "Paul E. McKenney"
>> Cc: linux-kernel@vger.kernel.org
>> ---
>>  arch/x86/include/asm/fpu-internal.h | 10 +++++-----
>>  arch/x86/include/asm/processor.h    |  9 +++++++++
>>  arch/x86/kernel/i387.c              |  2 +-
>>  arch/x86/kernel/process_32.c        |  4 ++--
>>  arch/x86/kernel/process_64.c        |  2 +-
>>  arch/x86/kernel/traps.c             |  2 +-
>>  6 files changed, 19 insertions(+), 10 deletions(-)
>>
>> diff --git a/arch/x86/include/asm/fpu-internal.h b/arch/x86/include/asm/fpu-internal.h
>> index 4d0bda7..c49a613 100644
>> --- a/arch/x86/include/asm/fpu-internal.h
>> +++ b/arch/x86/include/asm/fpu-internal.h
>> @@ -365,7 +365,7 @@ static inline void drop_fpu(struct task_struct *tsk)
>>           * Forget coprocessor state..
>>           */
>>          preempt_disable();
>> -        tsk->fpu_counter = 0;
>> +        tsk->thread.fpu_counter = 0;
>>          __drop_fpu(tsk);
>>          clear_used_math();
>>          preempt_enable();
>> @@ -424,7 +424,7 @@ static inline fpu_switch_t switch_fpu_prepare(struct task_struct *old, struct ta
>>           * or if the past 5 consecutive context-switches used math.
>>           */
>>          fpu.preload = tsk_used_math(new) && (use_eager_fpu() ||
>> -                                             new->fpu_counter > 5);
>> +                                             new->thread.fpu_counter > 5);
>>          if (__thread_has_fpu(old)) {
>>                  if (!__save_init_fpu(old))
>>                          cpu = ~0;
>> @@ -433,16 +433,16 @@ static inline fpu_switch_t switch_fpu_prepare(struct task_struct *old, struct ta
>>
>>                  /* Don't change CR0.TS if we just switch! */
>>                  if (fpu.preload) {
>> -                        new->fpu_counter++;
>> +                        new->thread.fpu_counter++;
>>                          __thread_set_has_fpu(new);
>>                          prefetch(new->thread.fpu.state);
>>                  } else if (!use_eager_fpu())
>>                          stts();
>>          } else {
>> -                old->fpu_counter = 0;
>> +                old->thread.fpu_counter = 0;
>>                  old->thread.fpu.last_cpu = ~0;
>>                  if (fpu.preload) {
>> -                        new->fpu_counter++;
>> +                        new->thread.fpu_counter++;
>>                          if (!use_eager_fpu() && fpu_lazy_restore(new, cpu))
>>                                  fpu.preload = 0;
>>                          else
>> diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
>> index 987c75e..7b034a4 100644
>> --- a/arch/x86/include/asm/processor.h
>> +++ b/arch/x86/include/asm/processor.h
>> @@ -488,6 +488,15 @@ struct thread_struct {
>>          unsigned long           iopl;
>>          /* Max allowed port in the bitmap, in bytes: */
>>          unsigned                io_bitmap_max;
>> +        /*
>> +         * fpu_counter contains the number of consecutive context switches
>> +         * that the FPU is used. If this is over a threshold, the lazy fpu
>> +         * saving becomes unlazy to save the trap. This is an unsigned char
>> +         * so that after 256 times the counter wraps and the behavior turns
>> +         * lazy again; this to deal with bursty apps that only use FPU for
>> +         * a short time
>> +         */
>> +        unsigned char fpu_counter;
>>  };
>>
>>  /*
>> diff --git a/arch/x86/kernel/i387.c b/arch/x86/kernel/i387.c
>> index 5d576ab..e8368c6 100644
>> --- a/arch/x86/kernel/i387.c
>> +++ b/arch/x86/kernel/i387.c
>> @@ -100,7 +100,7 @@ void unlazy_fpu(struct task_struct *tsk)
>>                  __save_init_fpu(tsk);
>>                  __thread_fpu_end(tsk);
>>          } else
>> -                tsk->fpu_counter = 0;
>> +                tsk->thread.fpu_counter = 0;
>>          preempt_enable();
>>  }
>>  EXPORT_SYMBOL(unlazy_fpu);
>> diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
>> index 884f98f..6af43b0 100644
>> --- a/arch/x86/kernel/process_32.c
>> +++ b/arch/x86/kernel/process_32.c
>> @@ -153,7 +153,7 @@ int copy_thread(unsigned long clone_flags, unsigned long sp,
>>                  childregs->orig_ax = -1;
>>                  childregs->cs = __KERNEL_CS | get_kernel_rpl();
>>                  childregs->flags = X86_EFLAGS_IF | X86_EFLAGS_FIXED;
>> -                p->fpu_counter = 0;
>> +                p->thread.fpu_counter = 0;
>>                  p->thread.io_bitmap_ptr = NULL;
>>                  memset(p->thread.ptrace_bps, 0, sizeof(p->thread.ptrace_bps));
>>                  return 0;
>> @@ -166,7 +166,7 @@ int copy_thread(unsigned long clone_flags, unsigned long sp,
>>          p->thread.ip = (unsigned long) ret_from_fork;
>>          task_user_gs(p) = get_user_gs(current_pt_regs());
>>
>> -        p->fpu_counter = 0;
>> +        p->thread.fpu_counter = 0;
>>          p->thread.io_bitmap_ptr = NULL;
>>          tsk = current;
>>          err = -ENOMEM;
>> diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
>> index bb1dc51..bbab295 100644
>> --- a/arch/x86/kernel/process_64.c
>> +++ b/arch/x86/kernel/process_64.c
>> @@ -163,7 +163,7 @@ int copy_thread(unsigned long clone_flags, unsigned long sp,
>>          p->thread.sp = (unsigned long) childregs;
>>          p->thread.usersp = me->thread.usersp;
>>          set_tsk_thread_flag(p, TIF_FORK);
>> -        p->fpu_counter = 0;
>> +        p->thread.fpu_counter = 0;
>>          p->thread.io_bitmap_ptr = NULL;
>>
>>          savesegment(gs, p->thread.gsindex);
>> diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
>> index 8c8093b..64b980f 100644
>> --- a/arch/x86/kernel/traps.c
>> +++ b/arch/x86/kernel/traps.c
>> @@ -653,7 +653,7 @@ void math_state_restore(void)
>>                  return;
>>          }
>>
>> -        tsk->fpu_counter++;
>> +        tsk->thread.fpu_counter++;
>>  }
>>  EXPORT_SYMBOL_GPL(math_state_restore);
>>
>
> Comments please !

Can I get an ACK/NAK on this while waiting for the SH folks to respond
to a similar change?
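
As an aside, the wrap-around behaviour that the new comment in
processor.h describes is easy to convince yourself of with a small
user-space sketch (plain C, nothing kernel-specific; only the ">5"
threshold is taken from switch_fpu_prepare() above, the loop length and
the printed ranges are made up purely for illustration):

#include <stdio.h>

int main(void)
{
        unsigned char fpu_counter = 0;  /* same type as the new field */
        int cs;

        for (cs = 1; cs <= 270; cs++) {
                /* the preload decision for a task that uses math on
                 * every switch, as in switch_fpu_prepare() */
                int preload = fpu_counter > 5;

                /* bumped once per math-using switch, whether via the
                 * preload path or the #NM trap (math_state_restore) */
                fpu_counter++;

                /* print the two interesting regions: startup and wrap */
                if (cs <= 8 || (cs >= 254 && cs <= 264))
                        printf("switch %3d: %s (counter now %d)\n", cs,
                               preload ? "eager preload" : "lazy, trap",
                               fpu_counter);
        }
        return 0;
}

The first 6 switches stay lazy, switch 7 onwards preloads eagerly, and
after the counter wraps at switch 256 the task gets 6 lazy switches
again before turning eager; that is the safety valve for bursty FPU
users the comment refers to.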
In any case, it seems a straightforward mechanical change!

-Vineet