All of lore.kernel.org
 help / color / mirror / Atom feed
From: Benjamin LaHaise <bcrl@kvack.org>
To: Andi Kleen <ak@suse.de>
Cc: linux-kernel@vger.kernel.org
Subject: rfc/rft: use r10 as current on x86-64
Date: Tue, 22 Nov 2005 11:52:04 -0500	[thread overview]
Message-ID: <20051122165204.GG1127@kvack.org> (raw)

Hello Andi et al,

The patch below converts x86-64 to use r10 as the current pointer instead
of gs:pcurrent.  This results in a savings of ~34KB in the code segment of
the kernel.  I've tested this by running a few regular applications,
plus a few 32-bit binaries.  If this patch is interesting, it probably
makes sense to merge the thread info structure into the task_struct so
that the assembly bits for syscall entry can be cleaned up.  Comments?

		-ben
-- 
"Time is what keeps everything from happening all at once." -- John Wheeler
Don't Email: <dont@kvack.org>.


diff --git a/arch/x86_64/Makefile b/arch/x86_64/Makefile
index a9cd42e..e547830 100644
--- a/arch/x86_64/Makefile
+++ b/arch/x86_64/Makefile
@@ -31,6 +31,7 @@ cflags-$(CONFIG_MK8) += $(call cc-option
 cflags-$(CONFIG_MPSC) += $(call cc-option,-march=nocona)
 CFLAGS += $(cflags-y)
 
+CFLAGS += -ffixed-r10
 CFLAGS += -mno-red-zone
 CFLAGS += -mcmodel=kernel
 CFLAGS += -pipe
diff --git a/arch/x86_64/ia32/ia32entry.S b/arch/x86_64/ia32/ia32entry.S
index e0eb0c7..cdb5918 100644
--- a/arch/x86_64/ia32/ia32entry.S
+++ b/arch/x86_64/ia32/ia32entry.S
@@ -99,6 +99,7 @@ sysenter_do_call:	
 	cmpl	$(IA32_NR_syscalls),%eax
 	jae	ia32_badsys
 	IA32_ARG_FIXUP 1
+	movq    %gs:pda_pcurrent,%r10
 	call	*ia32_sys_call_table(,%rax,8)
 	movq	%rax,RAX-ARGOFFSET(%rsp)
 	GET_THREAD_INFO(%r10)
@@ -127,6 +128,7 @@ sysenter_tracesys:
 	CLEAR_RREGS
 	movq	$-ENOSYS,RAX(%rsp)	/* really needed? */
 	movq	%rsp,%rdi        /* &pt_regs -> arg1 */
+	movq    %gs:pda_pcurrent,%r10
 	call	syscall_trace_enter
 	LOAD_ARGS ARGOFFSET  /* reload args from stack in case ptrace changed it */
 	RESTORE_REST
@@ -198,6 +200,7 @@ cstar_do_call:	
 	cmpl $IA32_NR_syscalls,%eax
 	jae  ia32_badsys
 	IA32_ARG_FIXUP 1
+	movq    %gs:pda_pcurrent,%r10
 	call *ia32_sys_call_table(,%rax,8)
 	movq %rax,RAX-ARGOFFSET(%rsp)
 	GET_THREAD_INFO(%r10)
@@ -220,6 +223,7 @@ cstar_tracesys:	
 	CLEAR_RREGS
 	movq $-ENOSYS,RAX(%rsp)	/* really needed? */
 	movq %rsp,%rdi        /* &pt_regs -> arg1 */
+	movq    %gs:pda_pcurrent,%r10
 	call syscall_trace_enter
 	LOAD_ARGS ARGOFFSET  /* reload args from stack in case ptrace changed it */
 	RESTORE_REST
@@ -282,6 +286,7 @@ ia32_do_syscall:	
 	cmpl $(IA32_NR_syscalls),%eax
 	jae  ia32_badsys
 	IA32_ARG_FIXUP
+	movq    %gs:pda_pcurrent,%r10
 	call *ia32_sys_call_table(,%rax,8) # xxx: rip relative
 ia32_sysret:
 	movq %rax,RAX-ARGOFFSET(%rsp)
@@ -291,6 +296,7 @@ ia32_tracesys:			 
 	SAVE_REST
 	movq $-ENOSYS,RAX(%rsp)	/* really needed? */
 	movq %rsp,%rdi        /* &pt_regs -> arg1 */
+	movq    %gs:pda_pcurrent,%r10
 	call syscall_trace_enter
 	LOAD_ARGS ARGOFFSET  /* reload args from stack in case ptrace changed it */
 	RESTORE_REST
@@ -336,6 +342,7 @@ ENTRY(ia32_ptregs_common)
 	CFI_ADJUST_CFA_OFFSET -8
 	CFI_REGISTER rip, r11
 	SAVE_REST
+	movq    %gs:pda_pcurrent,%r10
 	call *%rax
 	RESTORE_REST
 	jmp  ia32_sysret	/* misbalances the return cache */
diff --git a/arch/x86_64/kernel/entry.S b/arch/x86_64/kernel/entry.S
index 9ff4204..53a829c 100644
--- a/arch/x86_64/kernel/entry.S
+++ b/arch/x86_64/kernel/entry.S
@@ -201,6 +201,7 @@ ENTRY(system_call)
 	cmpq $__NR_syscall_max,%rax
 	ja badsys
 	movq %r10,%rcx
+	movq	%gs:pda_pcurrent,%r10
 	call *sys_call_table(,%rax,8)  # XXX:	 rip relative
 	movq %rax,RAX-ARGOFFSET(%rsp)
 /*
@@ -235,6 +236,7 @@ sysret_careful:
 	sti
 	pushq %rdi
 	CFI_ADJUST_CFA_OFFSET 8
+	movq	%gs:pda_pcurrent,%r10
 	call schedule
 	popq  %rdi
 	CFI_ADJUST_CFA_OFFSET -8
@@ -266,12 +268,14 @@ tracesys:			 
 	movq $-ENOSYS,RAX(%rsp)
 	FIXUP_TOP_OF_STACK %rdi
 	movq %rsp,%rdi
+	movq	%gs:pda_pcurrent,%r10
 	call syscall_trace_enter
 	LOAD_ARGS ARGOFFSET  /* reload args from stack in case ptrace changed it */
 	RESTORE_REST
 	cmpq $__NR_syscall_max,%rax
 	ja  1f
 	movq %r10,%rcx	/* fixup for C */
+	movq	%gs:pda_pcurrent,%r10
 	call *sys_call_table(,%rax,8)
 	movq %rax,RAX-ARGOFFSET(%rsp)
 1:	SAVE_REST
@@ -324,6 +328,7 @@ int_careful:
 	sti
 	pushq %rdi
 	CFI_ADJUST_CFA_OFFSET 8
+	movq	%gs:pda_pcurrent,%r10
 	call schedule
 	popq %rdi
 	CFI_ADJUST_CFA_OFFSET -8
@@ -409,6 +414,7 @@ ENTRY(stub_execve)
 	movq %r11, %r15
 	CFI_REGISTER rip, r15
 	FIXUP_TOP_OF_STACK %r11
+	movq	%gs:pda_pcurrent,%r10
 	call sys_execve
 	GET_THREAD_INFO(%rcx)
 	bt $TIF_IA32,threadinfo_flags(%rcx)
@@ -441,6 +447,7 @@ ENTRY(stub_rt_sigreturn)
 	SAVE_REST
 	movq %rsp,%rdi
 	FIXUP_TOP_OF_STACK %r11
+	movq	%gs:pda_pcurrent,%r10
 	call sys_rt_sigreturn
 	movq %rax,RAX(%rsp) # fixme, this could be done at the higher layer
 	RESTORE_REST
@@ -498,6 +505,7 @@ ENTRY(stub_rt_sigreturn)
 	cmoveq %rax,%rsp /*todo This needs CFI annotation! */
 	pushq %rdi			# save old stack	
 	CFI_ADJUST_CFA_OFFSET	8
+	movq	%gs:pda_pcurrent,%r10
 	call \func
 	.endm
 
@@ -559,6 +567,7 @@ retint_careful:
 	sti
 	pushq %rdi
 	CFI_ADJUST_CFA_OFFSET	8
+	movq	%gs:pda_pcurrent,%r10
 	call  schedule
 	popq %rdi		
 	CFI_ADJUST_CFA_OFFSET	-8
@@ -574,6 +583,7 @@ retint_signal:
 	movq $-1,ORIG_RAX(%rsp) 			
 	xorl %esi,%esi		# oldset
 	movq %rsp,%rdi		# &pt_regs
+	movq	%gs:pda_pcurrent,%r10
 	call do_notify_resume
 	RESTORE_REST
 	cli
@@ -592,6 +602,7 @@ retint_kernel:	
 	jnc  retint_restore_args
 	bt   $9,EFLAGS-ARGOFFSET(%rsp)	/* interrupts off? */
 	jnc  retint_restore_args
+	movq	%gs:pda_pcurrent,%r10
 	call preempt_schedule_irq
 	jmp exit_intr
 #endif	
@@ -682,6 +693,7 @@ ENTRY(spurious_interrupt)
 	testl %edx,%edx
 	js    1f
 	swapgs
+	movq	%gs:pda_pcurrent,%r10
 	xorl  %ebx,%ebx
 1:	movq %rsp,%rdi
 	movq ORIG_RAX(%rsp),%rsi
@@ -734,6 +746,7 @@ ENTRY(error_entry)
 	je  error_kernelspace
 error_swapgs:	
 	swapgs
+	movq	%gs:pda_pcurrent,%r10
 error_sti:	
 	movq %rdi,RDI(%rsp) 	
 	movq %rsp,%rdi
@@ -876,6 +889,7 @@ ENTRY(execve)
 	CFI_STARTPROC
 	FAKE_STACK_FRAME $0
 	SAVE_ALL	
+	movq %gs:pda_pcurrent,%r10
 	call sys_execve
 	movq %rax, RAX(%rsp)	
 	RESTORE_REST
@@ -953,6 +967,7 @@ paranoid_userspace:	
 	jmp paranoid_userspace
 paranoid_schedule:
 	sti
+	movq	%gs:pda_pcurrent,%r10
 	call schedule
 	cli
 	jmp paranoid_userspace
diff --git a/arch/x86_64/kernel/process.c b/arch/x86_64/kernel/process.c
index 5afd63e..340bce2 100644
--- a/arch/x86_64/kernel/process.c
+++ b/arch/x86_64/kernel/process.c
@@ -435,8 +435,10 @@ int copy_thread(int nr, unsigned long cl
 
 	childregs->rax = 0;
 	childregs->rsp = rsp;
-	if (rsp == ~0UL)
+	if (rsp == ~0UL) {
+		childregs->r10 = (long)p;
 		childregs->rsp = (unsigned long)childregs;
+	}
 
 	p->thread.rsp = (unsigned long) childregs;
 	p->thread.rsp0 = (unsigned long) (childregs+1);
@@ -568,6 +570,7 @@ __switch_to(struct task_struct *prev_p, 
 	prev->userrsp = read_pda(oldrsp); 
 	write_pda(oldrsp, next->userrsp); 
 	write_pda(pcurrent, next_p); 
+	current = next_p;
 	write_pda(kernelstack,
 	    (unsigned long)next_p->thread_info + THREAD_SIZE - PDA_STACKOFFSET);
 
diff --git a/arch/x86_64/kernel/setup64.c b/arch/x86_64/kernel/setup64.c
index 06dc354..3af8688 100644
--- a/arch/x86_64/kernel/setup64.c
+++ b/arch/x86_64/kernel/setup64.c
@@ -132,16 +132,16 @@ void pda_init(int cpu)
 
 	if (cpu == 0) {
 		/* others are initialized in smpboot.c */
-		pda->pcurrent = &init_task;
+		current = pda->pcurrent = &init_task;
 		pda->irqstackptr = boot_cpu_stack; 
 	} else {
+		current = pda->pcurrent;
 		pda->irqstackptr = (char *)
 			__get_free_pages(GFP_ATOMIC, IRQSTACK_ORDER);
 		if (!pda->irqstackptr)
 			panic("cannot allocate irqstack for cpu %d", cpu); 
 	}
 
-
 	pda->irqstackptr += IRQSTACKSIZE-64;
 } 
 
diff --git a/arch/x86_64/kernel/traps.c b/arch/x86_64/kernel/traps.c
index bf337f4..a6008ae 100644
--- a/arch/x86_64/kernel/traps.c
+++ b/arch/x86_64/kernel/traps.c
@@ -277,6 +277,7 @@ void show_registers(struct pt_regs *regs
 	const int cpu = safe_smp_processor_id(); 
 	struct task_struct *cur = cpu_pda[cpu].pcurrent; 
 
+	current = cur;
 		rsp = regs->rsp;
 
 	printk("CPU %d ", cpu);
diff --git a/arch/x86_64/lib/copy_user.S b/arch/x86_64/lib/copy_user.S
index dfa358b..f24497d 100644
--- a/arch/x86_64/lib/copy_user.S
+++ b/arch/x86_64/lib/copy_user.S
@@ -95,6 +95,7 @@ copy_user_generic:	
 	.previous
 .Lcug:	
 	pushq %rbx
+	pushq %r12
 	xorl %eax,%eax		/*zero for the exception handler */
 
 #ifdef FIX_ALIGNMENT
@@ -117,20 +118,20 @@ copy_user_generic:	
 .Ls1:	movq (%rsi),%r11
 .Ls2:	movq 1*8(%rsi),%r8
 .Ls3:	movq 2*8(%rsi),%r9
-.Ls4:	movq 3*8(%rsi),%r10
+.Ls4:	movq 3*8(%rsi),%r12
 .Ld1:	movq %r11,(%rdi)
 .Ld2:	movq %r8,1*8(%rdi)
 .Ld3:	movq %r9,2*8(%rdi)
-.Ld4:	movq %r10,3*8(%rdi)
+.Ld4:	movq %r12,3*8(%rdi)
 		
 .Ls5:	movq 4*8(%rsi),%r11
 .Ls6:	movq 5*8(%rsi),%r8
 .Ls7:	movq 6*8(%rsi),%r9
-.Ls8:	movq 7*8(%rsi),%r10
+.Ls8:	movq 7*8(%rsi),%r12
 .Ld5:	movq %r11,4*8(%rdi)
 .Ld6:	movq %r8,5*8(%rdi)
 .Ld7:	movq %r9,6*8(%rdi)
-.Ld8:	movq %r10,7*8(%rdi)
+.Ld8:	movq %r12,7*8(%rdi)
 	
 	decq %rdx
 
@@ -169,6 +170,7 @@ copy_user_generic:	
 	jnz .Lloop_1
 			
 .Lende:
+	popq %r12
 	popq %rbx
 	ret	
 
diff --git a/arch/x86_64/lib/csum-copy.S b/arch/x86_64/lib/csum-copy.S
index 72fd55e..8e0ee5f 100644
--- a/arch/x86_64/lib/csum-copy.S
+++ b/arch/x86_64/lib/csum-copy.S
@@ -84,7 +84,7 @@ csum_partial_copy_generic:
 	/* main loop. clear in 64 byte blocks */
 	/* r9: zero, r8: temp2, rbx: temp1, rax: sum, rcx: saved length */
 	/* r11:	temp3, rdx: temp4, r12 loopcnt */
-	/* r10:	temp5, rbp: temp6, r14 temp7, r13 temp8 */
+	/* r15:	temp5, rbp: temp6, r14 temp7, r13 temp8 */
 	.p2align 4
 .Lloop:
 	source
@@ -97,7 +97,7 @@ csum_partial_copy_generic:
 	movq  24(%rdi),%rdx
 
 	source
-	movq  32(%rdi),%r10
+	movq  32(%rdi),%r15
 	source
 	movq  40(%rdi),%rbp
 	source
@@ -112,7 +112,7 @@ csum_partial_copy_generic:
 	adcq  %r8,%rax
 	adcq  %r11,%rax
 	adcq  %rdx,%rax
-	adcq  %r10,%rax
+	adcq  %r15,%rax
 	adcq  %rbp,%rax
 	adcq  %r14,%rax
 	adcq  %r13,%rax
@@ -129,7 +129,7 @@ csum_partial_copy_generic:
 	movq %rdx,24(%rsi)
 
 	dest
-	movq %r10,32(%rsi)
+	movq %r15,32(%rsi)
 	dest
 	movq %rbp,40(%rsi)
 	dest
@@ -149,7 +149,7 @@ csum_partial_copy_generic:
 	/* do last upto 56 bytes */
 .Lhandle_tail:
 	/* ecx:	count */
-	movl %ecx,%r10d
+	movl %ecx,%r15d
 	andl $63,%ecx
 	shrl $3,%ecx
 	jz 	 .Lfold
@@ -176,7 +176,7 @@ csum_partial_copy_generic:
 
 	/* do last upto 6 bytes */	
 .Lhandle_7:
-	movl %r10d,%ecx
+	movl %r15d,%ecx
 	andl $7,%ecx
 	shrl $1,%ecx
 	jz   .Lhandle_1
@@ -198,7 +198,7 @@ csum_partial_copy_generic:
 	
 	/* handle last odd byte */
 .Lhandle_1:
-	testl $1,%r10d
+	testl $1,%r15d
 	jz    .Lende
 	xorl  %ebx,%ebx
 	source
diff --git a/include/asm-x86_64/current.h b/include/asm-x86_64/current.h
index bc8adec..6675f2d 100644
--- a/include/asm-x86_64/current.h
+++ b/include/asm-x86_64/current.h
@@ -6,13 +6,7 @@ struct task_struct;
 
 #include <asm/pda.h>
 
-static inline struct task_struct *get_current(void) 
-{ 
-	struct task_struct *t = read_pda(pcurrent); 
-	return t;
-} 
-
-#define current get_current()
+register struct task_struct *current __asm__("%r10");
 
 #else
 

             reply	other threads:[~2005-11-22 16:54 UTC|newest]

Thread overview: 7+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2005-11-22 16:52 Benjamin LaHaise [this message]
2005-11-22 17:10 ` rfc/rft: use r10 as current on x86-64 Andi Kleen
2005-11-22 17:26   ` Benjamin LaHaise
2005-11-22 17:46   ` Brian Gerst
2005-11-22 17:55     ` Andreas Steinmetz
2005-11-23 22:48 ` Pavel Machek
2005-11-23 22:54   ` Benjamin LaHaise

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20051122165204.GG1127@kvack.org \
    --to=bcrl@kvack.org \
    --cc=ak@suse.de \
    --cc=linux-kernel@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.