virtualization.lists.linux-foundation.org archive mirror
 help / color / mirror / Atom feed
From: Jeremy Fitzhardinge <jeremy@goop.org>
To: Rusty Russell <rusty@rustcorp.com.au>
Cc: Andi Kleen <ak@muc.de>,
	lkml - Kernel Mailing List <linux-kernel@vger.kernel.org>,
	virtualization <virtualization@lists.osdl.org>
Subject: Re: [PATCH 5/7] Use %gs for per-cpu sections in kernel
Date: Fri, 22 Sep 2006 15:39:58 -0700	[thread overview]
Message-ID: <4514663E.5050707@goop.org> (raw)
In-Reply-To: <1158926386.26261.17.camel@localhost.localdomain>

Rusty Russell wrote:
> This patch actually uses the gs register to implement the per-cpu
> sections.  It's fairly straightforward: the gs segment starts at the
> per-cpu offset for the particular cpu (or 0, in very early boot).  
>
> We also implement x86_64-inspired (via Jeremy Fitzhardinge) per-cpu
> accesses where a general lvalue isn't needed.  These
> single-instruction accesses are slightly more efficient, plus (being a
> single insn) are atomic wrt. preemption so we can use them to
> implement cpu_local_inc etc.
>
> Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
>
> Index: ak-fresh/arch/i386/kernel/cpu/common.c
> ===================================================================
> --- ak-fresh.orig/arch/i386/kernel/cpu/common.c	2006-09-22 16:48:14.000000000 +1000
> +++ ak-fresh/arch/i386/kernel/cpu/common.c	2006-09-22 17:02:47.000000000 +1000
> @@ -13,6 +13,7 @@
>  #include <asm/mmu_context.h>
>  #include <asm/mtrr.h>
>  #include <asm/mce.h>
> +#include <asm/smp.h>
>  #ifdef CONFIG_X86_LOCAL_APIC
>  #include <asm/mpspec.h>
>  #include <asm/apic.h>
> @@ -601,12 +602,24 @@
>  	struct thread_struct *thread = &current->thread;
>  	struct desc_struct *gdt;
>  	__u32 stk16_off = (__u32)&per_cpu(cpu_16bit_stack, cpu);
> -	struct Xgt_desc_struct *cpu_gdt_descr = &per_cpu(cpu_gdt_descr, cpu);
>  
>  	if (cpu_test_and_set(cpu, cpu_initialized)) {
>  		printk(KERN_WARNING "CPU#%d already initialized!\n", cpu);
>  		for (;;) local_irq_enable();
>  	}
> +
> +	/* Set up GDT entry for 16bit stack */
> +	stk16_off = (u32)&per_cpu(cpu_16bit_stack, cpu);
> +	gdt = per_cpu(cpu_gdt_table, cpu);
> +	*(__u64 *)(&gdt[GDT_ENTRY_ESPFIX_SS]) |=
> +		((((__u64)stk16_off) << 16) & 0x000000ffffff0000ULL) |
> +		((((__u64)stk16_off) << 32) & 0xff00000000000000ULL) |
> +		(CPU_16BIT_STACK_SIZE - 1);
>   

This should use pack_descriptor().  I'd never got around to changing it, 
but it really should.

> +	/* Complete percpu area setup early, before calling printk(),
> +	   since it may end up using it indirectly. */
> +	setup_percpu_for_this_cpu(cpu);
> +
>   

I managed to get all this done in head.S before going into C code; is 
that not still possible?  Or is there a later patch to do this?

> +static __cpuinit void setup_percpu_descriptor(struct desc_struct *gdt,
> +					      unsigned long per_cpu_off)
> +{
> +	unsigned limit, flags;
> +
> +	limit = (1 << 20);
> +	flags = 0x8;		/* 4k granularity */
>   

Why not set the limit to the percpu section size?  It would avoid having 
it clipped under Xen.


> +/* Be careful not to use %gs references until this is setup: needs to
> + * be done on this CPU. */
> +void __init setup_percpu_for_this_cpu(unsigned int cpu)
> +{
> +	struct desc_struct *gdt = per_cpu(cpu_gdt_table, cpu);
> +	struct Xgt_desc_struct *cpu_gdt_descr = &per_cpu(cpu_gdt_descr, cpu);
> +
> +	per_cpu(this_cpu_off, cpu) = __per_cpu_offset[cpu];
> +	setup_percpu_descriptor(&gdt[GDT_ENTRY_PERCPU],	__per_cpu_offset[cpu]);
> +	cpu_gdt_descr->address = (unsigned long)gdt;
> +	cpu_gdt_descr->size = GDT_SIZE - 1;
> +	load_gdt(cpu_gdt_descr);
> +	set_kernel_gs();
> +}
>   

Everything except the load_gdt and set_kernel_gs could be done in advance.

> +
>  void __devinit smp_prepare_boot_cpu(void)
>  {
> +	setup_percpu_for_this_cpu(0);
> +
>  	cpu_set(smp_processor_id(), cpu_online_map);
>  	cpu_set(smp_processor_id(), cpu_callout_map);
>  	cpu_set(smp_processor_id(), cpu_present_map);
> Index: ak-fresh/include/asm-i386/percpu.h
> ===================================================================
> --- ak-fresh.orig/include/asm-i386/percpu.h	2006-09-22 16:48:14.000000000 +1000
> +++ ak-fresh/include/asm-i386/percpu.h	2006-09-22 16:59:00.000000000 +1000
> @@ -1,6 +1,107 @@
>  #ifndef __ARCH_I386_PERCPU__
>  #define __ARCH_I386_PERCPU__
>  
> +#ifdef CONFIG_SMP
> +/* Same as generic implementation except for optimized local access. */
> +#define __GENERIC_PER_CPU
> +
> +/* This is used for other cpus to find our section. */
> +extern unsigned long __per_cpu_offset[NR_CPUS];
> +
> +/* Separate out the type, so (int[3], foo) works. */
> +#define DECLARE_PER_CPU(type, name) extern __typeof__(type) per_cpu__##name
> +#define DEFINE_PER_CPU(type, name) \
> +    __attribute__((__section__(".data.percpu"))) __typeof__(type) per_cpu__##name
> +
> +/* We can use this directly for local CPU (faster). */
> +DECLARE_PER_CPU(unsigned long, this_cpu_off);
> +
> +/* var is in discarded region: offset to particular copy we want */
> +#define per_cpu(var, cpu) (*({				\
> +	extern int simple_indentifier_##var(void);	\
> +	RELOC_HIDE(&per_cpu__##var, __per_cpu_offset[cpu]); }))
> +
> +#define __raw_get_cpu_var(var) (*({					\
> +	extern int simple_indentifier_##var(void);			\
> +	RELOC_HIDE(&per_cpu__##var, x86_read_percpu(this_cpu_off));	\
> +}))
> +
> +#define __get_cpu_var(var) __raw_get_cpu_var(var)
> +
> +/* A macro to avoid #include hell... */
> +#define percpu_modcopy(pcpudst, src, size)			\
> +do {								\
> +	unsigned int __i;					\
> +	for_each_possible_cpu(__i)				\
> +		memcpy((pcpudst)+__per_cpu_offset[__i],		\
> +		       (src), (size));				\
> +} while (0)
> +
> +#define EXPORT_PER_CPU_SYMBOL(var) EXPORT_SYMBOL(per_cpu__##var)
> +#define EXPORT_PER_CPU_SYMBOL_GPL(var) EXPORT_SYMBOL_GPL(per_cpu__##var)
> +
> +/* gs segment starts at (positive) offset == __per_cpu_offset[cpu] */
> +#define __percpu_seg "%%gs:"
> +#else  /* !SMP */
>  #include <asm-generic/percpu.h>
> +#define __percpu_seg ""
> +#endif	/* SMP */
> +
> +/* For arch-specific code, we can use direct single-insn ops (they
> + * don't give an lvalue though). */
> +extern void __bad_percpu_size(void);
> +
> +#define percpu_to_op(op,var,val)				\
> +	do {							\
> +		typedef typeof(var) T__;			\
> +		if (0) { T__ tmp__; tmp__ = (val); }		\
> +		switch (sizeof(var)) {				\
> +		case 1:						\
> +			asm(op "b %1,"__percpu_seg"%0"		\
>   

So are symbols referencing the .data.percpu section 0-based?  Wouldn't 
you need to subtract __per_cpu_start from the symbols to get a 0-based 
segment offset?

Or is the only percpu benefit you're getting from %gs a slightly 
quicker way of getting the percpu_offset?  Does that help much?
> +#define x86_read_percpu(var) percpu_from_op("mov", per_cpu__##var)
> +#define x86_write_percpu(var,val) percpu_to_op("mov", per_cpu__##var, val)
> +#define x86_add_percpu(var,val) percpu_to_op("add", per_cpu__##var, val)
> +#define x86_sub_percpu(var,val) percpu_to_op("sub", per_cpu__##var, val)
> +#define x86_or_percpu(var,val) percpu_to_op("or", per_cpu__##var, val)
>   

Why x86_?  If some other arch implemented a similar mechanism, wouldn't 
they want to use the same macro names?

    J

  parent reply	other threads:[~2006-09-22 22:39 UTC|newest]

Thread overview: 32+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2006-09-22 11:51 [PATCH 0/7] Using %gs for per-cpu areas on x86 Rusty Russell
2006-09-22 11:53 ` [PATCH 1/7] Use per-cpu GDT tables from early in boot Rusty Russell
2006-09-22 11:55   ` [PATCH 2/7] Rusty Russell
2006-09-22 11:56     ` [PATCH 3/7] Update sys_vm86 to cope with changed pt_regs and %gs usage Rusty Russell
2006-09-22 11:58       ` [PATCH 4/7] Fix places where using %gs changes the usermode ABI Rusty Russell
2006-09-22 11:59         ` [PATCH 5/7] Use %gs for per-cpu sections in kernel Rusty Russell
2006-09-22 12:00           ` [PATCH 6/7] (Optional) implement smp_processor_id() as a per-cpu var Rusty Russell
2006-09-22 12:01             ` [PATCH 7/7] (Optional) implement current " Rusty Russell
2006-09-25  5:29               ` Rusty Russell
2006-09-25  5:27             ` [PATCH 6/7] (Optional) implement smp_processor_id() " Rusty Russell
2006-09-22 12:32           ` [PATCH 5/7] Use %gs for per-cpu sections in kernel Andi Kleen
2006-09-22 22:43             ` Jeremy Fitzhardinge
2006-09-22 23:52               ` Andi Kleen
2006-09-23  4:51             ` Rusty Russell
2006-09-23  8:17               ` Andi Kleen
2006-09-23  8:55                 ` Rusty Russell
2006-09-22 22:39           ` Jeremy Fitzhardinge [this message]
2006-09-23  4:31             ` Rusty Russell
2006-09-25  1:03               ` Jeremy Fitzhardinge
2006-09-25  1:16                 ` Rusty Russell
2006-09-25  1:36                   ` Jeremy Fitzhardinge
2006-09-25  2:51                     ` Rusty Russell
2006-09-25  5:25                       ` Jeremy Fitzhardinge
2006-09-25  6:03                         ` Rusty Russell
2006-09-25  6:25                           ` Jeremy Fitzhardinge
2006-09-25 23:33                             ` Rusty Russell
2006-09-23  8:13             ` Andi Kleen
2006-09-25  1:07               ` Jeremy Fitzhardinge
2006-09-25  1:20                 ` Rusty Russell
2006-09-25  5:26                   ` Rusty Russell
2006-09-22 22:24     ` [PATCH 2/7] Jeremy Fitzhardinge
2006-09-23  4:36       ` Rusty Russell

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=4514663E.5050707@goop.org \
    --to=jeremy@goop.org \
    --cc=ak@muc.de \
    --cc=linux-kernel@vger.kernel.org \
    --cc=rusty@rustcorp.com.au \
    --cc=virtualization@lists.osdl.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).