public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
* [PATCH 16/16] 2.6.17-rc6 perfmon2 patch for review: modified x86_64 files
@ 2006-06-15  9:07 Stephane Eranian
  2006-06-30 12:06 ` Andi Kleen
  0 siblings, 1 reply; 2+ messages in thread
From: Stephane Eranian @ 2006-06-15  9:07 UTC (permalink / raw)
  To: linux-kernel; +Cc: eranian

This patch contains the modified files for x86_64 (AMD and Intel EM64T).




diff -ur linux-2.6.17-rc6.orig/arch/x86_64/Kconfig linux-2.6.17-rc6/arch/x86_64/Kconfig
--- linux-2.6.17-rc6.orig/arch/x86_64/Kconfig	2006-06-08 01:42:31.000000000 -0700
+++ linux-2.6.17-rc6/arch/x86_64/Kconfig	2006-06-13 06:58:43.000000000 -0700
@@ -501,6 +501,8 @@
          optimal TLB usage. If you have pretty much any version of binutils, 
 	 this can increase your kernel build time by roughly one minute.
 
+source "arch/x86_64/perfmon/Kconfig"
+
 endmenu
 
 #
diff -ur linux-2.6.17-rc6.orig/arch/x86_64/Makefile linux-2.6.17-rc6/arch/x86_64/Makefile
--- linux-2.6.17-rc6.orig/arch/x86_64/Makefile	2006-06-08 01:42:31.000000000 -0700
+++ linux-2.6.17-rc6/arch/x86_64/Makefile	2006-06-08 01:49:22.000000000 -0700
@@ -65,6 +65,7 @@
 					   arch/x86_64/crypto/
 core-$(CONFIG_IA32_EMULATION)		+= arch/x86_64/ia32/
 drivers-$(CONFIG_PCI)			+= arch/x86_64/pci/
+drivers-$(CONFIG_PERFMON)		+= arch/x86_64/perfmon/
 drivers-$(CONFIG_OPROFILE)		+= arch/x86_64/oprofile/
 
 boot := arch/x86_64/boot
diff -ur linux-2.6.17-rc6.orig/arch/x86_64/ia32/ia32entry.S linux-2.6.17-rc6/arch/x86_64/ia32/ia32entry.S
--- linux-2.6.17-rc6.orig/arch/x86_64/ia32/ia32entry.S	2006-06-08 01:42:31.000000000 -0700
+++ linux-2.6.17-rc6/arch/x86_64/ia32/ia32entry.S	2006-06-08 01:49:22.000000000 -0700
@@ -696,4 +696,17 @@
 	.quad sys_sync_file_range
 	.quad sys_tee
 	.quad compat_sys_vmsplice
+   	.quad sys_pfm_create_context
+       	.quad sys_pfm_write_pmcs
+       	.quad sys_pfm_write_pmds
+       	.quad sys_pfm_read_pmds		/* 320 */
+       	.quad sys_pfm_load_context
+       	.quad sys_pfm_start
+       	.quad sys_pfm_stop
+       	.quad sys_pfm_restart
+       	.quad sys_pfm_create_evtsets	/* 325 */
+       	.quad sys_pfm_getinfo_evtsets
+       	.quad sys_pfm_delete_evtsets
+  	.quad sys_pfm_unload_context
+
 ia32_syscall_end:		
diff -ur linux-2.6.17-rc6.orig/arch/x86_64/kernel/apic.c linux-2.6.17-rc6/arch/x86_64/kernel/apic.c
--- linux-2.6.17-rc6.orig/arch/x86_64/kernel/apic.c	2006-06-08 01:42:31.000000000 -0700
+++ linux-2.6.17-rc6/arch/x86_64/kernel/apic.c	2006-06-08 01:49:22.000000000 -0700
@@ -26,6 +26,7 @@
 #include <linux/kernel_stat.h>
 #include <linux/sysdev.h>
 #include <linux/module.h>
+#include <linux/perfmon.h>
 
 #include <asm/atomic.h>
 #include <asm/smp.h>
@@ -934,6 +935,7 @@
 void smp_local_timer_interrupt(struct pt_regs *regs)
 {
 	profile_tick(CPU_PROFILING, regs);
+ 	pfm_handle_switch_timeout();
 #ifdef CONFIG_SMP
 	update_process_times(user_mode(regs));
 #endif
diff -ur linux-2.6.17-rc6.orig/arch/x86_64/kernel/entry.S linux-2.6.17-rc6/arch/x86_64/kernel/entry.S
--- linux-2.6.17-rc6.orig/arch/x86_64/kernel/entry.S	2006-06-08 01:42:31.000000000 -0700
+++ linux-2.6.17-rc6/arch/x86_64/kernel/entry.S	2006-06-08 01:49:22.000000000 -0700
@@ -640,6 +640,11 @@
 
 ENTRY(spurious_interrupt)
 	apicinterrupt SPURIOUS_APIC_VECTOR,smp_spurious_interrupt
+
+#ifdef CONFIG_PERFMON
+ENTRY(pmu_interrupt)
+	apicinterrupt LOCAL_PERFMON_VECTOR,smp_pmu_interrupt
+#endif
 #endif
 				
 /*
diff -ur linux-2.6.17-rc6.orig/arch/x86_64/kernel/i8259.c linux-2.6.17-rc6/arch/x86_64/kernel/i8259.c
--- linux-2.6.17-rc6.orig/arch/x86_64/kernel/i8259.c	2006-03-19 21:53:29.000000000 -0800
+++ linux-2.6.17-rc6/arch/x86_64/kernel/i8259.c	2006-06-08 01:49:22.000000000 -0700
@@ -13,6 +13,7 @@
 #include <linux/kernel_stat.h>
 #include <linux/sysdev.h>
 #include <linux/bitops.h>
+#include <linux/perfmon.h>
 
 #include <asm/acpi.h>
 #include <asm/atomic.h>
@@ -589,6 +590,8 @@
 	/* IPI vectors for APIC spurious and error interrupts */
 	set_intr_gate(SPURIOUS_APIC_VECTOR, spurious_interrupt);
 	set_intr_gate(ERROR_APIC_VECTOR, error_interrupt);
+
+ 	pfm_vector_init();
 #endif
 
 	/*
diff -ur linux-2.6.17-rc6.orig/arch/x86_64/kernel/nmi.c linux-2.6.17-rc6/arch/x86_64/kernel/nmi.c
--- linux-2.6.17-rc6.orig/arch/x86_64/kernel/nmi.c	2006-06-08 01:42:31.000000000 -0700
+++ linux-2.6.17-rc6/arch/x86_64/kernel/nmi.c	2006-06-08 01:49:22.000000000 -0700
@@ -248,6 +248,7 @@
 	old_owner = lapic_nmi_owner;
 	lapic_nmi_owner |= LAPIC_NMI_RESERVED;
 	spin_unlock(&lapic_nmi_owner_lock);
+
 	if (old_owner & LAPIC_NMI_RESERVED)
 		return -EBUSY;
 	if (old_owner & LAPIC_NMI_WATCHDOG)
diff -ur linux-2.6.17-rc6.orig/arch/x86_64/kernel/process.c linux-2.6.17-rc6/arch/x86_64/kernel/process.c
--- linux-2.6.17-rc6.orig/arch/x86_64/kernel/process.c	2006-06-08 01:42:31.000000000 -0700
+++ linux-2.6.17-rc6/arch/x86_64/kernel/process.c	2006-06-08 01:49:22.000000000 -0700
@@ -37,6 +37,7 @@
 #include <linux/random.h>
 #include <linux/notifier.h>
 #include <linux/kprobes.h>
+#include <linux/perfmon.h>
 
 #include <asm/uaccess.h>
 #include <asm/pgtable.h>
@@ -358,6 +359,7 @@
 		t->io_bitmap_max = 0;
 		put_cpu();
 	}
+	pfm_exit_thread(me);
 }
 
 void flush_thread(void)
@@ -459,6 +461,8 @@
 	asm("mov %%es,%0" : "=m" (p->thread.es));
 	asm("mov %%ds,%0" : "=m" (p->thread.ds));
 
+	pfm_copy_thread(p, childregs);
+
 	if (unlikely(me->thread.io_bitmap_ptr != NULL)) { 
 		p->thread.io_bitmap_ptr = kmalloc(IO_BITMAP_BYTES, GFP_KERNEL);
 		if (!p->thread.io_bitmap_ptr) {
@@ -482,6 +486,8 @@
 		if (err) 
 			goto out;
 	}
+
+
 	err = 0;
 out:
 	if (err && p->thread.io_bitmap_ptr) {
@@ -615,6 +621,7 @@
 			memset(tss->io_bitmap, 0xff, prev->io_bitmap_max);
 		}
 	}
+	pfm_ctxswin(next_p);
 
 	return prev_p;
 }
diff -ur linux-2.6.17-rc6.orig/arch/x86_64/kernel/signal.c linux-2.6.17-rc6/arch/x86_64/kernel/signal.c
--- linux-2.6.17-rc6.orig/arch/x86_64/kernel/signal.c	2006-06-08 01:42:31.000000000 -0700
+++ linux-2.6.17-rc6/arch/x86_64/kernel/signal.c	2006-06-08 01:49:22.000000000 -0700
@@ -24,6 +24,7 @@
 #include <linux/stddef.h>
 #include <linux/personality.h>
 #include <linux/compiler.h>
+#include <linux/perfmon.h>
 #include <asm/ucontext.h>
 #include <asm/uaccess.h>
 #include <asm/i387.h>
@@ -492,6 +493,10 @@
 		regs->eflags |= TF_MASK;
 		clear_thread_flag(TIF_SINGLESTEP);
 	}
+	/*
+	 * must be done before signals
+	 */
+	pfm_handle_work();
 
 	/* deal with pending signal delivery */
 	if (thread_info_flags & _TIF_SIGPENDING)
diff -ur linux-2.6.17-rc6.orig/arch/x86_64/kernel/x8664_ksyms.c linux-2.6.17-rc6/arch/x86_64/kernel/x8664_ksyms.c
--- linux-2.6.17-rc6.orig/arch/x86_64/kernel/x8664_ksyms.c	2006-06-08 01:42:31.000000000 -0700
+++ linux-2.6.17-rc6/arch/x86_64/kernel/x8664_ksyms.c	2006-06-08 01:49:22.000000000 -0700
@@ -95,6 +95,7 @@
 EXPORT_SYMBOL(__read_lock_failed);
 
 EXPORT_SYMBOL(smp_call_function);
+EXPORT_SYMBOL(smp_call_function_single);
 EXPORT_SYMBOL(cpu_callout_map);
 #endif
 
Only in linux-2.6.17-rc6/arch/x86_64: perfmon
diff -ur linux-2.6.17-rc6.orig/include/asm-x86_64/hw_irq.h linux-2.6.17-rc6/include/asm-x86_64/hw_irq.h
--- linux-2.6.17-rc6.orig/include/asm-x86_64/hw_irq.h	2006-03-19 21:53:29.000000000 -0800
+++ linux-2.6.17-rc6/include/asm-x86_64/hw_irq.h	2006-06-08 01:49:22.000000000 -0700
@@ -67,6 +67,7 @@
  * sources per level' errata.
  */
 #define LOCAL_TIMER_VECTOR	0xef
+#define LOCAL_PERFMON_VECTOR	0xee
 
 /*
  * First APIC vector available to drivers: (vectors 0x30-0xee)
@@ -74,7 +75,7 @@
  * levels. (0x80 is the syscall vector)
  */
 #define FIRST_DEVICE_VECTOR	0x31
-#define FIRST_SYSTEM_VECTOR	0xef   /* duplicated in irq.h */
+#define FIRST_SYSTEM_VECTOR	0xee   /* duplicated in irq.h */
 
 
 #ifndef __ASSEMBLY__
diff -ur linux-2.6.17-rc6.orig/include/asm-x86_64/irq.h linux-2.6.17-rc6/include/asm-x86_64/irq.h
--- linux-2.6.17-rc6.orig/include/asm-x86_64/irq.h	2006-03-19 21:53:29.000000000 -0800
+++ linux-2.6.17-rc6/include/asm-x86_64/irq.h	2006-06-08 01:49:22.000000000 -0700
@@ -29,7 +29,7 @@
  */
 #define NR_VECTORS 256
 
-#define FIRST_SYSTEM_VECTOR	0xef   /* duplicated in hw_irq.h */
+#define FIRST_SYSTEM_VECTOR	0xee   /* duplicated in hw_irq.h */
 
 #ifdef CONFIG_PCI_MSI
 #define NR_IRQS FIRST_SYSTEM_VECTOR
Only in linux-2.6.17-rc6/include/asm-x86_64: perfmon.h
Only in linux-2.6.17-rc6/include/asm-x86_64: perfmon_em64t_pebs_smpl.h
diff -ur linux-2.6.17-rc6.orig/include/asm-x86_64/system.h linux-2.6.17-rc6/include/asm-x86_64/system.h
--- linux-2.6.17-rc6.orig/include/asm-x86_64/system.h	2006-06-08 01:42:36.000000000 -0700
+++ linux-2.6.17-rc6/include/asm-x86_64/system.h	2006-06-08 01:49:22.000000000 -0700
@@ -27,6 +27,7 @@
 	,"rcx","rbx","rdx","r8","r9","r10","r11","r12","r13","r14","r15"
 
 #define switch_to(prev,next,last) \
+	pfm_ctxswout(prev);							  \
 	asm volatile(SAVE_CONTEXT						    \
 		     "movq %%rsp,%P[threadrsp](%[prev])\n\t" /* save RSP */	  \
 		     "movq %P[threadrsp](%[next]),%%rsp\n\t" /* restore RSP */	  \
diff -ur linux-2.6.17-rc6.orig/include/asm-x86_64/unistd.h linux-2.6.17-rc6/include/asm-x86_64/unistd.h
--- linux-2.6.17-rc6.orig/include/asm-x86_64/unistd.h	2006-06-08 01:42:36.000000000 -0700
+++ linux-2.6.17-rc6/include/asm-x86_64/unistd.h	2006-06-08 01:49:22.000000000 -0700
@@ -617,8 +617,32 @@
 __SYSCALL(__NR_sync_file_range, sys_sync_file_range)
 #define __NR_vmsplice		278
 __SYSCALL(__NR_vmsplice, sys_vmsplice)
-
-#define __NR_syscall_max __NR_vmsplice
+#define __NR_pfm_create_context	279
+ __SYSCALL(__NR_pfm_create_context, sys_pfm_create_context)
+#define __NR_pfm_write_pmcs	(__NR_pfm_create_context+1)
+__SYSCALL(__NR_pfm_write_pmcs, sys_pfm_write_pmcs)
+#define __NR_pfm_write_pmds	(__NR_pfm_create_context+2)
+__SYSCALL(__NR_pfm_write_pmds, sys_pfm_write_pmds)
+#define __NR_pfm_read_pmds	(__NR_pfm_create_context+3)
+__SYSCALL(__NR_pfm_read_pmds, sys_pfm_read_pmds)
+#define __NR_pfm_load_context	(__NR_pfm_create_context+4)
+__SYSCALL(__NR_pfm_load_context, sys_pfm_load_context)
+#define __NR_pfm_start		(__NR_pfm_create_context+5)
+__SYSCALL(__NR_pfm_start, sys_pfm_start)
+#define __NR_pfm_stop		(__NR_pfm_create_context+6)
+__SYSCALL(__NR_pfm_stop, sys_pfm_stop)
+#define __NR_pfm_restart	(__NR_pfm_create_context+7)
+__SYSCALL(__NR_pfm_restart, sys_pfm_restart)
+#define __NR_pfm_create_evtsets	(__NR_pfm_create_context+8)
+__SYSCALL(__NR_pfm_create_evtsets, sys_pfm_create_evtsets)
+#define __NR_pfm_getinfo_evtsets (__NR_pfm_create_context+9)
+__SYSCALL(__NR_pfm_getinfo_evtsets, sys_pfm_getinfo_evtsets)
+#define __NR_pfm_delete_evtsets (__NR_pfm_create_context+10)
+__SYSCALL(__NR_pfm_delete_evtsets, sys_pfm_delete_evtsets)
+#define __NR_pfm_unload_context	(__NR_pfm_create_context+11)
+__SYSCALL(__NR_pfm_unload_context, sys_pfm_unload_context)
+  
+#define __NR_syscall_max __NR_pfm_unload_context
 
 #ifndef __NO_STUBS
 

^ permalink raw reply	[flat|nested] 2+ messages in thread

* Re: [PATCH 16/16] 2.6.17-rc6 perfmon2 patch for review: modified x86_64 files
  2006-06-15  9:07 [PATCH 16/16] 2.6.17-rc6 perfmon2 patch for review: modified x86_64 files Stephane Eranian
@ 2006-06-30 12:06 ` Andi Kleen
  0 siblings, 0 replies; 2+ messages in thread
From: Andi Kleen @ 2006-06-30 12:06 UTC (permalink / raw)
  To: Stephane Eranian; +Cc: eranian, linux-kernel

Stephane Eranian <eranian@frankl.hpl.hp.com> writes:

> This patch contains the modified files for x86_64 (AMD and Intel EM64T).
> 

A description (rationale, what you changed, why, etc.) is missing here.

In general, your patches would be easier to review if you used diff -p,
which shows the enclosing C function for each hunk.

The patch is too big and should be split into smaller pieces.

>  	.quad sys_sync_file_range
>  	.quad sys_tee
>  	.quad compat_sys_vmsplice
> +   	.quad sys_pfm_create_context
> +       	.quad sys_pfm_write_pmcs
> +       	.quad sys_pfm_write_pmds
> +       	.quad sys_pfm_read_pmds		/* 320 */
> +       	.quad sys_pfm_load_context
> +       	.quad sys_pfm_start
> +       	.quad sys_pfm_stop
> +       	.quad sys_pfm_restart
> +       	.quad sys_pfm_create_evtsets	/* 325 */
> +       	.quad sys_pfm_getinfo_evtsets
> +       	.quad sys_pfm_delete_evtsets
> +  	.quad sys_pfm_unload_context

I suppose all these system calls need separate review.
The indentation is unusual (spaces mixed in before the tabs).


I trust you tested that they are all 32-bit emulation clean.
>  	/*
> diff -ur linux-2.6.17-rc6.orig/arch/x86_64/kernel/nmi.c linux-2.6.17-rc6/arch/x86_64/kernel/nmi.c
> --- linux-2.6.17-rc6.orig/arch/x86_64/kernel/nmi.c	2006-06-08 01:42:31.000000000 -0700
> +++ linux-2.6.17-rc6/arch/x86_64/kernel/nmi.c	2006-06-08 01:49:22.000000000 -0700
> @@ -248,6 +248,7 @@
>  	old_owner = lapic_nmi_owner;
>  	lapic_nmi_owner |= LAPIC_NMI_RESERVED;
>  	spin_unlock(&lapic_nmi_owner_lock);
> +

?? 

>  		p->thread.io_bitmap_ptr = kmalloc(IO_BITMAP_BYTES, GFP_KERNEL);
>  		if (!p->thread.io_bitmap_ptr) {
> @@ -482,6 +486,8 @@
>  		if (err) 
>  			goto out;
>  	}
> +
> +

?? 

>  	err = 0;
>  out:
>  	if (err && p->thread.io_bitmap_ptr) {
> @@ -615,6 +621,7 @@
>  			memset(tss->io_bitmap, 0xff, prev->io_bitmap_max);
>  		}
>  	}
> +	pfm_ctxswin(next_p);

You are definitely adding far too much code to the context switch.

Please fold the existing debug register check and the existing
io bitmap check together into a single bitmap test, and then do the individual
checks only inside that if (). The non-debug case is supposed to be fast
and not weighed down by so many checks.
and not weighted down by so many checks.

>  	}
> +	/*
> +	 * must be done before signals
> +	 */
> +	pfm_handle_work();

The comment is not very enlightening. 

Also this should be inside the usual bitmap checks.


> diff -ur linux-2.6.17-rc6.orig/include/asm-x86_64/hw_irq.h linux-2.6.17-rc6/include/asm-x86_64/hw_irq.h
> --- linux-2.6.17-rc6.orig/include/asm-x86_64/hw_irq.h	2006-03-19 21:53:29.000000000 -0800
> +++ linux-2.6.17-rc6/include/asm-x86_64/hw_irq.h	2006-06-08 01:49:22.000000000 -0700
> @@ -67,6 +67,7 @@
>   * sources per level' errata.
>   */
>  #define LOCAL_TIMER_VECTOR	0xef
> +#define LOCAL_PERFMON_VECTOR	0xee
>  
>  /*
>   * First APIC vector available to drivers: (vectors 0x30-0xee)
> @@ -74,7 +75,7 @@
>   * levels. (0x80 is the syscall vector)
>   */
>  #define FIRST_DEVICE_VECTOR	0x31
> -#define FIRST_SYSTEM_VECTOR	0xef   /* duplicated in irq.h */
> +#define FIRST_SYSTEM_VECTOR	0xee   /* duplicated in irq.h */
>  
>  
>  #ifndef __ASSEMBLY__
> diff -ur linux-2.6.17-rc6.orig/include/asm-x86_64/irq.h linux-2.6.17-rc6/include/asm-x86_64/irq.h
> --- linux-2.6.17-rc6.orig/include/asm-x86_64/irq.h	2006-03-19 21:53:29.000000000 -0800
> +++ linux-2.6.17-rc6/include/asm-x86_64/irq.h	2006-06-08 01:49:22.000000000 -0700
> @@ -29,7 +29,7 @@
>   */
>  #define NR_VECTORS 256
>  
> -#define FIRST_SYSTEM_VECTOR	0xef   /* duplicated in hw_irq.h */
> +#define FIRST_SYSTEM_VECTOR	0xee   /* duplicated in hw_irq.h */
>  
>  #ifdef CONFIG_PCI_MSI
>  #define NR_IRQS FIRST_SYSTEM_VECTOR
> Only in linux-2.6.17-rc6/include/asm-x86_64: perfmon.h
> Only in linux-2.6.17-rc6/include/asm-x86_64: perfmon_em64t_pebs_smpl.h
> diff -ur linux-2.6.17-rc6.orig/include/asm-x86_64/system.h linux-2.6.17-rc6/include/asm-x86_64/system.h
> --- linux-2.6.17-rc6.orig/include/asm-x86_64/system.h	2006-06-08 01:42:36.000000000 -0700
> +++ linux-2.6.17-rc6/include/asm-x86_64/system.h	2006-06-08 01:49:22.000000000 -0700
> @@ -27,6 +27,7 @@
>  	,"rcx","rbx","rdx","r8","r9","r10","r11","r12","r13","r14","r15"
>  
>  #define switch_to(prev,next,last) \
> +	pfm_ctxswout(prev);							  \

No way. At best, please use a single shared test in the context switch as described above, not a multitude
of different hooks all over the fast paths. This should be in __switch_to().

-Andi

^ permalink raw reply	[flat|nested] 2+ messages in thread

end of thread, other threads:[~2006-06-30 12:06 UTC | newest]

Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2006-06-15  9:07 [PATCH 16/16] 2.6.17-rc6 perfmon2 patch for review: modified x86_64 files Stephane Eranian
2006-06-30 12:06 ` Andi Kleen

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox