linuxppc-dev.lists.ozlabs.org archive mirror
 help / color / mirror / Atom feed
* [PATCH] Fix oprofile sampling of marked events on POWER7
@ 2009-05-01 17:17 Maynard Johnson
  2009-06-15 13:40 ` Maynard Johnson
  0 siblings, 1 reply; 3+ messages in thread
From: Maynard Johnson @ 2009-05-01 17:17 UTC (permalink / raw)
  To: linuxppc-dev

[-- Attachment #1: Type: text/plain, Size: 1296 bytes --]

Description
-----------
Change ppc64 oprofile kernel driver to use the SLOT bits (MMCRA[37:39]) only on
older processors where those bits are defined.

Background
----------
The performance monitor unit of the 64-bit POWER processor family has the 
ability to collect accurate instruction-level samples when profiling on marked 
events (i.e., "PM_MRK_<event-name>").  In processors prior to POWER6, the MMCRA 
register contained "slot information" that the oprofile kernel driver used to 
adjust the value latched in the SIAR at the time of a PMU interrupt.  But as of 
POWER6, these slot bits in MMCRA are no longer necessary for oprofile to use, 
since the SIAR itself holds the accurate sampled instruction address.  With 
POWER6, these MMCRA slot bits were zero'ed out by hardware so oprofile's use of 
these slot bits was, in effect, a NOP.  But with POWER7, these bits are no 
longer zero'ed out; however, they serve some other purpose rather than slot 
information.  Thus, using these bits on POWER7 to adjust the SIAR value results 
in samples being attributed to the wrong instructions.  The attached patch 
changes the oprofile kernel driver to ignore these slot bits on all newer 
processors starting with POWER6.

Thanks.
-Maynard

Signed-off-by: Maynard Johnson <maynardj@us.ibm.com>

[-- Attachment #2: oprof-p7.patch --]
[-- Type: text/plain, Size: 1222 bytes --]

diff -paur linux/arch/powerpc/oprofile/op_model_power4.c linux-p7-oprofile-patch//arch/powerpc/oprofile/op_model_power4.c
--- linux/arch/powerpc/oprofile/op_model_power4.c	2009-05-01 08:20:21.000000000 -0500
+++ linux-p7-oprofile-patch//arch/powerpc/oprofile/op_model_power4.c	2009-05-01 08:20:05.000000000 -0500
@@ -26,6 +26,7 @@
 static unsigned long reset_value[OP_MAX_COUNTER];
 
 static int oprofile_running;
+static int use_slot_nums;
 
 /* mmcr values are set in power4_reg_setup, used in power4_cpu_setup */
 static u32 mmcr0_val;
@@ -61,6 +62,12 @@ static int power4_reg_setup(struct op_co
 	else
 		mmcr0_val |= MMCR0_PROBLEM_DISABLE;
 
+	if (__is_processor(PV_POWER4) || __is_processor(PV_POWER4p) ||
+	    __is_processor(PV_970) || __is_processor(PV_970FX) ||
+	    __is_processor(PV_970MP) || __is_processor(PV_970GX) ||
+	    __is_processor(PV_POWER5) || __is_processor(PV_POWER5p))
+		use_slot_nums = 1;
+
 	return 0;
 }
 
@@ -206,7 +213,7 @@ static unsigned long get_pc(struct pt_re
 
 	mmcra = mfspr(SPRN_MMCRA);
 
-	if (mmcra & MMCRA_SAMPLE_ENABLE) {
+	if (use_slot_nums && (mmcra & MMCRA_SAMPLE_ENABLE)) {
 		slot = ((mmcra & MMCRA_SLOT) >> MMCRA_SLOT_SHIFT);
 		if (slot > 1)
 			pc += 4 * (slot - 1);

^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: [PATCH] Fix oprofile sampling of marked events on POWER7
  2009-05-01 17:17 [PATCH] Fix oprofile sampling of marked events on POWER7 Maynard Johnson
@ 2009-06-15 13:40 ` Maynard Johnson
  2009-06-16  4:12   ` Benjamin Herrenschmidt
  0 siblings, 1 reply; 3+ messages in thread
From: Maynard Johnson @ 2009-06-15 13:40 UTC (permalink / raw)
  To: Benjamin Herrenschmidt; +Cc: linuxppc-dev, Anton Blanchard

Looks like this posting got overlooked, so I'm re-posting the original patch.  Ben, can you please review?  BTW, Anton reported the problem that this patch fixes.

Thanks.
-Maynard

==============================================================


Description
-----------
Change ppc64 oprofile kernel driver to use the SLOT bits (MMCRA[37:39]) only on
older processors where those bits are defined.

Background
----------
The performance monitor unit of the 64-bit POWER processor family has the 
ability to collect accurate instruction-level samples when profiling on marked 
events (i.e., "PM_MRK_<event-name>").  In processors prior to POWER6, the MMCRA 
register contained "slot information" that the oprofile kernel driver used to 
adjust the value latched in the SIAR at the time of a PMU interrupt.  But as of 
POWER6, these slot bits in MMCRA are no longer necessary for oprofile to use, 
since the SIAR itself holds the accurate sampled instruction address.  With 
POWER6, these MMCRA slot bits were zero'ed out by hardware so oprofile's use of 
these slot bits was, in effect, a NOP.  But with POWER7, these bits are no 
longer zero'ed out; however, they serve some other purpose rather than slot 
information.  Thus, using these bits on POWER7 to adjust the SIAR value results 
in samples being attributed to the wrong instructions.  The attached patch 
changes the oprofile kernel driver to ignore these slot bits on all newer 
processors starting with POWER6.

Thanks.
-Maynard

Signed-off-by: Maynard Johnson <maynardj@us.ibm.com>



diff -paur linux/arch/powerpc/oprofile/op_model_power4.c linux-p7-oprofile-patch//arch/powerpc/oprofile/op_model_power4.c
--- linux/arch/powerpc/oprofile/op_model_power4.c	2009-05-01 08:20:21.000000000 -0500
+++ linux-p7-oprofile-patch//arch/powerpc/oprofile/op_model_power4.c	2009-05-01 08:20:05.000000000 -0500
@@ -26,6 +26,7 @@
 static unsigned long reset_value[OP_MAX_COUNTER];

 static int oprofile_running;
+static int use_slot_nums;

 /* mmcr values are set in power4_reg_setup, used in power4_cpu_setup */
 static u32 mmcr0_val;
@@ -61,6 +62,12 @@ static int power4_reg_setup(struct op_co
 	else
 		mmcr0_val |= MMCR0_PROBLEM_DISABLE;

+	if (__is_processor(PV_POWER4) || __is_processor(PV_POWER4p) ||
+	    __is_processor(PV_970) || __is_processor(PV_970FX) ||
+	    __is_processor(PV_970MP) || __is_processor(PV_970GX) ||
+	    __is_processor(PV_POWER5) || __is_processor(PV_POWER5p))
+		use_slot_nums = 1;
+
 	return 0;
 }

@@ -206,7 +213,7 @@ static unsigned long get_pc(struct pt_re

 	mmcra = mfspr(SPRN_MMCRA);

-	if (mmcra & MMCRA_SAMPLE_ENABLE) {
+	if (use_slot_nums && (mmcra & MMCRA_SAMPLE_ENABLE)) {
 		slot = ((mmcra & MMCRA_SLOT) >> MMCRA_SLOT_SHIFT);
 		if (slot > 1)
 			pc += 4 * (slot - 1);

^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: [PATCH] Fix oprofile sampling of marked events on POWER7
  2009-06-15 13:40 ` Maynard Johnson
@ 2009-06-16  4:12   ` Benjamin Herrenschmidt
  0 siblings, 0 replies; 3+ messages in thread
From: Benjamin Herrenschmidt @ 2009-06-16  4:12 UTC (permalink / raw)
  To: Maynard Johnson; +Cc: linuxppc-dev, Anton Blanchard

On Mon, 2009-06-15 at 08:40 -0500, Maynard Johnson wrote:
> Looks like this posting got overlooked, so I'm re-posting the original patch.  Ben, can you please review?  BTW, Anton reported the problem that this patch fixes.

It was merged upstream after 2.6.30-rc6, see commit e5fc948b...

Cheers,
Ben.

> Thanks.
> -Maynard
> 
> ==============================================================
> 
> 
> Description
> -----------
> Change ppc64 oprofile kernel driver to use the SLOT bits (MMCRA[37:39]) only on
> older processors where those bits are defined.
> 
> Background
> ----------
> The performance monitor unit of the 64-bit POWER processor family has the 
> ability to collect accurate instruction-level samples when profiling on marked 
> events (i.e., "PM_MRK_<event-name>").  In processors prior to POWER6, the MMCRA 
> register contained "slot information" that the oprofile kernel driver used to 
> adjust the value latched in the SIAR at the time of a PMU interrupt.  But as of 
> POWER6, these slot bits in MMCRA are no longer necessary for oprofile to use, 
> since the SIAR itself holds the accurate sampled instruction address.  With 
> POWER6, these MMCRA slot bits were zero'ed out by hardware so oprofile's use of 
> these slot bits was, in effect, a NOP.  But with POWER7, these bits are no 
> longer zero'ed out; however, they serve some other purpose rather than slot 
> information.  Thus, using these bits on POWER7 to adjust the SIAR value results 
> in samples being attributed to the wrong instructions.  The attached patch 
> changes the oprofile kernel driver to ignore these slot bits on all newer 
> processors starting with POWER6.
> 
> Thanks.
> -Maynard
> 
> Signed-off-by: Maynard Johnson <maynardj@us.ibm.com>
> 
> 
> 
> diff -paur linux/arch/powerpc/oprofile/op_model_power4.c linux-p7-oprofile-patch//arch/powerpc/oprofile/op_model_power4.c
> --- linux/arch/powerpc/oprofile/op_model_power4.c	2009-05-01 08:20:21.000000000 -0500
> +++ linux-p7-oprofile-patch//arch/powerpc/oprofile/op_model_power4.c	2009-05-01 08:20:05.000000000 -0500
> @@ -26,6 +26,7 @@
>  static unsigned long reset_value[OP_MAX_COUNTER];
> 
>  static int oprofile_running;
> +static int use_slot_nums;
> 
>  /* mmcr values are set in power4_reg_setup, used in power4_cpu_setup */
>  static u32 mmcr0_val;
> @@ -61,6 +62,12 @@ static int power4_reg_setup(struct op_co
>  	else
>  		mmcr0_val |= MMCR0_PROBLEM_DISABLE;
> 
> +	if (__is_processor(PV_POWER4) || __is_processor(PV_POWER4p) ||
> +	    __is_processor(PV_970) || __is_processor(PV_970FX) ||
> +	    __is_processor(PV_970MP) || __is_processor(PV_970GX) ||
> +	    __is_processor(PV_POWER5) || __is_processor(PV_POWER5p))
> +		use_slot_nums = 1;
> +
>  	return 0;
>  }
> 
> @@ -206,7 +213,7 @@ static unsigned long get_pc(struct pt_re
> 
>  	mmcra = mfspr(SPRN_MMCRA);
> 
> -	if (mmcra & MMCRA_SAMPLE_ENABLE) {
> +	if (use_slot_nums && (mmcra & MMCRA_SAMPLE_ENABLE)) {
>  		slot = ((mmcra & MMCRA_SLOT) >> MMCRA_SLOT_SHIFT);
>  		if (slot > 1)
>  			pc += 4 * (slot - 1);
> 
> 
> _______________________________________________
> Linuxppc-dev mailing list
> Linuxppc-dev@lists.ozlabs.org
> https://lists.ozlabs.org/listinfo/linuxppc-dev

^ permalink raw reply	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2009-06-16  4:12 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2009-05-01 17:17 [PATCH] Fix oprofile sampling of marked events on POWER7 Maynard Johnson
2009-06-15 13:40 ` Maynard Johnson
2009-06-16  4:12   ` Benjamin Herrenschmidt

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).