* [PATCH] sparc: perf: Add support M7 processor
@ 2015-03-19 20:06 David Ahern
2015-03-20 1:56 ` David Miller
` (15 more replies)
0 siblings, 16 replies; 17+ messages in thread
From: David Ahern @ 2015-03-19 20:06 UTC (permalink / raw)
To: sparclinux
The M7 processor has a different hypervisor group id and different PCR fast
trap values. PIC read/write functions and PCR bit fields are the same as
the T4 so those are reused.
Signed-off-by: David Ahern <david.ahern@oracle.com>
Acked-by: Bob Picco <bob.picco@oracle.com>
---
arch/sparc/include/asm/hypervisor.h | 12 +++++++++++
arch/sparc/kernel/hvapi.c | 1 +
arch/sparc/kernel/hvcalls.S | 16 +++++++++++++++
arch/sparc/kernel/pcr.c | 33 ++++++++++++++++++++++++++++++
arch/sparc/kernel/perf_event.c | 40 +++++++++++++++++++++++++++++++++++++
5 files changed, 102 insertions(+)
diff --git a/arch/sparc/include/asm/hypervisor.h b/arch/sparc/include/asm/hypervisor.h
index 4f6725ff4c33..f5b6537306f0 100644
--- a/arch/sparc/include/asm/hypervisor.h
+++ b/arch/sparc/include/asm/hypervisor.h
@@ -2957,6 +2957,17 @@ unsigned long sun4v_t5_set_perfreg(unsigned long reg_num,
unsigned long reg_val);
#endif
+
+#define HV_FAST_M7_GET_PERFREG 0x43
+#define HV_FAST_M7_SET_PERFREG 0x44
+
+#ifndef __ASSEMBLY__
+unsigned long sun4v_m7_get_perfreg(unsigned long reg_num,
+ unsigned long *reg_val);
+unsigned long sun4v_m7_set_perfreg(unsigned long reg_num,
+ unsigned long reg_val);
+#endif
+
/* Function numbers for HV_CORE_TRAP. */
#define HV_CORE_SET_VER 0x00
#define HV_CORE_PUTCHAR 0x01
@@ -2981,6 +2992,7 @@ unsigned long sun4v_t5_set_perfreg(unsigned long reg_num,
#define HV_GRP_SDIO 0x0108
#define HV_GRP_SDIO_ERR 0x0109
#define HV_GRP_REBOOT_DATA 0x0110
+#define HV_GRP_M7_PERF 0x0114
#define HV_GRP_NIAG_PERF 0x0200
#define HV_GRP_FIRE_PERF 0x0201
#define HV_GRP_N2_CPU 0x0202
diff --git a/arch/sparc/kernel/hvapi.c b/arch/sparc/kernel/hvapi.c
index 5c55145bfbf0..662500fa555f 100644
--- a/arch/sparc/kernel/hvapi.c
+++ b/arch/sparc/kernel/hvapi.c
@@ -48,6 +48,7 @@ static struct api_info api_table[] = {
{ .group = HV_GRP_VT_CPU, },
{ .group = HV_GRP_T5_CPU, },
{ .group = HV_GRP_DIAG, .flags = FLAG_PRE_API },
+ { .group = HV_GRP_M7_PERF, },
};
static DEFINE_SPINLOCK(hvapi_lock);
diff --git a/arch/sparc/kernel/hvcalls.S b/arch/sparc/kernel/hvcalls.S
index caedf8320416..afbaba52d2f1 100644
--- a/arch/sparc/kernel/hvcalls.S
+++ b/arch/sparc/kernel/hvcalls.S
@@ -837,3 +837,19 @@ ENTRY(sun4v_t5_set_perfreg)
retl
nop
ENDPROC(sun4v_t5_set_perfreg)
+
+ENTRY(sun4v_m7_get_perfreg)
+ mov %o1, %o4
+ mov HV_FAST_M7_GET_PERFREG, %o5
+ ta HV_FAST_TRAP
+ stx %o1, [%o4]
+ retl
+ nop
+ENDPROC(sun4v_m7_get_perfreg)
+
+ENTRY(sun4v_m7_set_perfreg)
+ mov HV_FAST_M7_SET_PERFREG, %o5
+ ta HV_FAST_TRAP
+ retl
+ nop
+ENDPROC(sun4v_m7_set_perfreg)
diff --git a/arch/sparc/kernel/pcr.c b/arch/sparc/kernel/pcr.c
index 7e967c8018c8..eb978c77c76a 100644
--- a/arch/sparc/kernel/pcr.c
+++ b/arch/sparc/kernel/pcr.c
@@ -217,6 +217,31 @@ static const struct pcr_ops n5_pcr_ops = {
.pcr_nmi_disable = PCR_N4_PICNPT,
};
+static u64 m7_pcr_read(unsigned long reg_num)
+{
+ unsigned long val;
+
+ (void) sun4v_m7_get_perfreg(reg_num, &val);
+
+ return val;
+}
+
+static void m7_pcr_write(unsigned long reg_num, u64 val)
+{
+ (void) sun4v_m7_set_perfreg(reg_num, val);
+}
+
+static const struct pcr_ops m7_pcr_ops = {
+ .read_pcr = m7_pcr_read,
+ .write_pcr = m7_pcr_write,
+ .read_pic = n4_pic_read,
+ .write_pic = n4_pic_write,
+ .nmi_picl_value = n4_picl_value,
+ .pcr_nmi_enable = (PCR_N4_PICNPT | PCR_N4_STRACE |
+ PCR_N4_UTRACE | PCR_N4_TOE |
+ (26 << PCR_N4_SL_SHIFT)),
+ .pcr_nmi_disable = PCR_N4_PICNPT,
+};
static unsigned long perf_hsvc_group;
static unsigned long perf_hsvc_major;
@@ -248,6 +273,10 @@ static int __init register_perf_hsvc(void)
perf_hsvc_group = HV_GRP_T5_CPU;
break;
+ case SUN4V_CHIP_SPARC_M7:
+ perf_hsvc_group = HV_GRP_M7_PERF;
+ break;
+
default:
return -ENODEV;
}
@@ -293,6 +322,10 @@ static int __init setup_sun4v_pcr_ops(void)
pcr_ops = &n5_pcr_ops;
break;
+ case SUN4V_CHIP_SPARC_M7:
+ pcr_ops = &m7_pcr_ops;
+ break;
+
default:
ret = -ENODEV;
break;
diff --git a/arch/sparc/kernel/perf_event.c b/arch/sparc/kernel/perf_event.c
index af53c25da2e7..86eebfa3b158 100644
--- a/arch/sparc/kernel/perf_event.c
+++ b/arch/sparc/kernel/perf_event.c
@@ -792,6 +792,42 @@ static const struct sparc_pmu niagara4_pmu = {
.num_pic_regs = 4,
};
+static void sparc_m7_write_pmc(int idx, u64 val)
+{
+ u64 pcr;
+
+ pcr = pcr_ops->read_pcr(idx);
+ /* ensure ov and ntc are reset */
+ pcr &= ~(PCR_N4_OV | PCR_N4_NTC);
+
+ pcr_ops->write_pic(idx, val & 0xffffffff);
+
+ pcr_ops->write_pcr(idx, pcr);
+}
+
+static const struct sparc_pmu sparc_m7_pmu = {
+ .event_map = niagara4_event_map,
+ .cache_map = &niagara4_cache_map,
+ .max_events = ARRAY_SIZE(niagara4_perfmon_event_map),
+ .read_pmc = sparc_vt_read_pmc,
+ .write_pmc = sparc_m7_write_pmc,
+ .upper_shift = 5,
+ .lower_shift = 5,
+ .event_mask = 0x7ff,
+ .user_bit = PCR_N4_UTRACE,
+ .priv_bit = PCR_N4_STRACE,
+
+ /* We explicitly don't support hypervisor tracing. */
+ .hv_bit = 0,
+
+ .irq_bit = PCR_N4_TOE,
+ .upper_nop = 0,
+ .lower_nop = 0,
+ .flags = 0,
+ .max_hw_events = 4,
+ .num_pcrs = 4,
+ .num_pic_regs = 4,
+};
static const struct sparc_pmu *sparc_pmu __read_mostly;
static u64 event_encoding(u64 event_id, int idx)
@@ -1658,6 +1694,10 @@ static bool __init supported_pmu(void)
sparc_pmu = &niagara4_pmu;
return true;
}
+ if (!strcmp(sparc_pmu_type, "sparc-m7")) {
+ sparc_pmu = &sparc_m7_pmu;
+ return true;
+ }
return false;
}
--
2.3.0
^ permalink raw reply related [flat|nested] 17+ messages in thread
* Re: [PATCH] sparc: perf: Add support M7 processor
2015-03-19 20:06 [PATCH] sparc: perf: Add support M7 processor David Ahern
@ 2015-03-20 1:56 ` David Miller
2015-03-20 2:55 ` David Ahern
` (14 subsequent siblings)
15 siblings, 0 replies; 17+ messages in thread
From: David Miller @ 2015-03-20 1:56 UTC (permalink / raw)
To: sparclinux
From: David Ahern <david.ahern@oracle.com>
Date: Thu, 19 Mar 2015 16:06:37 -0400
> The M7 processor has a different hypervisor group id and different PCR fast
> trap values. PIC read/write functions and PCR bit fields are the same as
> the T4 so those are reused.
>
> Signed-off-by: David Ahern <david.ahern@oracle.com>
> Acked-by: Bob Picco <bob.picco@oracle.com>
Applied, but two questions:
1) Why didn't you have to deal with the overflow event
latching issues I address in sparc_vt_write_pmc()?
2) How simple is it to hook up a similar set of support
for sparc-m6? It seems like the only PMU type string
we won't match after this.
Thanks.
^ permalink raw reply [flat|nested] 17+ messages in thread
* Re: [PATCH] sparc: perf: Add support M7 processor
2015-03-19 20:06 [PATCH] sparc: perf: Add support M7 processor David Ahern
2015-03-20 1:56 ` David Miller
@ 2015-03-20 2:55 ` David Ahern
2015-03-20 19:38 ` David Miller
` (13 subsequent siblings)
15 siblings, 0 replies; 17+ messages in thread
From: David Ahern @ 2015-03-20 2:55 UTC (permalink / raw)
To: sparclinux
On 3/19/15 7:56 PM, David Miller wrote:
> Applied, but two questions:
>
> 1) Why didn't you have to deal with the overflow event
> latching issues I address in sparc_vt_write_pmc()?
I saw the note. I need to understand why you wrote that. Relevant
sections of the PRM for the T4 and the M7 have the same wording, so I
was surprised to read that. Perhaps a h/w (or h/w revision) quirk?
It was not needed for the M7 -- bare metal or LDOM -- so I opted to go
with the purist approach based on the PRM. As I get time and access to
hardware I will take a look at the T4.
>
> 2) How simple is it to hook up a similar set of support
> for sparc-m6? It seems like the only PMU type string
> we won't match after this.
Ditto. Time and H/W access.
^ permalink raw reply [flat|nested] 17+ messages in thread
* Re: [PATCH] sparc: perf: Add support M7 processor
2015-03-19 20:06 [PATCH] sparc: perf: Add support M7 processor David Ahern
2015-03-20 1:56 ` David Miller
2015-03-20 2:55 ` David Ahern
@ 2015-03-20 19:38 ` David Miller
2015-03-20 19:41 ` David Ahern
` (12 subsequent siblings)
15 siblings, 0 replies; 17+ messages in thread
From: David Miller @ 2015-03-20 19:38 UTC (permalink / raw)
To: sparclinux
From: David Ahern <david.ahern@oracle.com>
Date: Thu, 19 Mar 2015 20:55:27 -0600
> On 3/19/15 7:56 PM, David Miller wrote:
>> Applied, but two questions:
>>
>> 1) Why didn't you have to deal with the overflow event
>> latching issues I address in sparc_vt_write_pmc()?
>
> I saw the note. I need to understand why you wrote that. Relevant
> sections of the PRM for the T4 and the M7 have the same wording, so I
> was surprised to read that. Perhaps a h/w (or h/w revision) quirk?
>
> It was not needed for the M7 -- bare metal or LDOM -- so I opted to go
> with the purist approach based on the PRM. As I get time and access to
> hardware I will take a look at the T4.
I hate having inconsistencies like this.
My two big stress tests were:
1) "perf record make -s -j128" of a kernel build on my T4-2
2) Same kernel build, but instead of using perf record, I ran
"perf top" in another window while "make -s -j128" was
happening.
Eventually, especially in case #2, events simply stopped being
recorded.
I really want to get to the bottom of this rather than putting our
hands in our pockets and saying "meh".
^ permalink raw reply [flat|nested] 17+ messages in thread
* Re: [PATCH] sparc: perf: Add support M7 processor
2015-03-19 20:06 [PATCH] sparc: perf: Add support M7 processor David Ahern
` (2 preceding siblings ...)
2015-03-20 19:38 ` David Miller
@ 2015-03-20 19:41 ` David Ahern
2015-03-20 19:50 ` David Miller
` (11 subsequent siblings)
15 siblings, 0 replies; 17+ messages in thread
From: David Ahern @ 2015-03-20 19:41 UTC (permalink / raw)
To: sparclinux
On 3/20/15 1:38 PM, David Miller wrote:
> My two big stress tests were:
>
> 1) "perf record make -s -j128" of a kernel build on my T4-2
>
> 2) Same kernel build, but instead of using perf record, I ran
> "perf top" in another window while "make -s -j128" was
> happening.
>
> Eventually, especially in case #2, events simply stopped being
> recorded.
I am spending a lot of time on perf right now; will add those 2 cases to
the list.
^ permalink raw reply [flat|nested] 17+ messages in thread
* Re: [PATCH] sparc: perf: Add support M7 processor
2015-03-19 20:06 [PATCH] sparc: perf: Add support M7 processor David Ahern
` (3 preceding siblings ...)
2015-03-20 19:41 ` David Ahern
@ 2015-03-20 19:50 ` David Miller
2015-04-13 17:53 ` David Ahern
` (10 subsequent siblings)
15 siblings, 0 replies; 17+ messages in thread
From: David Miller @ 2015-03-20 19:50 UTC (permalink / raw)
To: sparclinux
From: David Ahern <david.ahern@oracle.com>
Date: Fri, 20 Mar 2015 13:41:37 -0600
> On 3/20/15 1:38 PM, David Miller wrote:
>> My two big stress tests were:
>>
>> 1) "perf record make -s -j128" of a kernel build on my T4-2
>>
>> 2) Same kernel build, but instead of using perf record, I ran
>> "perf top" in another window while "make -s -j128" was
>> happening.
>>
>> Eventually, especially in case #2, events simply stopped being
>> recorded.
>
> I am spending a lot of time on perf right now; will add those 2 cases
> to the list.
Thanks a lot.
^ permalink raw reply [flat|nested] 17+ messages in thread
* Re: [PATCH] sparc: perf: Add support M7 processor
2015-03-19 20:06 [PATCH] sparc: perf: Add support M7 processor David Ahern
` (4 preceding siblings ...)
2015-03-20 19:50 ` David Miller
@ 2015-04-13 17:53 ` David Ahern
2015-04-16 19:35 ` David Miller
` (9 subsequent siblings)
15 siblings, 0 replies; 17+ messages in thread
From: David Ahern @ 2015-04-13 17:53 UTC (permalink / raw)
To: sparclinux
On 3/20/15 1:38 PM, David Miller wrote:
> From: David Ahern <david.ahern@oracle.com>
> Date: Thu, 19 Mar 2015 20:55:27 -0600
>
>> On 3/19/15 7:56 PM, David Miller wrote:
>>> Applied, but two questions:
>>>
>>> 1) Why didn't you have to deal with the overflow event
>>> latching issues I address in sparc_vt_write_pmc()?
>>
>> I saw the note. I need to understand why you wrote that. Relevant
>> sections of the PRM for the T4 and the M7 have the same wording, so I
>> was surprised to read that. Perhaps a h/w (or h/w revision) quirk?
>>
>> It was not needed for the M7 -- bare metal or LDOM -- so I opted to go
>> with the purist approach based on the PRM. As I get time and access to
>> hardware I will take a look at the T4.
>
> I hate having inconsistencies like this.
>
> My two big stress tests were:
>
> 1) "perf record make -s -j128" of a kernel build on my T4-2
>
> 2) Same kernel build, but instead of using perf record, I ran
> "perf top" in another window while "make -s -j128" was
> happening.
>
> Eventually, especially in case #2, events simply stopped being
> recorded.
T7-4 showed no problems with the patch that was accepted. Through
several 'perf record -- make -j 1024' sessions (make clean in between)
and with a perf-top running in a separate window for a long period of
time, all sessions continued to see samples.
I changed the T4 write_pmc handler to use the m7 variant:
+static void sparc_m7_write_pmc(int idx, u64 val);
static const struct sparc_pmu niagara4_pmu = {
.event_map = niagara4_event_map,
.cache_map = &niagara4_cache_map,
.max_events = ARRAY_SIZE(niagara4_perfmon_event_map),
.read_pmc = sparc_vt_read_pmc,
- .write_pmc = sparc_vt_write_pmc,
+ .write_pmc = sparc_m7_write_pmc,
.upper_shift = 5,
.lower_shift = 5,
.event_mask = 0x7ff,
and a T4-1 showed no problems either (-j 64 for this one).
David
^ permalink raw reply [flat|nested] 17+ messages in thread
* Re: [PATCH] sparc: perf: Add support M7 processor
2015-03-19 20:06 [PATCH] sparc: perf: Add support M7 processor David Ahern
` (5 preceding siblings ...)
2015-04-13 17:53 ` David Ahern
@ 2015-04-16 19:35 ` David Miller
2015-04-21 20:15 ` David Miller
` (8 subsequent siblings)
15 siblings, 0 replies; 17+ messages in thread
From: David Miller @ 2015-04-16 19:35 UTC (permalink / raw)
To: sparclinux
From: David Ahern <david.ahern@oracle.com>
Date: Mon, 13 Apr 2015 11:53:03 -0600
> T7-4 showed no problems with the patch that was accepted. Through
> several 'perf record -- make -j 1024' sessions (make clean in between)
> and with a perf-top running in a separate window for a long period of
> time, all sessions continued to see samples.
>
> I changed the T4 write_pmc handler to use the m7 variant:
>
> +static void sparc_m7_write_pmc(int idx, u64 val);
>
> static const struct sparc_pmu niagara4_pmu = {
> .event_map = niagara4_event_map,
> .cache_map = &niagara4_cache_map,
> .max_events = ARRAY_SIZE(niagara4_perfmon_event_map),
> .read_pmc = sparc_vt_read_pmc,
> - .write_pmc = sparc_vt_write_pmc,
> + .write_pmc = sparc_m7_write_pmc,
> .upper_shift = 5,
> .lower_shift = 5,
> .event_mask = 0x7ff,
>
> and a T4-1 showed no problems either (-j 64 for this one).
Fair enough. I'll run the same test and if I can't replicate the
problems I ran into way-back-when, let's just use the same routine
for all of these chips.
Thanks.
^ permalink raw reply [flat|nested] 17+ messages in thread
* Re: [PATCH] sparc: perf: Add support M7 processor
2015-03-19 20:06 [PATCH] sparc: perf: Add support M7 processor David Ahern
` (6 preceding siblings ...)
2015-04-16 19:35 ` David Miller
@ 2015-04-21 20:15 ` David Miller
2015-04-22 22:51 ` David Miller
` (7 subsequent siblings)
15 siblings, 0 replies; 17+ messages in thread
From: David Miller @ 2015-04-21 20:15 UTC (permalink / raw)
To: sparclinux
From: David Ahern <david.ahern@oracle.com>
Date: Mon, 13 Apr 2015 11:53:03 -0600
> T7-4 showed no problems with the patch that was accepted. Through
> several 'perf record -- make -j 1024' sessions (make clean in between)
> and with a perf-top running in a separate window for a long period of
> time, all sessions continued to see samples.
>
> I changed the T4 write_pmc handler to use the m7 variant:
>
> +static void sparc_m7_write_pmc(int idx, u64 val);
...
> and a T4-1 showed no problems either (-j 64 for this one).
Ok, I'm convinced, and just pushed the following into my tree.
Thanks.
==========
[PATCH] sparc64: Use M7 PMC write on all chips T4 and onward.
They both work equally well, and the M7 implementation is
simpler and cheaper (fewer register writes).
With help from David Ahern.
Signed-off-by: David S. Miller <davem@davemloft.net>
---
arch/sparc/kernel/perf_event.c | 35 +++--------------------------------
1 file changed, 3 insertions(+), 32 deletions(-)
diff --git a/arch/sparc/kernel/perf_event.c b/arch/sparc/kernel/perf_event.c
index 86eebfa..59cf917 100644
--- a/arch/sparc/kernel/perf_event.c
+++ b/arch/sparc/kernel/perf_event.c
@@ -737,25 +737,9 @@ static void sparc_vt_write_pmc(int idx, u64 val)
{
u64 pcr;
- /* There seems to be an internal latch on the overflow event
- * on SPARC-T4 that prevents it from triggering unless you
- * update the PIC exactly as we do here. The requirement
- * seems to be that you have to turn off event counting in the
- * PCR around the PIC update.
- *
- * For example, after the following sequence:
- *
- * 1) set PIC to -1
- * 2) enable event counting and overflow reporting in PCR
- * 3) overflow triggers, softint 15 handler invoked
- * 4) clear OV bit in PCR
- * 5) write PIC to -1
- *
- * a subsequent overflow event will not trigger. This
- * sequence works on SPARC-T3 and previous chips.
- */
pcr = pcr_ops->read_pcr(idx);
- pcr_ops->write_pcr(idx, PCR_N4_PICNPT);
+ /* ensure ov and ntc are reset */
+ pcr &= ~(PCR_N4_OV | PCR_N4_NTC);
pcr_ops->write_pic(idx, val & 0xffffffff);
@@ -792,25 +776,12 @@ static const struct sparc_pmu niagara4_pmu = {
.num_pic_regs = 4,
};
-static void sparc_m7_write_pmc(int idx, u64 val)
-{
- u64 pcr;
-
- pcr = pcr_ops->read_pcr(idx);
- /* ensure ov and ntc are reset */
- pcr &= ~(PCR_N4_OV | PCR_N4_NTC);
-
- pcr_ops->write_pic(idx, val & 0xffffffff);
-
- pcr_ops->write_pcr(idx, pcr);
-}
-
static const struct sparc_pmu sparc_m7_pmu = {
.event_map = niagara4_event_map,
.cache_map = &niagara4_cache_map,
.max_events = ARRAY_SIZE(niagara4_perfmon_event_map),
.read_pmc = sparc_vt_read_pmc,
- .write_pmc = sparc_m7_write_pmc,
+ .write_pmc = sparc_vt_write_pmc,
.upper_shift = 5,
.lower_shift = 5,
.event_mask = 0x7ff,
--
2.1.2.532.g19b5d50
^ permalink raw reply related [flat|nested] 17+ messages in thread
* Re: [PATCH] sparc: perf: Add support M7 processor
2015-03-19 20:06 [PATCH] sparc: perf: Add support M7 processor David Ahern
` (7 preceding siblings ...)
2015-04-21 20:15 ` David Miller
@ 2015-04-22 22:51 ` David Miller
2015-04-22 23:10 ` Khalid Aziz
` (6 subsequent siblings)
15 siblings, 0 replies; 17+ messages in thread
From: David Miller @ 2015-04-22 22:51 UTC (permalink / raw)
To: sparclinux
David, are there any major M7 patches remaining for basic
functionality?
Khalid's TTE patch is the only one I can think of and I'm surprised
that hasn't been resubmitted yet, as I'd really like to apply it.
Thanks.
^ permalink raw reply [flat|nested] 17+ messages in thread
* Re: [PATCH] sparc: perf: Add support M7 processor
2015-03-19 20:06 [PATCH] sparc: perf: Add support M7 processor David Ahern
` (8 preceding siblings ...)
2015-04-22 22:51 ` David Miller
@ 2015-04-22 23:10 ` Khalid Aziz
2015-04-22 23:13 ` David Miller
` (5 subsequent siblings)
15 siblings, 0 replies; 17+ messages in thread
From: Khalid Aziz @ 2015-04-22 23:10 UTC (permalink / raw)
To: sparclinux
On 04/22/2015 04:51 PM, David Miller wrote:
> Khalid's TTE patch is the only one I can think of and I'm surprised
> that hasn't been resubmitted yet, as I'd really like to apply it.
I need to verify this patch on 4.0 and then I will resubmit it. Just
didn't get a chance to do that. I should be able to do that early next week.
--
Khalid
^ permalink raw reply [flat|nested] 17+ messages in thread
* Re: [PATCH] sparc: perf: Add support M7 processor
2015-03-19 20:06 [PATCH] sparc: perf: Add support M7 processor David Ahern
` (9 preceding siblings ...)
2015-04-22 23:10 ` Khalid Aziz
@ 2015-04-22 23:13 ` David Miller
2015-04-22 23:19 ` David Ahern
` (4 subsequent siblings)
15 siblings, 0 replies; 17+ messages in thread
From: David Miller @ 2015-04-22 23:13 UTC (permalink / raw)
To: sparclinux
From: Khalid Aziz <khalid.aziz@oracle.com>
Date: Wed, 22 Apr 2015 17:10:01 -0600
> On 04/22/2015 04:51 PM, David Miller wrote:
>> Khalid's TTE patch is the only one I can think of and I'm surprised
>> that hasn't been resubmitted yet, as I'd really like to apply it.
>
> I need to verify this patch on 4.0 and then I will resubmit it. Just
> didn't get a chance to do that. I should be able to do that early next
> week.
Ok, thanks Khalid.
^ permalink raw reply [flat|nested] 17+ messages in thread
* Re: [PATCH] sparc: perf: Add support M7 processor
2015-03-19 20:06 [PATCH] sparc: perf: Add support M7 processor David Ahern
` (10 preceding siblings ...)
2015-04-22 23:13 ` David Miller
@ 2015-04-22 23:19 ` David Ahern
2015-04-22 23:25 ` David Miller
` (3 subsequent siblings)
15 siblings, 0 replies; 17+ messages in thread
From: David Ahern @ 2015-04-22 23:19 UTC (permalink / raw)
To: sparclinux
On 4/22/15 4:51 PM, David Miller wrote:
>
> David, are there any major M7 patches remaining for basic
> functionality?
>
I am not aware of anything for basic functionality.
Only thing left in my queue is optimized versions of the ffs / fls
families, but that patch is v9b specific, not M7.
I'd like to put some attention on precise mode for perf counters; it
just has not bubbled to the top.
^ permalink raw reply [flat|nested] 17+ messages in thread
* Re: [PATCH] sparc: perf: Add support M7 processor
2015-03-19 20:06 [PATCH] sparc: perf: Add support M7 processor David Ahern
` (11 preceding siblings ...)
2015-04-22 23:19 ` David Ahern
@ 2015-04-22 23:25 ` David Miller
2015-04-22 23:30 ` Khalid Aziz
` (2 subsequent siblings)
15 siblings, 0 replies; 17+ messages in thread
From: David Miller @ 2015-04-22 23:25 UTC (permalink / raw)
To: sparclinux
From: David Ahern <david.ahern@oracle.com>
Date: Wed, 22 Apr 2015 17:19:23 -0600
> Only thing left in my queue is optimized versions of the ffs / fls
> families, but that patch is v9b specific, not M7.
Something faster than the popc thing in arch/sparc/lib/ffs.S?
Are you thinking of using "lzcnt"? I wasn't impressed with the
performance of that instruction last time I played around with it.
> I'd like to put some attention on precise mode for perf counters; it
> just has not bubbled to the top.
That plus the backtrace deadlock thing we're discussing in another
thread, that bug is irritating because your pagefault_disable() change
should "just work".
^ permalink raw reply [flat|nested] 17+ messages in thread
* Re: [PATCH] sparc: perf: Add support M7 processor
2015-03-19 20:06 [PATCH] sparc: perf: Add support M7 processor David Ahern
` (12 preceding siblings ...)
2015-04-22 23:25 ` David Miller
@ 2015-04-22 23:30 ` Khalid Aziz
2015-04-23 0:29 ` David Ahern
2015-04-23 1:39 ` David Miller
15 siblings, 0 replies; 17+ messages in thread
From: Khalid Aziz @ 2015-04-22 23:30 UTC (permalink / raw)
To: sparclinux
On 04/22/2015 04:51 PM, David Miller wrote:
>
> David, are there any major M7 patches remaining for basic
> functionality?
>
There is one more patch I am working on that would be part of basic
functionality. M7 introduces the register MCDPER which determines
whether a precise or a disrupting exception will be delivered to a task
when it encounters MCD error. This should be a per task value and hence
will need to become part of task context (thread info flags maybe). I am
working on code to save and restore this register on context switches,
and propagate it on task dup.
--
Khalid
^ permalink raw reply [flat|nested] 17+ messages in thread
* Re: [PATCH] sparc: perf: Add support M7 processor
2015-03-19 20:06 [PATCH] sparc: perf: Add support M7 processor David Ahern
` (13 preceding siblings ...)
2015-04-22 23:30 ` Khalid Aziz
@ 2015-04-23 0:29 ` David Ahern
2015-04-23 1:39 ` David Miller
15 siblings, 0 replies; 17+ messages in thread
From: David Ahern @ 2015-04-23 0:29 UTC (permalink / raw)
To: sparclinux
[-- Attachment #1: Type: text/plain, Size: 1352 bytes --]
On 4/22/15 5:25 PM, David Miller wrote:
> From: David Ahern <david.ahern@oracle.com>
> Date: Wed, 22 Apr 2015 17:19:23 -0600
>
>> Only thing left in my queue is optimized versions of the ffs / fls
>> families, but that patch is v9b specific, not M7.
>
> Something faster than the popc thing in arch/sparc/lib/ffs.S?
hmmm... i saw that, but wasn't clear 1) how it got inserted and 2) the
overhead of a function call versus inline. Anyways, what I have is the
same 3 instructions as an inline. But really the __ffs was just along
for the ride; the focus was on __fls.
>
> Are you thinking of using "lzcnt"? I wasn't impressed with the
> performance of that instruction last time I played around with it.
A comparison of what I hacked together is attached (columns too wide for
inline). Data is from a T4-2. It shows lzcnt to be better for __fls, fls
and fls64.
>
>> I'd like to put some attention on precise mode for perf counters; it
>> just has not bubbled to the top.
>
> That plus the backtrace deadlock thing we're discussing in another
> thread, that bug is irritating because your pagefault_disable() change
> should "just work".
>
oh, yes. forgot about that one. I spent too many hours trying to figure
out why processes get killed with a sigbus. I added an option to perf
tool to skip userspace chains until I can get back to it.
[-- Attachment #2: fls-cmp.txt --]
[-- Type: text/plain, Size: 739 bytes --]
- "slow" means version from asm-generic.
- Times are in nsec.
- 'bit' column shown to ensure correct answer between current and lzcnt
- average of 10 back-to-back calls
| __fls | fls | fls64
word | lzcnt slow | lzcnt slow | lzcnt slow
| bit dt bit dt | bit dt bit dt | bit dt bit dt
0 | 0 15 0 67 | 0 19 0 21 | 0 14 0 14
1 | 0 13 0 50 | 1 32 1 61 | 1 20 1 51
80000000 | 31 13 31 39 | 32 30 32 49 | 64 25 64 37
8000000000000000 | 63 13 63 34 | 0 17 0 16 | 0 12 0 14
^ permalink raw reply [flat|nested] 17+ messages in thread
* Re: [PATCH] sparc: perf: Add support M7 processor
2015-03-19 20:06 [PATCH] sparc: perf: Add support M7 processor David Ahern
` (14 preceding siblings ...)
2015-04-23 0:29 ` David Ahern
@ 2015-04-23 1:39 ` David Miller
15 siblings, 0 replies; 17+ messages in thread
From: David Miller @ 2015-04-23 1:39 UTC (permalink / raw)
To: sparclinux
From: David Ahern <david.ahern@oracle.com>
Date: Wed, 22 Apr 2015 18:29:12 -0600
> On 4/22/15 5:25 PM, David Miller wrote:
>> From: David Ahern <david.ahern@oracle.com>
>> Date: Wed, 22 Apr 2015 17:19:23 -0600
>>
>>> Only thing left in my queue is optimized versions of the ffs / fls
>>> families, but that patch is v9b specific, not M7.
>>
>> Something faster than the popc thing in arch/sparc/lib/ffs.S?
>
> hmmm... i saw that, but wasn't clear 1) how it got inserted and 2) the
> overhead of a function call versus inline. Anyways, what I have is the
> same 3 instructions as an inline. But really the __ffs was just along
> for the ride; the focus was on __fls.
Because we must support all processors in a single kernel image, the
called assembler routine that gets patched is the best tradeoff in my
opinion.
I strongly recommend we do the same thing for any optimizations done
to fls*().
>> Are you thinking of using "lzcnt"? I wasn't impressed with the
>> performance of that instruction last time I played around with it.
>
> A comparison of what I hacked together is attached (columns too wide
> for inline). Data is from a T4-2. It shows lzcnt to be better for
> __fls, fls and fls64.
Cool, is it faster when used in your tests for ffs() too?
^ permalink raw reply [flat|nested] 17+ messages in thread
end of thread, other threads:[~2015-04-23 1:39 UTC | newest]
Thread overview: 17+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2015-03-19 20:06 [PATCH] sparc: perf: Add support M7 processor David Ahern
2015-03-20 1:56 ` David Miller
2015-03-20 2:55 ` David Ahern
2015-03-20 19:38 ` David Miller
2015-03-20 19:41 ` David Ahern
2015-03-20 19:50 ` David Miller
2015-04-13 17:53 ` David Ahern
2015-04-16 19:35 ` David Miller
2015-04-21 20:15 ` David Miller
2015-04-22 22:51 ` David Miller
2015-04-22 23:10 ` Khalid Aziz
2015-04-22 23:13 ` David Miller
2015-04-22 23:19 ` David Ahern
2015-04-22 23:25 ` David Miller
2015-04-22 23:30 ` Khalid Aziz
2015-04-23 0:29 ` David Ahern
2015-04-23 1:39 ` David Miller
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).