linux-kernel.vger.kernel.org archive mirror
From: Stephane Eranian <eranian@google.com>
To: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: mingo@elte.hu, linux-kernel@vger.kernel.org, paulus@samba.org,
	robert.richter@amd.com, fweisbec@gmail.com
Subject: Re: [RFC][PATCH 08/11] perf, x86: Implement simple LBR support
Date: Wed, 3 Mar 2010 22:57:27 +0100
Message-ID: <bd4cb8901003031357x9c13c96h3899ab0459a34b0e@mail.gmail.com>
In-Reply-To: <20100303164306.451251096@chello.nl>


I don't understand how LBR state is migrated when a per-thread event is
moved from one CPU to another. The LBR stack appears to be managed
strictly per-CPU, so when a per-thread event is scheduled in on a new
CPU, the branch history it accumulated on the old CPU looks like it is
simply lost (the reset on context change avoids leaks, but nothing
carries the entries over). Can you explain this to me?
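
To make the concern concrete, below is a rough sketch of the
save/restore I would have expected around scheduling, so the event
carries its own snapshot across CPUs. To be clear, this is entirely
hypothetical: struct lbr_snapshot, the event->hw.lbr_snap field and
both helpers are invented for illustration and exist neither in your
patch nor in the kernel; the sketch only reuses struct lbr_entry,
MAX_LBR_ENTRIES, x86_pmu and cpu_hw_events from the patch quoted below.

/*
 * Hypothetical sketch only -- nothing below exists in the patch.
 */
struct lbr_snapshot {
	int			nr;
	struct lbr_entry	entries[MAX_LBR_ENTRIES];
};

/* On unschedule: drain the per-cpu LBR state into the event. */
static void lbr_save(struct perf_event *event, struct cpu_hw_events *cpuc)
{
	struct lbr_snapshot *snap = event->hw.lbr_snap;	/* invented field */

	intel_pmu_lbr_read();			/* fills cpuc->lbr_stack */
	snap->nr = cpuc->lbr_entries;
	memcpy(snap->entries, cpuc->lbr_stack,
	       snap->nr * sizeof(snap->entries[0]));
}

/* On schedule-in, possibly on another CPU: write the entries back. */
static void lbr_restore(struct perf_event *event)
{
	struct lbr_snapshot *snap = event->hw.lbr_snap;
	int i;

	/*
	 * Whether the TOS MSR can be rewound is model specific, so this
	 * sketch simply re-bases the saved entries at index 0.
	 */
	for (i = 0; i < snap->nr; i++) {
		wrmsrl(x86_pmu.lbr_from + i, snap->entries[i].from);
		wrmsrl(x86_pmu.lbr_to   + i, snap->entries[i].to);
	}
}

Is something along these lines planned, or is the intent that per-thread
LBR data is only meaningful between migrations?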

On Wed, Mar 3, 2010 at 5:39 PM, Peter Zijlstra <a.p.zijlstra@chello.nl> wrote:
> Implement support for Intel LBR stacks that support
> FREEZE_LBRS_ON_PMI. We do not (yet?) support the LBR config register
> because that is SMT wide and would also put undue restraints on the
> PEBS users.
>
> Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
> ---
>  arch/x86/kernel/cpu/perf_event.c           |   22 ++
>  arch/x86/kernel/cpu/perf_event_intel.c     |   13 +
>  arch/x86/kernel/cpu/perf_event_intel_lbr.c |  228 +++++++++++++++++++++++++++++
>  3 files changed, 263 insertions(+)
>
> Index: linux-2.6/arch/x86/kernel/cpu/perf_event.c
> ===================================================================
> --- linux-2.6.orig/arch/x86/kernel/cpu/perf_event.c
> +++ linux-2.6/arch/x86/kernel/cpu/perf_event.c
> @@ -48,6 +48,12 @@ struct amd_nb {
>        struct event_constraint event_constraints[X86_PMC_IDX_MAX];
>  };
>
> +#define MAX_LBR_ENTRIES                16
> +
> +struct lbr_entry {
> +       u64     from, to, flags;
> +};
> +
>  struct cpu_hw_events {
>        /*
>         * Generic x86 PMC bits
> @@ -70,6 +76,14 @@ struct cpu_hw_events {
>        u64                     pebs_enabled;
>
>        /*
> +        * Intel LBR bits
> +        */
> +       int                     lbr_users;
> +       int                     lbr_entries;
> +       struct lbr_entry        lbr_stack[MAX_LBR_ENTRIES];
> +       void                    *lbr_context;
> +
> +       /*
>         * AMD specific bits
>         */
>        struct amd_nb           *amd_nb;
> @@ -154,6 +168,13 @@ struct x86_pmu {
>        int             pebs_record_size;
>        void            (*drain_pebs)(void);
>        struct event_constraint *pebs_constraints;
> +
> +       /*
> +        * Intel LBR
> +        */
> +       unsigned long   lbr_tos, lbr_from, lbr_to; /* MSR base regs       */
> +       int             lbr_nr;                    /* hardware stack size */
> +       int             lbr_format;                /* hardware format     */
>  };
>
>  static struct x86_pmu x86_pmu __read_mostly;
> @@ -1238,6 +1259,7 @@ undo:
>
>  #include "perf_event_amd.c"
>  #include "perf_event_p6.c"
> +#include "perf_event_intel_lbr.c"
>  #include "perf_event_intel_ds.c"
>  #include "perf_event_intel.c"
>
> Index: linux-2.6/arch/x86/kernel/cpu/perf_event_intel.c
> ===================================================================
> --- linux-2.6.orig/arch/x86/kernel/cpu/perf_event_intel.c
> +++ linux-2.6/arch/x86/kernel/cpu/perf_event_intel.c
> @@ -480,6 +480,7 @@ static void intel_pmu_disable_all(void)
>                intel_pmu_disable_bts();
>
>        intel_pmu_pebs_disable_all();
> +       intel_pmu_lbr_disable_all();
>  }
>
>  static void intel_pmu_enable_all(void)
> @@ -499,6 +500,7 @@ static void intel_pmu_enable_all(void)
>        }
>
>        intel_pmu_pebs_enable_all();
> +       intel_pmu_lbr_enable_all();
>  }
>
>  static inline u64 intel_pmu_get_status(void)
> @@ -675,6 +677,8 @@ again:
>        inc_irq_stat(apic_perf_irqs);
>        ack = status;
>
> +       intel_pmu_lbr_read();
> +
>        /*
>         * PEBS overflow sets bit 62 in the global status register
>         */
> @@ -847,6 +851,8 @@ static __init int intel_pmu_init(void)
>                memcpy(hw_cache_event_ids, core2_hw_cache_event_ids,
>                       sizeof(hw_cache_event_ids));
>
> +               intel_pmu_lbr_init_core();
> +
>                x86_pmu.event_constraints = intel_core2_event_constraints;
>                pr_cont("Core2 events, ");
>                break;
> @@ -856,13 +862,18 @@ static __init int intel_pmu_init(void)
>                memcpy(hw_cache_event_ids, nehalem_hw_cache_event_ids,
>                       sizeof(hw_cache_event_ids));
>
> +               intel_pmu_lbr_init_nhm();
> +
>                x86_pmu.event_constraints = intel_nehalem_event_constraints;
>                pr_cont("Nehalem/Corei7 events, ");
>                break;
> +
>        case 28: /* Atom */
>                memcpy(hw_cache_event_ids, atom_hw_cache_event_ids,
>                       sizeof(hw_cache_event_ids));
>
> +               intel_pmu_lbr_init_atom();
> +
>                x86_pmu.event_constraints = intel_gen_event_constraints;
>                pr_cont("Atom events, ");
>                break;
> @@ -872,6 +883,8 @@ static __init int intel_pmu_init(void)
>                memcpy(hw_cache_event_ids, westmere_hw_cache_event_ids,
>                       sizeof(hw_cache_event_ids));
>
> +               intel_pmu_lbr_init_nhm();
> +
>                x86_pmu.event_constraints = intel_westmere_event_constraints;
>                pr_cont("Westmere events, ");
>                break;
> Index: linux-2.6/arch/x86/kernel/cpu/perf_event_intel_lbr.c
> ===================================================================
> --- /dev/null
> +++ linux-2.6/arch/x86/kernel/cpu/perf_event_intel_lbr.c
> @@ -0,0 +1,228 @@
> +#ifdef CONFIG_CPU_SUP_INTEL
> +
> +enum {
> +       LBR_FORMAT_32           = 0x00,
> +       LBR_FORMAT_LIP          = 0x01,
> +       LBR_FORMAT_EIP          = 0x02,
> +       LBR_FORMAT_EIP_FLAGS    = 0x03,
> +};
> +
> +/*
> + * We only support LBR implementations that have FREEZE_LBRS_ON_PMI
> + * otherwise it becomes near impossible to get a reliable stack.
> + */
> +
> +#define X86_DEBUGCTL_LBR                               (1 << 0)
> +#define X86_DEBUGCTL_FREEZE_LBRS_ON_PMI                (1 << 11)
> +
> +static void __intel_pmu_lbr_enable(void)
> +{
> +       u64 debugctl;
> +
> +       rdmsrl(MSR_IA32_DEBUGCTLMSR, debugctl);
> +       debugctl |= (X86_DEBUGCTL_LBR | X86_DEBUGCTL_FREEZE_LBRS_ON_PMI);
> +       wrmsrl(MSR_IA32_DEBUGCTLMSR, debugctl);
> +}
> +
> +static void __intel_pmu_lbr_disable(void)
> +{
> +       u64 debugctl;
> +
> +       rdmsrl(MSR_IA32_DEBUGCTLMSR, debugctl);
> +       debugctl &= ~(X86_DEBUGCTL_LBR | X86_DEBUGCTL_FREEZE_LBRS_ON_PMI);
> +       wrmsrl(MSR_IA32_DEBUGCTLMSR, debugctl);
> +}
> +
> +static void intel_pmu_lbr_reset_32(void)
> +{
> +       int i;
> +
> +       for (i = 0; i < x86_pmu.lbr_nr; i++)
> +               wrmsrl(x86_pmu.lbr_from + i, 0);
> +}
> +
> +static void intel_pmu_lbr_reset_64(void)
> +{
> +       int i;
> +
> +       for (i = 0; i < x86_pmu.lbr_nr; i++) {
> +               wrmsrl(x86_pmu.lbr_from + i, 0);
> +               wrmsrl(x86_pmu.lbr_to   + i, 0);
> +       }
> +}
> +
> +static void intel_pmu_lbr_reset(void)
> +{
> +       if (x86_pmu.lbr_format == LBR_FORMAT_32)
> +               intel_pmu_lbr_reset_32();
> +       else
> +               intel_pmu_lbr_reset_64();
> +}
> +
> +static void intel_pmu_lbr_enable(struct perf_event *event)
> +{
> +       struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
> +
> +       if (!x86_pmu.lbr_nr)
> +               return;
> +
> +       WARN_ON(cpuc->enabled);
> +
> +       /*
> +        * Reset the LBR stack if this is the first LBR user or
> +        * we changed task context so as to avoid data leaks.
> +        */
> +
> +       if (!cpuc->lbr_users ||
> +           (event->ctx->task && cpuc->lbr_context != event->ctx)) {
> +               intel_pmu_lbr_reset();
> +               cpuc->lbr_context = event->ctx;
> +       }
> +
> +       cpuc->lbr_users++;
> +}
> +
> +static void intel_pmu_lbr_disable(struct perf_event *event)
> +{
> +       struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
> +
> +       if (!x86_pmu.lbr_nr)
> +               return;
> +
> +       cpuc->lbr_users--;
> +
> +       BUG_ON(cpuc->lbr_users < 0);
> +       WARN_ON(cpuc->enabled);
> +}
> +
> +static void intel_pmu_lbr_enable_all(void)
> +{
> +       struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
> +
> +       if (cpuc->lbr_users)
> +               __intel_pmu_lbr_enable();
> +}
> +
> +static void intel_pmu_lbr_disable_all(void)
> +{
> +       struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
> +
> +       if (cpuc->lbr_users)
> +               __intel_pmu_lbr_disable();
> +}
> +
> +static inline u64 intel_pmu_lbr_tos(void)
> +{
> +       u64 tos;
> +
> +       rdmsrl(x86_pmu.lbr_tos, tos);
> +
> +       return tos;
> +}
> +
> +static void intel_pmu_lbr_read_32(struct cpu_hw_events *cpuc)
> +{
> +       unsigned long mask = x86_pmu.lbr_nr - 1;
> +       u64 tos = intel_pmu_lbr_tos();
> +       int i;
> +
> +       for (i = 0; i < x86_pmu.lbr_nr; i++, tos--) {
> +               unsigned long lbr_idx = (tos - i) & mask;
> +               union {
> +                       struct {
> +                               u32 from;
> +                               u32 to;
> +                       };
> +                       u64     lbr;
> +               } msr_lastbranch;
> +
> +               rdmsrl(x86_pmu.lbr_from + lbr_idx, msr_lastbranch.lbr);
> +
> +               cpuc->lbr_stack[i].from  = msr_lastbranch.from;
> +               cpuc->lbr_stack[i].to    = msr_lastbranch.to;
> +               cpuc->lbr_stack[i].flags = 0;
> +       }
> +       cpuc->lbr_entries = i;
> +}
> +
> +#define LBR_FROM_FLAG_MISPRED  (1ULL << 63)
> +
> +/*
> + * Due to lack of segmentation in Linux the effective address (offset)
> + * is the same as the linear address, allowing us to merge the LIP and EIP
> + * LBR formats.
> + */
> +static void intel_pmu_lbr_read_64(struct cpu_hw_events *cpuc)
> +{
> +       unsigned long mask = x86_pmu.lbr_nr - 1;
> +       u64 tos = intel_pmu_lbr_tos();
> +       int i;
> +
> +       for (i = 0; i < x86_pmu.lbr_nr; i++, tos--) {
> +               unsigned long lbr_idx = (tos - i) & mask;
> +               u64 from, to, flags = 0;
> +
> +               rdmsrl(x86_pmu.lbr_from + lbr_idx, from);
> +               rdmsrl(x86_pmu.lbr_to   + lbr_idx, to);
> +
> +               if (x86_pmu.lbr_format == LBR_FORMAT_EIP_FLAGS) {
> +                       flags = !!(from & LBR_FROM_FLAG_MISPRED);
> +                       from = (u64)((((s64)from) << 1) >> 1);
> +               }
> +
> +               cpuc->lbr_stack[i].from  = from;
> +               cpuc->lbr_stack[i].to    = to;
> +               cpuc->lbr_stack[i].flags = flags;
> +       }
> +       cpuc->lbr_entries = i;
> +}
> +
> +static void intel_pmu_lbr_read(void)
> +{
> +       struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
> +
> +       if (!cpuc->lbr_users)
> +               return;
> +
> +       if (x86_pmu.lbr_format == LBR_FORMAT_32)
> +               intel_pmu_lbr_read_32(cpuc);
> +       else
> +               intel_pmu_lbr_read_64(cpuc);
> +}
> +
> +static int intel_pmu_lbr_format(void)
> +{
> +       u64 capabilities;
> +
> +       rdmsrl(MSR_IA32_PERF_CAPABILITIES, capabilities);
> +       return capabilities & 0x1f;
> +}
> +
> +static void intel_pmu_lbr_init_core(void)
> +{
> +       x86_pmu.lbr_format = intel_pmu_lbr_format();
> +       x86_pmu.lbr_nr     = 4;
> +       x86_pmu.lbr_tos    = 0x01c9;
> +       x86_pmu.lbr_from   = 0x40;
> +       x86_pmu.lbr_to     = 0x60;
> +}
> +
> +static void intel_pmu_lbr_init_nhm(void)
> +{
> +       x86_pmu.lbr_format = intel_pmu_lbr_format();
> +       x86_pmu.lbr_nr     = 16;
> +       x86_pmu.lbr_tos    = 0x01c9;
> +       x86_pmu.lbr_from   = 0x680;
> +       x86_pmu.lbr_to     = 0x6c0;
> +}
> +
> +static void intel_pmu_lbr_init_atom(void)
> +{
> +       x86_pmu.lbr_format = intel_pmu_lbr_format();
> +       x86_pmu.lbr_nr     = 8;
> +       x86_pmu.lbr_tos    = 0x01c9;
> +       x86_pmu.lbr_from   = 0x40;
> +       x86_pmu.lbr_to     = 0x60;
> +}
> +
> +#endif /* CONFIG_CPU_SUP_INTEL */
>
> --
>
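
One more remark, mostly for other readers of the series: in the
LBR_FORMAT_EIP_FLAGS case above, the hardware stores the mispredict
flag in bit 63 of LBR_FROM, and the ((s64)from << 1) >> 1 pair
sign-extends bit 62 back into bit 63 to recover a canonical address.
A self-contained userspace illustration of that decoding follows; the
example address is made up, and the arithmetic right shift on a signed
value is a GCC behavior, the same assumption the kernel code makes.

#include <stdio.h>
#include <stdint.h>

#define LBR_FROM_FLAG_MISPRED	(1ULL << 63)

/*
 * Decode a raw LBR_FROM value in LBR_FORMAT_EIP_FLAGS format:
 * bit 63 holds the mispredict flag, bits 62:0 hold the address.
 */
static uint64_t decode_lbr_from(uint64_t raw, int *mispred)
{
	*mispred = !!(raw & LBR_FROM_FLAG_MISPRED);
	/*
	 * Shift the flag bit out, then arithmetic-shift back in:
	 * bit 62 is replicated into bit 63, restoring a canonical
	 * (sign-extended) 64-bit address.
	 */
	return (uint64_t)(((int64_t)(raw << 1)) >> 1);
}

int main(void)
{
	/* A made-up kernel-space source address with MISPRED set. */
	uint64_t raw = LBR_FROM_FLAG_MISPRED | 0x7fffffff81234567ULL;
	int mispred;
	uint64_t from = decode_lbr_from(raw, &mispred);

	/* Prints: from=0xffffffff81234567 mispred=1 */
	printf("from=%#llx mispred=%d\n", (unsigned long long)from, mispred);
	return 0;
}

Note that the 32-bit and plain EIP/LIP formats carry no flag bit, which
is why the patch leaves flags zero everywhere else.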


--
Stephane Eranian | EMEA Software Engineering
Google France | 38 avenue de l'Opéra | 75002 Paris
Tel : +33 (0) 1 42 68 53 00
This email may be confidential or privileged. If you received this
communication by mistake, please don't forward it to anyone else,
please erase all copies and attachments, and please let me know that
it went to the wrong person. Thanks

Thread overview: 44+ messages
2010-03-03 16:39 [RFC][PATCH 00/11] Another stab at PEBS and LBR support Peter Zijlstra
2010-03-03 16:39 ` [RFC][PATCH 01/11] perf, x86: Remove superfluous arguments to x86_perf_event_set_period() Peter Zijlstra
2010-03-03 16:39 ` [RFC][PATCH 02/11] perf, x86: Remove superfluous arguments to x86_perf_event_update() Peter Zijlstra
2010-03-03 16:39 ` [RFC][PATCH 03/11] perf, x86: Change x86_pmu.{enable,disable} calling convention Peter Zijlstra
2010-03-03 16:39 ` [RFC][PATCH 04/11] perf, x86: Use unlocked bitops Peter Zijlstra
2010-03-03 16:39 ` [RFC][PATCH 05/11] perf: Generic perf_sample_data initialization Peter Zijlstra
2010-03-03 16:49   ` David Miller
2010-03-03 21:14   ` Frederic Weisbecker
2010-03-05  8:44   ` Jean Pihet
2010-03-03 16:39 ` [RFC][PATCH 06/11] perf, x86: PEBS infrastructure Peter Zijlstra
2010-03-03 17:38   ` Robert Richter
2010-03-03 17:42     ` Peter Zijlstra
2010-03-04  8:50       ` Robert Richter
2010-03-03 16:39 ` [RFC][PATCH 07/11] perf: Provide PERF_SAMPLE_REGS Peter Zijlstra
2010-03-03 17:30   ` Stephane Eranian
2010-03-03 17:39     ` Peter Zijlstra
2010-03-03 17:49       ` Stephane Eranian
2010-03-03 17:55         ` David Miller
2010-03-03 18:18           ` Stephane Eranian
2010-03-03 19:18           ` Peter Zijlstra
2010-03-04  2:59           ` Ingo Molnar
2010-03-04 12:58             ` Arnaldo Carvalho de Melo
2010-03-03 22:02   ` Frederic Weisbecker
2010-03-04  8:58     ` Peter Zijlstra
2010-03-04 11:04       ` Ingo Molnar
2010-03-03 16:39 ` [RFC][PATCH 08/11] perf, x86: Implement simple LBR support Peter Zijlstra
2010-03-03 21:52   ` Stephane Eranian
2010-03-04  8:58     ` Peter Zijlstra
2010-03-03 21:57   ` Stephane Eranian [this message]
2010-03-04  8:58     ` Peter Zijlstra
2010-03-04 17:54       ` Stephane Eranian
2010-03-04 18:18         ` Peter Zijlstra
2010-03-04 20:23           ` Peter Zijlstra
2010-03-04 20:57             ` Stephane Eranian
2010-03-03 16:39 ` [RFC][PATCH 09/11] perf, x86: Implement PERF_SAMPLE_BRANCH_STACK Peter Zijlstra
2010-03-03 21:08   ` Frederic Weisbecker
2010-03-03 16:39 ` [RFC][PATCH 10/11] perf, x86: use LBR for PEBS IP+1 fixup Peter Zijlstra
2010-03-03 18:05   ` Masami Hiramatsu
2010-03-03 19:37     ` Peter Zijlstra
2010-03-03 21:11       ` Masami Hiramatsu
2010-03-03 21:50         ` Stephane Eranian
2010-03-04  8:57           ` Peter Zijlstra
2010-03-09  1:41             ` Stephane Eranian
2010-03-03 16:39 ` [RFC][PATCH 11/11] perf, x86: Clean up IA32_PERF_CAPABILITIES usage Peter Zijlstra
