linux-perf-users.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Ian Rogers <irogers@google.com>
To: Ravi Bangoria <ravi.bangoria@amd.com>
Cc: acme@kernel.org, peterz@infradead.org, rrichter@amd.com,
	mingo@redhat.com, mark.rutland@arm.com, jolsa@kernel.org,
	namhyung@kernel.org, tglx@linutronix.de, bp@alien8.de,
	james.clark@arm.com, leo.yan@linaro.org,
	kan.liang@linux.intel.com, ak@linux.intel.com,
	eranian@google.com, like.xu.linux@gmail.com, x86@kernel.org,
	linux-perf-users@vger.kernel.org, linux-kernel@vger.kernel.org,
	sandipan.das@amd.com, ananth.narayan@amd.com,
	kim.phillips@amd.com, santosh.shukla@amd.com
Subject: Re: [PATCH v3 5/5] perf script ibs: Support new IBS bits in raw trace dump
Date: Thu, 19 May 2022 16:50:58 -0700	[thread overview]
Message-ID: <CAP-5=fWP42=AW9zr5ULM3_XL4pOhtTcLSr9kjvbRiH_EivO9JA@mail.gmail.com> (raw)
In-Reply-To: <20220519054355.477-6-ravi.bangoria@amd.com>

On Wed, May 18, 2022 at 10:45 PM Ravi Bangoria <ravi.bangoria@amd.com> wrote:
>
> Interpret Additional set of IBS register bits while doing
> perf report/script raw dump.
>
> IBS op pmu ex:
>   $ sudo ./perf record -c 130 -a -e ibs_op/l3missonly=1/ --raw-samples
>   $ sudo ./perf report -D
>   ...
>   ibs_op_ctl:     0000004500070008 MaxCnt       128 L3MissOnly 1 En 1
>         Val 1 CntCtl 0=cycles CurCnt        69
>   ibs_op_data:    0000000000710002 CompToRetCtr     2 TagToRetCtr   113
>         BrnRet 0  RipInvalid 0 BrnFuse 0 Microcode 0
>   ibs_op_data2:   0000000000000002 CacheHitSt 0=M-state RmtNode 0
>         DataSrc 2=A peer cache in a near CCX
>   ibs_op_data3:   000000681d1700a1 LdOp 1 StOp 0 DcL1TlbMiss 0
>         DcL2TlbMiss 0 DcL1TlbHit2M 0 DcL1TlbHit1G 1 DcL2TlbHit2M 0
>         DcMiss 1 DcMisAcc 0 DcWcMemAcc 0 DcUcMemAcc 0 DcLockedOp 0
>         DcMissNoMabAlloc 1 DcLinAddrValid 1 DcPhyAddrValid 1
>         DcL2TlbHit1G 0 L2Miss 1 SwPf 0 OpMemWidth  8 bytes
>         OpDcMissOpenMemReqs  7 DcMissLat   104 TlbRefillLat     0
>
> IBS Fetch pmu ex:
>   $ sudo ./perf record -c 130 -a -e ibs_fetch/l3missonly=1/ --raw-samples
>   $ sudo ./perf report -D
>   ...
>   ibs_fetch_ctl:  3c1f00c700080008 MaxCnt     128 Cnt     128 Lat   199
>         En 1 Val 1 Comp 1 IcMiss 1 PhyAddrValid        1 L1TlbPgSz 4KB
>         L1TlbMiss 0 L2TlbMiss 0 RandEn 0 L2Miss 1 L3MissOnly 1
>         FetchOcMiss 1 FetchL3Miss 1
>
> With the DataSrc extensions, the source of data can be decoded among:
>  - Local L3 or other L1/L2 in CCX.
>  - A peer cache in a near CCX.
>  - Data returned from DRAM.
>  - A peer cache in a far CCX.
>  - DRAM address map with "long latency" bit set.
>  - Data returned from MMIO/Config/PCI/APIC.
>  - Extension Memory (S-Link, GenZ, etc - identified by the CS target
>     and/or address map at DF's choice).
>  - Peer Agent Memory.
>
> Signed-off-by: Ravi Bangoria <ravi.bangoria@amd.com>

Looks good, but I think part of the previous patch should also be in this one.

Thanks,
Ian
> ---
>  tools/perf/util/amd-sample-raw.c | 64 +++++++++++++++++++++++++++++---
>  1 file changed, 58 insertions(+), 6 deletions(-)
>
> diff --git a/tools/perf/util/amd-sample-raw.c b/tools/perf/util/amd-sample-raw.c
> index 3b623ea6ee7e..63303f583bc0 100644
> --- a/tools/perf/util/amd-sample-raw.c
> +++ b/tools/perf/util/amd-sample-raw.c
> @@ -18,6 +18,7 @@
>  #include "pmu-events/pmu-events.h"
>
>  static u32 cpu_family, cpu_model, ibs_fetch_type, ibs_op_type;
> +static bool zen4_ibs_extensions;
>
>  static void pr_ibs_fetch_ctl(union ibs_fetch_ctl reg)
>  {
> @@ -39,6 +40,7 @@ static void pr_ibs_fetch_ctl(union ibs_fetch_ctl reg)
>         };
>         const char *ic_miss_str = NULL;
>         const char *l1tlb_pgsz_str = NULL;
> +       char l3_miss_str[sizeof(" L3MissOnly _ FetchOcMiss _ FetchL3Miss _")] = "";
>
>         if (cpu_family == 0x19 && cpu_model < 0x10) {
>                 /*
> @@ -53,12 +55,19 @@ static void pr_ibs_fetch_ctl(union ibs_fetch_ctl reg)
>                 ic_miss_str = ic_miss_strs[reg.ic_miss];
>         }
>
> +       if (zen4_ibs_extensions) {
> +               snprintf(l3_miss_str, sizeof(l3_miss_str),
> +                        " L3MissOnly %d FetchOcMiss %d FetchL3Miss %d",
> +                        reg.l3_miss_only, reg.fetch_oc_miss, reg.fetch_l3_miss);
> +       }
> +
>         printf("ibs_fetch_ctl:\t%016llx MaxCnt %7d Cnt %7d Lat %5d En %d Val %d Comp %d%s "
> -              "PhyAddrValid %d%s L1TlbMiss %d L2TlbMiss %d RandEn %d%s\n",
> +               "PhyAddrValid %d%s L1TlbMiss %d L2TlbMiss %d RandEn %d%s%s\n",
>                 reg.val, reg.fetch_maxcnt << 4, reg.fetch_cnt << 4, reg.fetch_lat,
>                 reg.fetch_en, reg.fetch_val, reg.fetch_comp, ic_miss_str ? : "",
>                 reg.phy_addr_valid, l1tlb_pgsz_str ? : "", reg.l1tlb_miss, reg.l2tlb_miss,
> -               reg.rand_en, reg.fetch_comp ? (reg.fetch_l2_miss ? " L2Miss 1" : " L2Miss 0") : "");
> +               reg.rand_en, reg.fetch_comp ? (reg.fetch_l2_miss ? " L2Miss 1" : " L2Miss 0") : "",
> +               l3_miss_str);
>  }
>
>  static void pr_ic_ibs_extd_ctl(union ic_ibs_extd_ctl reg)
> @@ -68,9 +77,15 @@ static void pr_ic_ibs_extd_ctl(union ic_ibs_extd_ctl reg)
>
>  static void pr_ibs_op_ctl(union ibs_op_ctl reg)
>  {
> -       printf("ibs_op_ctl:\t%016llx MaxCnt %9d En %d Val %d CntCtl %d=%s CurCnt %9d\n",
> -              reg.val, ((reg.opmaxcnt_ext << 16) | reg.opmaxcnt) << 4, reg.op_en, reg.op_val,
> -              reg.cnt_ctl, reg.cnt_ctl ? "uOps" : "cycles", reg.opcurcnt);
> +       char l3_miss_only[sizeof(" L3MissOnly _")] = "";
> +
> +       if (zen4_ibs_extensions)
> +               snprintf(l3_miss_only, sizeof(l3_miss_only), " L3MissOnly %d", reg.l3_miss_only);
> +
> +       printf("ibs_op_ctl:\t%016llx MaxCnt %9d%s En %d Val %d CntCtl %d=%s CurCnt %9d\n",
> +               reg.val, ((reg.opmaxcnt_ext << 16) | reg.opmaxcnt) << 4, l3_miss_only,
> +               reg.op_en, reg.op_val, reg.cnt_ctl,
> +               reg.cnt_ctl ? "uOps" : "cycles", reg.opcurcnt);
>  }
>
>  static void pr_ibs_op_data(union ibs_op_data reg)
> @@ -84,7 +99,34 @@ static void pr_ibs_op_data(union ibs_op_data reg)
>                 reg.op_brn_ret, reg.op_rip_invalid, reg.op_brn_fuse, reg.op_microcode);
>  }
>
> -static void pr_ibs_op_data2(union ibs_op_data2 reg)
> +static void pr_ibs_op_data2_extended(union ibs_op_data2 reg)
> +{
> +       static const char * const data_src_str[] = {
> +               "",
> +               " DataSrc 1=Local L3 or other L1/L2 in CCX",
> +               " DataSrc 2=A peer cache in a near CCX",
> +               " DataSrc 3=Data returned from DRAM",
> +               " DataSrc 4=(reserved)",
> +               " DataSrc 5=A peer cache in a far CCX",
> +               " DataSrc 6=DRAM address map with \"long latency\" bit set",
> +               " DataSrc 7=Data returned from MMIO/Config/PCI/APIC",
> +               " DataSrc 8=Extension Memory (S-Link, GenZ, etc)",
> +               " DataSrc 9=(reserved)",
> +               " DataSrc 10=(reserved)",
> +               " DataSrc 11=(reserved)",
> +               " DataSrc 12=Peer Agent Memory",
> +               /* 13 to 31 are reserved. Avoid printing them. */
> +       };
> +       int data_src = (reg.data_src_hi << 3) | reg.data_src_lo;
> +
> +       printf("ibs_op_data2:\t%016llx %sRmtNode %d%s\n", reg.val,
> +               (data_src == 1 || data_src == 2 || data_src == 5) ?
> +                       (reg.cache_hit_st ? "CacheHitSt 1=O-State " : "CacheHitSt 0=M-state ") : "",
> +               reg.rmt_node,
> +               data_src < (int)ARRAY_SIZE(data_src_str) ? data_src_str[data_src] : "");
> +}
> +
> +static void pr_ibs_op_data2_default(union ibs_op_data2 reg)
>  {
>         static const char * const data_src_str[] = {
>                 "",
> @@ -103,6 +145,13 @@ static void pr_ibs_op_data2(union ibs_op_data2 reg)
>                reg.rmt_node, data_src_str[reg.data_src_lo]);
>  }
>
> +static void pr_ibs_op_data2(union ibs_op_data2 reg)
> +{
> +       if (zen4_ibs_extensions)
> +               return pr_ibs_op_data2_extended(reg);
> +       pr_ibs_op_data2_default(reg);
> +}
> +
>  static void pr_ibs_op_data3(union ibs_op_data3 reg)
>  {
>         char l2_miss_str[sizeof(" L2Miss _")] = "";
> @@ -279,6 +328,9 @@ bool evlist__has_amd_ibs(struct evlist *evlist)
>                 pmu_mapping += strlen(pmu_mapping) + 1 /* '\0' */;
>         }
>
> +       if (perf_env__find_pmu_cap(env, 0, "ibs_op", "zen4_ibs_extensions"))
> +               zen4_ibs_extensions = 1;
> +
>         if (ibs_fetch_type || ibs_op_type) {
>                 if (!cpu_family)
>                         parse_cpuid(env);
> --
> 2.27.0
>

      reply	other threads:[~2022-05-19 23:51 UTC|newest]

Thread overview: 16+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-05-19  5:43 [PATCH v3 0/5] perf/amd: Zen4 IBS extensions support (tool changes) Ravi Bangoria
2022-05-19  5:43 ` [PATCH v3 1/5] perf record ibs: Warn about sampling period skew Ravi Bangoria
2022-05-19 22:14   ` Ian Rogers
2022-05-19  5:43 ` [PATCH v3 2/5] perf header: Parse non-cpu pmu capabilities Ravi Bangoria
2022-05-19 22:27   ` Ian Rogers
2022-05-20  3:49     ` Ravi Bangoria
2022-05-20  4:31       ` Ian Rogers
2022-05-20  5:20         ` Ravi Bangoria
2022-05-19  5:43 ` [PATCH v3 3/5] perf/x86/ibs: Add new IBS register bits into header Ravi Bangoria
2022-05-19 23:47   ` Ian Rogers
2022-05-19  5:43 ` [PATCH v3 4/5] perf tool ibs: Sync amd ibs header file Ravi Bangoria
2022-05-19 23:48   ` Ian Rogers
2022-05-20  3:55     ` Ravi Bangoria
2022-05-20  4:32       ` Ian Rogers
2022-05-19  5:43 ` [PATCH v3 5/5] perf script ibs: Support new IBS bits in raw trace dump Ravi Bangoria
2022-05-19 23:50   ` Ian Rogers [this message]

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to='CAP-5=fWP42=AW9zr5ULM3_XL4pOhtTcLSr9kjvbRiH_EivO9JA@mail.gmail.com' \
    --to=irogers@google.com \
    --cc=acme@kernel.org \
    --cc=ak@linux.intel.com \
    --cc=ananth.narayan@amd.com \
    --cc=bp@alien8.de \
    --cc=eranian@google.com \
    --cc=james.clark@arm.com \
    --cc=jolsa@kernel.org \
    --cc=kan.liang@linux.intel.com \
    --cc=kim.phillips@amd.com \
    --cc=leo.yan@linaro.org \
    --cc=like.xu.linux@gmail.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-perf-users@vger.kernel.org \
    --cc=mark.rutland@arm.com \
    --cc=mingo@redhat.com \
    --cc=namhyung@kernel.org \
    --cc=peterz@infradead.org \
    --cc=ravi.bangoria@amd.com \
    --cc=rrichter@amd.com \
    --cc=sandipan.das@amd.com \
    --cc=santosh.shukla@amd.com \
    --cc=tglx@linutronix.de \
    --cc=x86@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).