linux-perf-users.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Ravi Bangoria <ravi.bangoria@amd.com>
To: Peter Zijlstra <peterz@infradead.org>,
	Arnaldo Carvalho de Melo <acme@kernel.org>,
	Namhyung Kim <namhyung@kernel.org>
Cc: Ravi Bangoria <ravi.bangoria@amd.com>,
	Ingo Molnar <mingo@redhat.com>,
	Stephane Eranian <eranian@google.com>,
	Ian Rogers <irogers@google.com>,
	"Kan Liang" <kan.liang@linux.intel.com>,
	James Clark <james.clark@linaro.org>, "Leo Yan" <leo.yan@arm.com>,
	Joe Mario <jmario@redhat.com>, <linux-kernel@vger.kernel.org>,
	<linux-perf-users@vger.kernel.org>,
	"Santosh Shukla" <santosh.shukla@amd.com>,
	Ananth Narayan <ananth.narayan@amd.com>,
	Sandipan Das <sandipan.das@amd.com>
Subject: [PATCH 1/4] perf/amd/ibs: Add load/store SW filters to IBS OP PMU
Date: Thu, 29 May 2025 12:34:53 +0000	[thread overview]
Message-ID: <20250529123456.1801-2-ravi.bangoria@amd.com> (raw)
In-Reply-To: <20250529123456.1801-1-ravi.bangoria@amd.com>

Since the current IBS OP PMU does not have the capability to tag only
load/store instructions, tools like perf mem/c2c end up recording lots
of unwanted samples. So, introduce a load/store software filter in the
IBS OP PMU:

  ibs_op/swfilt=1,ldop=1/         --> Only load samples
  ibs_op/swfilt=1,stop=1/         --> Only store samples
  ibs_op/swfilt=1,ldop=1,stop=1/  --> Load OR store samples

Other HW or SW filters used in combination with this ldst filter are
combined with it using logical AND. For example:

  ibs_op/swfilt=1,ldop=1,stop=1/u is
  "privilege == userspace && (ldop == 1 || stop == 1)"

  ibs_op/swfilt=1,ldop=1,stop=1,l3missonly=1/ is
  "l3missonly == 1 && (ldop == 1 || stop == 1)"

An alternate approach is the mem_op BPF filter:

  perf record --filter "mem_op == load || mem_op == store" ...

However, there are a few issues with it:
o The BPF filter is called after preparing the entire perf sample. If the
  sample does not satisfy the filtering criteria, all the effort of
  preparing the perf sample is wasted.
o The BPF filter requires root privilege.

Signed-off-by: Ravi Bangoria <ravi.bangoria@amd.com>
---
 arch/x86/events/amd/ibs.c  | 73 +++++++++++++++++++++++++++++++++++---
 include/linux/perf_event.h | 14 ++++++++
 2 files changed, 83 insertions(+), 4 deletions(-)

diff --git a/arch/x86/events/amd/ibs.c b/arch/x86/events/amd/ibs.c
index 0252b7ea8bca..d18ce6464b27 100644
--- a/arch/x86/events/amd/ibs.c
+++ b/arch/x86/events/amd/ibs.c
@@ -31,6 +31,10 @@ static u32 ibs_caps;
 /* attr.config2 */
 #define IBS_SW_FILTER_MASK	1
 
+/* attr.config1 */
+#define IBS_LDOP_FILTER_MASK	(1UL << 12)
+#define IBS_STOP_FILTER_MASK	(1UL << 13)
+
 /*
  * IBS states:
  *
@@ -308,6 +312,11 @@ static int perf_ibs_init(struct perf_event *event)
 	     event->attr.exclude_hv))
 		return -EINVAL;
 
+	if (!(event->attr.config2 & IBS_SW_FILTER_MASK) &&
+	    (event->attr.config1 & (IBS_LDOP_FILTER_MASK |
+				    IBS_STOP_FILTER_MASK)))
+		return -EINVAL;
+
 	ret = validate_group(event);
 	if (ret)
 		return ret;
@@ -624,6 +633,10 @@ static struct attribute_group empty_caps_group = {
 PMU_FORMAT_ATTR(rand_en,	"config:57");
 PMU_FORMAT_ATTR(cnt_ctl,	"config:19");
 PMU_FORMAT_ATTR(swfilt,		"config2:0");
+PMU_FORMAT_ATTR(ldop,		"config1:12"); /* IBS_LDOP_FILTER_MASK */
+PMU_FORMAT_ATTR(stop,		"config1:13"); /* IBS_STOP_FILTER_MASK */
+PMU_CAP_ATTR(swfilt_ldst,	"1");
+
 PMU_EVENT_ATTR_STRING(l3missonly, fetch_l3missonly, "config:59");
 PMU_EVENT_ATTR_STRING(l3missonly, op_l3missonly, "config:16");
 PMU_EVENT_ATTR_STRING(ldlat, ibs_op_ldlat_format, "config1:0-11");
@@ -724,6 +737,8 @@ cnt_ctl_is_visible(struct kobject *kobj, struct attribute *attr, int i)
 
 static struct attribute *op_attrs[] = {
 	&format_attr_swfilt.attr,
+	&format_attr_ldop.attr,
+	&format_attr_stop.attr,
 	NULL,
 };
 
@@ -737,11 +752,21 @@ static struct attribute *op_l3missonly_attrs[] = {
 	NULL,
 };
 
+static struct attribute *op_attrs_caps[] = {
+	&cap_attr_swfilt_ldst.attr,
+	NULL,
+};
+
 static struct attribute_group group_op_formats = {
 	.name = "format",
 	.attrs = op_attrs,
 };
 
+static struct attribute_group group_op_caps = {
+	.name = "caps",
+	.attrs = op_attrs_caps,
+};
+
 static struct attribute *ibs_op_ldlat_format_attrs[] = {
 	&ibs_op_ldlat_format.attr.attr,
 	NULL,
@@ -761,7 +786,7 @@ static struct attribute_group group_op_l3missonly = {
 
 static const struct attribute_group *op_attr_groups[] = {
 	&group_op_formats,
-	&empty_caps_group,
+	&group_op_caps,
 	NULL,
 };
 
@@ -1148,13 +1173,23 @@ static bool perf_ibs_is_mem_sample_type(struct perf_ibs *perf_ibs,
 			      PERF_SAMPLE_PHYS_ADDR);
 }
 
+static bool perf_ibs_ld_st_filter_event(struct perf_ibs *perf_ibs,
+					struct perf_event *event)
+{
+	return perf_ibs == &perf_ibs_op &&
+	       (event->attr.config2 & IBS_SW_FILTER_MASK) &&
+	       (event->attr.config1 & (IBS_LDOP_FILTER_MASK |
+				       IBS_STOP_FILTER_MASK));
+}
+
 static int perf_ibs_get_offset_max(struct perf_ibs *perf_ibs,
 				   struct perf_event *event,
 				   int check_rip)
 {
 	if (event->attr.sample_type & PERF_SAMPLE_RAW ||
 	    perf_ibs_is_mem_sample_type(perf_ibs, event) ||
-	    perf_ibs_ldlat_event(perf_ibs, event))
+	    perf_ibs_ldlat_event(perf_ibs, event) ||
+	    perf_ibs_ld_st_filter_event(perf_ibs, event))
 		return perf_ibs->offset_max;
 	else if (check_rip)
 		return 3;
@@ -1189,6 +1224,32 @@ static bool perf_ibs_is_kernel_br_target(struct perf_event *event,
 			op_data.op_brn_ret && kernel_ip(br_target));
 }
 
+/*
+ * ibs_op/swfilt=1,ldop=1/         --> Only load samples
+ * ibs_op/swfilt=1,stop=1/         --> Only store samples
+ * ibs_op/swfilt=1,ldop=1,stop=1/  --> Load OR store samples
+ */
+static bool perf_ibs_ld_st_filter(struct perf_event *event,
+				  struct perf_ibs_data *ibs_data)
+{
+	union ibs_op_data3 op_data3;
+
+	if (!(event->attr.config1 & (IBS_LDOP_FILTER_MASK |
+				     IBS_STOP_FILTER_MASK))) {
+		return false;
+	}
+
+	op_data3.val = ibs_data->regs[ibs_op_msr_idx(MSR_AMD64_IBSOPDATA3)];
+
+	if ((event->attr.config1 & IBS_LDOP_FILTER_MASK) && op_data3.ld_op)
+		return false;
+
+	if ((event->attr.config1 & IBS_STOP_FILTER_MASK) && op_data3.st_op)
+		return false;
+
+	return true;
+}
+
 static bool perf_ibs_swfilt_discard(struct perf_ibs *perf_ibs, struct perf_event *event,
 				    struct pt_regs *regs, struct perf_ibs_data *ibs_data,
 				    int br_target_idx)
@@ -1196,9 +1257,12 @@ static bool perf_ibs_swfilt_discard(struct perf_ibs *perf_ibs, struct perf_event
 	if (perf_exclude_event(event, regs))
 		return true;
 
-	if (perf_ibs != &perf_ibs_op || !event->attr.exclude_kernel)
+	if (perf_ibs != &perf_ibs_op)
 		return false;
 
+	if (!event->attr.exclude_kernel)
+		goto ldst_filter;
+
 	if (perf_ibs_is_kernel_data_addr(event, ibs_data))
 		return true;
 
@@ -1206,7 +1270,8 @@ static bool perf_ibs_swfilt_discard(struct perf_ibs *perf_ibs, struct perf_event
 	    perf_ibs_is_kernel_br_target(event, ibs_data, br_target_idx))
 		return true;
 
-	return false;
+ldst_filter:
+	return perf_ibs_ld_st_filter(event, ibs_data);
 }
 
 static void perf_ibs_phyaddr_clear(struct perf_ibs *perf_ibs,
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 0069ba6866a4..dedb92d5cd61 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -1998,6 +1998,20 @@ _name##_show(struct device *dev,					\
 									\
 static struct device_attribute format_attr_##_name = __ATTR_RO(_name)
 
+#define PMU_CAP_ATTR_SHOW(_name, _cap)					\
+static ssize_t								\
+_name##_show(struct device *dev, struct device_attribute *attr,		\
+	     char *page)						\
+{									\
+	BUILD_BUG_ON(sizeof(_cap) >= PAGE_SIZE);			\
+	return sprintf(page, _cap "\n");				\
+}
+
+#define PMU_CAP_ATTR(_name, _cap)					\
+	PMU_CAP_ATTR_SHOW(_name, _cap)					\
+									\
+static struct device_attribute cap_attr_##_name = __ATTR_RO(_name)
+
 /* Performance counter hotplug functions */
 #ifdef CONFIG_PERF_EVENTS
 int perf_event_init_cpu(unsigned int cpu);
-- 
2.43.0


  reply	other threads:[~2025-05-29 12:35 UTC|newest]

Thread overview: 8+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2025-05-29 12:34 [PATCH 0/4] perf/amd/ibs: Introduce load/store SW filter Ravi Bangoria
2025-05-29 12:34 ` Ravi Bangoria [this message]
2025-05-31  7:53   ` [PATCH 1/4] perf/amd/ibs: Add load/store SW filters to IBS OP PMU Ingo Molnar
2025-06-03  6:52     ` Namhyung Kim
2025-06-10  5:12       ` Ravi Bangoria
2025-05-29 12:34 ` [PATCH 2/4] perf mem/c2c amd: Wire IBS OP PMU load/store SW filter Ravi Bangoria
2025-05-29 12:34 ` [PATCH 3/4] perf test amd: Add IBS load/store swfilt tests Ravi Bangoria
2025-05-29 12:34 ` [PATCH 4/4] perf doc amd: Update perf-amd-ibs man page Ravi Bangoria

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20250529123456.1801-2-ravi.bangoria@amd.com \
    --to=ravi.bangoria@amd.com \
    --cc=acme@kernel.org \
    --cc=ananth.narayan@amd.com \
    --cc=eranian@google.com \
    --cc=irogers@google.com \
    --cc=james.clark@linaro.org \
    --cc=jmario@redhat.com \
    --cc=kan.liang@linux.intel.com \
    --cc=leo.yan@arm.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-perf-users@vger.kernel.org \
    --cc=mingo@redhat.com \
    --cc=namhyung@kernel.org \
    --cc=peterz@infradead.org \
    --cc=sandipan.das@amd.com \
    --cc=santosh.shukla@amd.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).