From: Athira Rajeev <atrajeev@linux.ibm.com>
To: linuxppc-dev@lists.ozlabs.org, maddy@linux.ibm.com
Cc: linux-perf-users@vger.kernel.org, atrajeev@linux.ibm.com,
hbathini@linux.vnet.ibm.com, tejas05@linux.ibm.com,
venkat88@linux.ibm.com, tshah@linux.ibm.com
Subject: [PATCH 1/5] powerpc/htm: Add interface to expose HTM trace data via perf
Date: Wed, 1 Jul 2026 14:08:02 +0530 [thread overview]
Message-ID: <20260701083806.79358-2-atrajeev@linux.ibm.com> (raw)
In-Reply-To: <20260701083806.79358-1-atrajeev@linux.ibm.com>
The H_HTM (Hardware Trace Macro) hypervisor call is an HCALL to export data
from the Hardware Trace Macro (HTM) function. Add support for setup,
configuration and control of the HTM function via a PMU.
H_HTM is used as an interface for executing Hardware Trace Macro (HTM)
functions, including setup, configuration, control and dumping of the
HTM data. HTM operations can be controlled using the H_HTM hcall. The
hcall can be invoked for any core/chip of the system from within a
partition itself.
To use this, expose the events as part of the "htm" PMU. The event code
(config) is a 28-bit value in which the user specifies the fields below:
event: "config:0-27"
htm_type: "config:0-3"
nodeindex: "config:4-11"
nodalchipindex: "config:12-19"
coreindexonchip: "config:20-27"
1) nodeindex, nodalchipindex, coreindexonchip: these specify
which partition to configure the HTM for.
2) htm_type: specifies the type of HTM.
In htm_event_add: configure and start the tracing using htm_hcall_wrapper,
which is defined in the plpar_wrappers.h header file.
In htm_event_del: stop and deconfigure the tracing using
htm_hcall_wrapper.
With the changes:
# ls /sys/bus/event_source/devices/ |grep htm
htm
# ls /sys/bus/event_source/devices/htm/
events format perf_event_mux_interval_ms power subsystem type uevent
Signed-off-by: Athira Rajeev <atrajeev@linux.ibm.com>
---
arch/powerpc/perf/Makefile | 2 +-
arch/powerpc/perf/htm-perf.c | 307 +++++++++++++++++++++++++++++++++++
2 files changed, 308 insertions(+), 1 deletion(-)
create mode 100644 arch/powerpc/perf/htm-perf.c
diff --git a/arch/powerpc/perf/Makefile b/arch/powerpc/perf/Makefile
index 78dd7e25219e..26ef30c0693c 100644
--- a/arch/powerpc/perf/Makefile
+++ b/arch/powerpc/perf/Makefile
@@ -14,7 +14,7 @@ obj-$(CONFIG_PPC_POWERNV) += imc-pmu.o
obj-$(CONFIG_FSL_EMB_PERF_EVENT) += core-fsl-emb.o
obj-$(CONFIG_FSL_EMB_PERF_EVENT_E500) += e500-pmu.o e6500-pmu.o
-obj-$(CONFIG_HV_PERF_CTRS) += hv-24x7.o hv-gpci.o hv-common.o vpa-dtl.o
+obj-$(CONFIG_HV_PERF_CTRS) += hv-24x7.o hv-gpci.o hv-common.o vpa-dtl.o htm-perf.o
obj-$(CONFIG_VPA_PMU) += vpa-pmu.o
diff --git a/arch/powerpc/perf/htm-perf.c b/arch/powerpc/perf/htm-perf.c
new file mode 100644
index 000000000000..e22a7fdce2f5
--- /dev/null
+++ b/arch/powerpc/perf/htm-perf.c
@@ -0,0 +1,307 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Perf interface to expose HTM Trace data.
+ *
+ * Copyright (C) 2025 Athira Rajeev, IBM Corporation
+ */
+
+#define pr_fmt(fmt) "htm: " fmt
+
+#include <asm/dtl.h>
+#include <linux/perf_event.h>
+#include <asm/plpar_wrappers.h>
+#include <linux/vmalloc.h>
+
+extern void perf_event_wakeup(struct perf_event *event); /* NOTE(review): unused in this patch */
+#define EVENT(_name, _code) enum{_name = _code}
+
+/*
+ * H_HTM (Hardware Trace Macro) is the hypervisor call (HCALL) used to
+ * export data from the Hardware Trace Macro (HTM) function.
+ *
+ * EVENT() above declares each event code as a constant of an anonymous
+ * enum. The codes below are the HTM types; they are the only values
+ * htm_event_init() accepts in the low four bits of attr.config (the
+ * "htm_type" format field).
+ *
+ * Layout of attr.config as advertised by the format attributes below:
+ *   htm_type         config:0-3
+ *   nodeindex        config:4-11
+ *   nodalchipindex   config:12-19
+ *   coreindexonchip  config:20-27
+ * "event" (config:0-27) spans all four fields at once.
+ */
+EVENT(HTM_CORE, 0x2);
+EVENT(HTM_NEST, 0x1);
+
+GENERIC_EVENT_ATTR(htm_core, HTM_CORE);
+GENERIC_EVENT_ATTR(htm_nest, HTM_NEST);
+
+PMU_FORMAT_ATTR(event, "config:0-27");
+PMU_FORMAT_ATTR(htm_type, "config:0-3");
+PMU_FORMAT_ATTR(nodeindex, "config:4-11");
+PMU_FORMAT_ATTR(nodalchipindex, "config:12-19");
+PMU_FORMAT_ATTR(coreindexonchip, "config:20-27");
+
+/* Entries of the PMU's sysfs "events" group: one per HTM type. */
+static struct attribute *events_attr[] = {
+	GENERIC_EVENT_PTR(HTM_NEST),
+	GENERIC_EVENT_PTR(HTM_CORE),
+	NULL
+};
+
+/* Never modified after build time, hence const like format_group below. */
+static const struct attribute_group event_group = {
+	.name = "events",
+	.attrs = events_attr,
+};
+
+/*
+ * Entries of the PMU's sysfs "format" group: they describe how the
+ * individual fields are packed into attr.config.
+ */
+static struct attribute *format_attrs[] = {
+	&format_attr_event.attr,
+	&format_attr_htm_type.attr,
+	&format_attr_nodeindex.attr,
+	&format_attr_nodalchipindex.attr,
+	&format_attr_coreindexonchip.attr,
+	NULL,
+};
+
+static const struct attribute_group format_group = {
+	.name = "format",
+	.attrs = format_attrs,
+};
+
+/* NULL-terminated list of groups referenced by htm_pmu.attr_groups. */
+static const struct attribute_group *attr_groups[] = {
+	&format_group,
+	&event_group,
+	NULL,
+};
+
+static u64 htmflags = H_HTM_FLAGS_NOWRAP; /* first argument of each htm_hcall_wrapper() call */
+
+/*
+ * Translate the return code of an H_HTM hcall.
+ *
+ * @rc: value returned by htm_hcall_wrapper().
+ *
+ * Return:
+ *   1        for H_SUCCESS and H_PARTIAL,
+ *   0        for H_NOT_AVAILABLE,
+ *   -EBUSY   if the hcall returned one of the busy codes,
+ *   -EINVAL  if any parameter or operation is not valid,
+ *   -EPERM   if HTM Virtualization Engine Technology code is not applied,
+ *   -EIO     if the HTM state is not valid, and for every return code
+ *            that is not listed here, so that an unexpected hypervisor
+ *            error is never reported as success.
+ */
+static ssize_t htm_return_check(long rc)
+{
+	switch (rc) {
+	case H_SUCCESS:
+	/*
+	 * H_PARTIAL for the case where all available data can't be
+	 * returned due to buffer size constraint.
+	 */
+	case H_PARTIAL:
+		return 1;
+	/*
+	 * H_NOT_AVAILABLE indicates reading from an offset outside the range,
+	 * i.e. past end of file.
+	 */
+	case H_NOT_AVAILABLE:
+		return 0;
+	case H_BUSY:
+	case H_LONG_BUSY_ORDER_1_MSEC:
+	case H_LONG_BUSY_ORDER_10_MSEC:
+	case H_LONG_BUSY_ORDER_100_MSEC:
+	case H_LONG_BUSY_ORDER_1_SEC:
+	case H_LONG_BUSY_ORDER_10_SEC:
+	case H_LONG_BUSY_ORDER_100_SEC:
+		return -EBUSY;
+	case H_PARAMETER:
+	case H_P2:
+	case H_P3:
+	case H_P4:
+	case H_P5:
+	case H_P6:
+		return -EINVAL;
+	case H_STATE:
+		return -EIO;
+	case H_AUTHORITY:
+		return -EPERM;
+	default:
+		/* Unknown code: fail instead of falling through to success. */
+		return -EIO;
+	}
+}
+
+/*
+ * Validate a new event for the "htm" PMU.
+ *
+ * Return:
+ *   0            the event is accepted,
+ *   -ENOENT      the event is not meant for this PMU,
+ *   -EACCES      the caller is not perfmon capable,
+ *   -EOPNOTSUPP  inherited events, counting (non-sampling) events and
+ *                branch-stack sampling are not supported,
+ *   -EINVAL      the HTM type in config bits 0-3 is neither HTM_CORE
+ *                nor HTM_NEST.
+ */
+static int htm_event_init(struct perf_event *event)
+{
+	struct hw_perf_event *hwc = &event->hw;
+	u64 config = event->attr.config;
+	u32 htmtype;
+
+	/*
+	 * Test the event attr type for PMU enumeration. This has to be the
+	 * very first check: -ENOENT tells the perf core that the event
+	 * belongs to some other PMU, whereas any other error code makes it
+	 * give up on the event altogether.
+	 */
+	if (event->attr.type != event->pmu->type)
+		return -ENOENT;
+
+	if (event->attr.inherit)
+		return -EOPNOTSUPP;
+
+	if (!perfmon_capable())
+		return -EACCES;
+
+	/* Return if this is a counting event */
+	if (!is_sampling_event(event))
+		return -EOPNOTSUPP;
+
+	/* no branch sampling */
+	if (has_branch_stack(event))
+		return -EOPNOTSUPP;
+
+	/* Only the two known HTM types are valid event codes */
+	htmtype = config & 0xf;
+	switch (htmtype) {
+	case HTM_CORE:
+	case HTM_NEST:
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	htmflags = H_HTM_FLAGS_NOWRAP;
+
+	/* Frequency mode is turned into a fixed sample period */
+	if (event->attr.freq) {
+		hwc->sample_period = event->attr.sample_period;
+		local64_set(&hwc->period_left, hwc->sample_period);
+		hwc->last_period = hwc->sample_period;
+		event->attr.freq = 0;
+	}
+
+	return 0;
+}
+
+/*
+ * Configure and start HTM tracing for the target encoded in attr.config
+ * (HTM type, node index, chip index within the node, core index on chip).
+ *
+ * Each hcall is attempted at most 100 times. If tracing cannot be started
+ * after a successful configure, the HTM is deconfigured again so that it
+ * is not left behind in the configured state.
+ *
+ * Return: 0 on success, a negative errno otherwise.
+ */
+static int htm_event_add(struct perf_event *event, int flags)
+{
+	/*
+	 * NOTE(review): param1/param2 are -1 for H_HTM_OP_CONFIGURE only;
+	 * presumably this asks the hypervisor for its default settings -
+	 * confirm against the H_HTM specification.
+	 */
+	unsigned long param1 = -1, param2 = -1;
+	u64 config = event->attr.config;
+	u32 htmtype = config & 0xf;
+	u32 nodeindex = (config >> 4) & 0xff;
+	u32 nodalchipindex = (config >> 12) & 0xff;
+	u32 coreindexonchip = (config >> 20) & 0xff;
+	int retries = 0;
+	int ret, err;
+	long rc;
+
+	/*
+	 * Invoke H_HTM call with:
+	 * operation as htm configure (H_HTM_OP_CONFIGURE)
+	 * param1 and param2 as -1, last value is unused, hence set to zero
+	 *
+	 * NOTE(review): this loop retries on every failure, not only while
+	 * the hypervisor is busy - confirm that this is intended.
+	 */
+	do {
+		rc = htm_hcall_wrapper(htmflags, nodeindex, nodalchipindex, coreindexonchip,
+				       htmtype, H_HTM_OP_CONFIGURE, param1, param2, 0);
+		ret = htm_return_check(rc);
+	} while (ret <= 0 && ++retries < 100);
+
+	/* htm_return_check() yields 0 for H_NOT_AVAILABLE: report -ENODEV */
+	if (ret <= 0)
+		return ret ? ret : -ENODEV;
+
+	/*
+	 * Invoke H_HTM call with:
+	 * operation as htm start (H_HTM_OP_START)
+	 * last three values are unused, hence set to zero
+	 */
+	retries = 0;
+	do {
+		rc = htm_hcall_wrapper(htmflags, nodeindex, nodalchipindex, coreindexonchip,
+				       htmtype, H_HTM_OP_START, 0, 0, 0);
+		ret = htm_return_check(rc);
+	} while (ret == -EBUSY && ++retries < 100);
+
+	if (ret > 0)
+		return 0;
+
+	err = ret ? ret : -ENODEV;
+
+	/*
+	 * Start failed: undo the configure step with the same deconfigure
+	 * sequence htm_event_del() uses. This is best effort, the error of
+	 * the failed start is what gets reported.
+	 */
+	retries = 0;
+	do {
+		rc = htm_hcall_wrapper(htmflags, nodeindex, nodalchipindex, coreindexonchip,
+				       htmtype, H_HTM_OP_DECONFIGURE, 0, 0, 0);
+		ret = htm_return_check(rc);
+	} while (ret <= 0 && ++retries < 100);
+
+	return err;
+}
+
+/*
+ * Stop and deconfigure HTM tracing for the target encoded in attr.config.
+ *
+ * Both hcalls are issued at most 100 times; failures are not reported
+ * since this callback has no return value.
+ */
+static void htm_event_del(struct perf_event *event, int flags)
+{
+	const u64 cfg = event->attr.config;
+	const u32 type = cfg & 0xf;
+	const u32 node = (cfg >> 4) & 0xff;
+	const u32 chip = (cfg >> 12) & 0xff;
+	const u32 core = (cfg >> 20) & 0xff;
+	int attempt;
+	long hret;
+	int status;
+
+	/*
+	 * H_HTM_OP_STOP: the three trailing arguments are unused and passed
+	 * as zero. Repeat only while the hypervisor reports busy.
+	 */
+	for (attempt = 0; attempt < 100; attempt++) {
+		hret = htm_hcall_wrapper(htmflags, node, chip, core,
+					 type, H_HTM_OP_STOP, 0, 0, 0);
+		status = htm_return_check(hret);
+		if (status != -EBUSY)
+			break;
+	}
+
+	/*
+	 * H_HTM_OP_DECONFIGURE: the three trailing arguments are unused and
+	 * passed as zero. Repeat until the hcall succeeds.
+	 */
+	for (attempt = 0; attempt < 100; attempt++) {
+		hret = htm_hcall_wrapper(htmflags, node, chip, core,
+					 type, H_HTM_OP_DECONFIGURE, 0, 0, 0);
+		status = htm_return_check(hret);
+		if (status > 0)
+			break;
+	}
+}
+
+/*
+ * Nothing to do for ->read(): the HTM trace data is parsed and dumped
+ * to the perf data by htm_dump_sample_data, so this callback is left
+ * empty.
+ */
+static void htm_event_read(struct perf_event *event)
+{
+}
+
+static void htm_event_start(struct perf_event *event, int flags)
+{
+	/* No work here: the H_HTM_OP_START hcall is issued by htm_event_add(). */
+}
+
+static void htm_event_stop(struct perf_event *event, int flags)
+{
+	/* No work here: the H_HTM_OP_STOP hcall is issued by htm_event_del(). */
+}
+
+/* Description of the "htm" PMU as registered by htm_init(). */
+static struct pmu htm_pmu = {
+	/* identity and sysfs layout */
+	.name		= "htm",
+	.attr_groups	= attr_groups,
+	.task_ctx_nr	= perf_invalid_context,
+	.capabilities	= PERF_PMU_CAP_NO_EXCLUDE | PERF_PMU_CAP_EXCLUSIVE,
+
+	/* event life cycle */
+	.event_init	= htm_event_init,
+	.add		= htm_event_add,
+	.del		= htm_event_del,
+	.start		= htm_event_start,
+	.stop		= htm_event_stop,
+	.read		= htm_event_read,
+};
+
+/*
+ * Register the "htm" PMU at boot.
+ *
+ * Only ever called through device_initcall(), hence marked __init so the
+ * code can be discarded once initialization is done.
+ *
+ * Return: 0 on success, -ENODEV when running as a KVM guest, otherwise
+ * the error returned by perf_pmu_register().
+ */
+static int __init htm_init(void)
+{
+	/* This driver is intended only for L1 host. */
+	if (is_kvm_guest()) {
+		pr_debug("Only supported for L1 host system\n");
+		return -ENODEV;
+	}
+
+	return perf_pmu_register(&htm_pmu, htm_pmu.name, -1);
+}
+
+device_initcall(htm_init);
--
2.52.0
next prev parent reply other threads:[~2026-07-01 8:38 UTC|newest]
Thread overview: 9+ messages / expand[flat|nested] mbox.gz Atom feed top
2026-07-01 8:38 [PATCH 0/5] powerpc/htm: Add interface to expose HTM trace data via perf Athira Rajeev
2026-07-01 8:38 ` Athira Rajeev [this message]
2026-07-01 8:50 ` [PATCH 1/5] " sashiko-bot
2026-07-01 8:38 ` [PATCH 2/5] powerpc/htm: Add support to setup and free aux buffer for capturing HTM data Athira Rajeev
2026-07-01 8:50 ` sashiko-bot
2026-07-01 8:38 ` [PATCH 3/5] powerpc/perf: Capture the HTM memory configuration as part of perf data Athira Rajeev
2026-07-01 9:02 ` sashiko-bot
2026-07-01 8:38 ` [PATCH 4/5] docs: ABI: sysfs-bus-event_source-devices-htm: Document sysfs event format entries for htm pmu Athira Rajeev
2026-07-01 8:38 ` [PATCH 5/5] powerpc/perf/htm: Add documentation for Hardware Trace Macro PMU Athira Rajeev
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20260701083806.79358-2-atrajeev@linux.ibm.com \
--to=atrajeev@linux.ibm.com \
--cc=hbathini@linux.vnet.ibm.com \
--cc=linux-perf-users@vger.kernel.org \
--cc=linuxppc-dev@lists.ozlabs.org \
--cc=maddy@linux.ibm.com \
--cc=tejas05@linux.ibm.com \
--cc=tshah@linux.ibm.com \
--cc=venkat88@linux.ibm.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox