From: kan.liang@linux.intel.com
To: joro@8bytes.org, will@kernel.org, baolu.lu@linux.intel.com,
dwmw2@infradead.org, robin.murphy@arm.com,
robert.moore@intel.com, rafael.j.wysocki@intel.com,
lenb@kernel.org, iommu@lists.linux.dev,
linux-kernel@vger.kernel.org
Cc: Kan Liang <kan.liang@linux.intel.com>
Subject: [PATCH 5/7] iommu/vt-d: Support cpumask for IOMMU perfmon
Date: Wed, 11 Jan 2023 12:25:02 -0800 [thread overview]
Message-ID: <20230111202504.378258-6-kan.liang@linux.intel.com> (raw)
In-Reply-To: <20230111202504.378258-1-kan.liang@linux.intel.com>
From: Kan Liang <kan.liang@linux.intel.com>
The perf subsystem assumes that all counters are by default per-CPU. So
the user space tool reads a counter from each CPU. However, the IOMMU
counters are system-wide and can be read from any CPU. To handle this, use
a CPU mask to restrict counting to a single CPU, with a CPU hotplug
notifier that chooses a different CPU if the chosen one is taken offline.
The chosen CPU is exposed via /sys/bus/event_source/devices/dmar*/cpumask
to the user space perf tool.
Signed-off-by: Kan Liang <kan.liang@linux.intel.com>
---
.../sysfs-bus-event_source-devices-iommu | 8 ++
drivers/iommu/intel/perfmon.c | 113 ++++++++++++++++--
include/linux/cpuhotplug.h | 1 +
3 files changed, 115 insertions(+), 7 deletions(-)
diff --git a/Documentation/ABI/testing/sysfs-bus-event_source-devices-iommu b/Documentation/ABI/testing/sysfs-bus-event_source-devices-iommu
index 04e08851d8e6..3519954fe713 100644
--- a/Documentation/ABI/testing/sysfs-bus-event_source-devices-iommu
+++ b/Documentation/ABI/testing/sysfs-bus-event_source-devices-iommu
@@ -22,3 +22,11 @@ Description: Read-only. Attribute group to describe the magic bits
filter_pasid = "config1:32-53" - PASID filter
filter_ats = "config2:0-4" - Address Type filter
filter_page_table = "config2:8-12" - Page Table Level filter
+
+What: /sys/bus/event_source/devices/dmar*/cpumask
+Date: Jan 2023
+KernelVersion: 6.3
+Contact: Kan Liang <kan.liang@linux.intel.com>
+Description: Read-only. This file always returns the CPU to which the
+ IOMMU pmu is bound for access to all IOMMU pmu
+ performance monitoring events.
diff --git a/drivers/iommu/intel/perfmon.c b/drivers/iommu/intel/perfmon.c
index 43a5075eaecd..f332232bb345 100644
--- a/drivers/iommu/intel/perfmon.c
+++ b/drivers/iommu/intel/perfmon.c
@@ -32,9 +32,30 @@ static struct attribute_group iommu_pmu_events_attr_group = {
.attrs = attrs_empty,
};
+static cpumask_t iommu_pmu_cpu_mask;
+
+static ssize_t iommu_pmu_cpumask_show(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ return cpumap_print_to_pagebuf(true, buf, &iommu_pmu_cpu_mask);
+}
+
+static DEVICE_ATTR(cpumask, S_IRUGO, iommu_pmu_cpumask_show, NULL);
+
+static struct attribute *iommu_pmu_cpumask_attrs[] = {
+ &dev_attr_cpumask.attr,
+ NULL
+};
+
+static struct attribute_group iommu_pmu_cpumask_attr_group = {
+ .attrs = iommu_pmu_cpumask_attrs,
+};
+
static const struct attribute_group *iommu_pmu_attr_groups[] = {
&iommu_pmu_format_attr_group,
&iommu_pmu_events_attr_group,
+ &iommu_pmu_cpumask_attr_group,
NULL
};
@@ -637,19 +658,97 @@ void free_iommu_pmu(struct intel_iommu *iommu)
iommu->pmu = NULL;
}
+static int iommu_pmu_cpu_online(unsigned int cpu)
+{
+ if (cpumask_empty(&iommu_pmu_cpu_mask))
+ cpumask_set_cpu(cpu, &iommu_pmu_cpu_mask);
+
+ return 0;
+}
+
+static int iommu_pmu_cpu_offline(unsigned int cpu)
+{
+ struct dmar_drhd_unit *drhd;
+ struct intel_iommu *iommu;
+ int target;
+
+ if (!cpumask_test_and_clear_cpu(cpu, &iommu_pmu_cpu_mask))
+ return 0;
+
+ target = cpumask_any_but(cpu_online_mask, cpu);
+
+ if (target < nr_cpu_ids)
+ cpumask_set_cpu(target, &iommu_pmu_cpu_mask);
+ else
+ target = -1;
+
+ rcu_read_lock();
+
+ for_each_iommu(iommu, drhd) {
+ if (!iommu->pmu)
+ continue;
+ perf_pmu_migrate_context(&iommu->pmu->pmu, cpu, target);
+ }
+ rcu_read_unlock();
+
+ return 0;
+}
+
+static int nr_iommu_pmu;
+
+static int iommu_pmu_cpuhp_setup(struct iommu_pmu *iommu_pmu)
+{
+ int ret;
+
+ if (nr_iommu_pmu++)
+ return 0;
+
+ ret = cpuhp_setup_state(CPUHP_AP_PERF_X86_IOMMU_PERF_ONLINE,
+ "driver/iommu/intel/perfmon:online",
+ iommu_pmu_cpu_online,
+ iommu_pmu_cpu_offline);
+ if (ret)
+ nr_iommu_pmu = 0;
+
+ return ret;
+}
+
+static void iommu_pmu_cpuhp_free(struct iommu_pmu *iommu_pmu)
+{
+ if (--nr_iommu_pmu)
+ return;
+
+ cpuhp_remove_state(CPUHP_AP_PERF_X86_IOMMU_PERF_ONLINE);
+}
+
void iommu_pmu_register(struct intel_iommu *iommu)
{
- if (!iommu->pmu)
+ struct iommu_pmu *iommu_pmu = iommu->pmu;
+
+ if (!iommu_pmu)
return;
- if (__iommu_pmu_register(iommu)) {
- pr_err("Failed to register PMU for iommu (seq_id = %d)\n",
- iommu->seq_id);
- }
+ if (__iommu_pmu_register(iommu))
+ goto err;
+
+ if (iommu_pmu_cpuhp_setup(iommu_pmu))
+ goto unregister;
+
+ return;
+
+unregister:
+ perf_pmu_unregister(&iommu_pmu->pmu);
+err:
+ pr_err("Failed to register PMU for iommu (seq_id = %d)\n", iommu->seq_id);
}
void iommu_pmu_unregister(struct intel_iommu *iommu)
{
- if (iommu->pmu)
- perf_pmu_unregister(&iommu->pmu->pmu);
+ struct iommu_pmu *iommu_pmu = iommu->pmu;
+
+ if (!iommu_pmu)
+ return;
+
+ iommu_pmu_cpuhp_free(iommu_pmu);
+ perf_pmu_unregister(&iommu_pmu->pmu);
}
diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h
index 6c6859bfc454..f2ea348ce3b0 100644
--- a/include/linux/cpuhotplug.h
+++ b/include/linux/cpuhotplug.h
@@ -221,6 +221,7 @@ enum cpuhp_state {
CPUHP_AP_PERF_X86_CQM_ONLINE,
CPUHP_AP_PERF_X86_CSTATE_ONLINE,
CPUHP_AP_PERF_X86_IDXD_ONLINE,
+ CPUHP_AP_PERF_X86_IOMMU_PERF_ONLINE,
CPUHP_AP_PERF_S390_CF_ONLINE,
CPUHP_AP_PERF_S390_SF_ONLINE,
CPUHP_AP_PERF_ARM_CCI_ONLINE,
--
2.35.1
next prev parent reply other threads:[~2023-01-11 20:25 UTC|newest]
Thread overview: 22+ messages / expand[flat|nested] mbox.gz Atom feed top
2023-01-11 20:24 [PATCH 0/7] iommu/vt-d: Support performance monitoring for IOMMU kan.liang
2023-01-11 20:24 ` [PATCH 1/7] iommu/vt-d: Support size of the register set in DRHD kan.liang
2023-01-12 12:42 ` Baolu Lu
2023-01-12 16:42 ` Liang, Kan
2023-01-11 20:24 ` [PATCH 2/7] iommu/vt-d: Retrieve IOMMU perfmon capability information kan.liang
2023-01-13 12:58 ` Baolu Lu
2023-01-13 16:32 ` Liang, Kan
2023-01-11 20:25 ` [PATCH 3/7] iommu/vt-d: Support Enhanced Command Interface kan.liang
2023-01-13 13:55 ` Baolu Lu
2023-01-13 14:12 ` Baolu Lu
2023-01-13 19:02 ` Liang, Kan
2023-01-13 18:19 ` Liang, Kan
2023-01-11 20:25 ` [PATCH 4/7] iommu/vt-d: Add IOMMU perfmon support kan.liang
2023-01-14 9:00 ` Baolu Lu
2023-01-16 15:12 ` Liang, Kan
2023-01-17 8:12 ` Baolu Lu
2023-01-11 20:25 ` kan.liang [this message]
2023-01-11 20:25 ` [PATCH 6/7] iommu/vt-d: Add IOMMU perfmon overflow handler support kan.liang
2023-01-14 11:05 ` Baolu Lu
2023-01-16 15:20 ` Liang, Kan
2023-01-17 16:52 ` Liang, Kan
2023-01-11 20:25 ` [PATCH 7/7] iommu/vt-d: Enable IOMMU perfmon support kan.liang
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20230111202504.378258-6-kan.liang@linux.intel.com \
--to=kan.liang@linux.intel.com \
--cc=baolu.lu@linux.intel.com \
--cc=dwmw2@infradead.org \
--cc=iommu@lists.linux.dev \
--cc=joro@8bytes.org \
--cc=lenb@kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=rafael.j.wysocki@intel.com \
--cc=robert.moore@intel.com \
--cc=robin.murphy@arm.com \
--cc=will@kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.