* [PATCH v2 01/16] x86/rectrl: Fake OOBMSM interface
2025-03-21 23:15 [PATCH v2 00/16] x86/resctrl telemetry monitoring Tony Luck
@ 2025-03-21 23:15 ` Tony Luck
2025-03-31 16:14 ` Reinette Chatre
2025-03-21 23:15 ` [PATCH v2 02/16] x86/resctrl: Move L3 initialization out of domain_add_cpu_mon() Tony Luck
` (14 subsequent siblings)
15 siblings, 1 reply; 37+ messages in thread
From: Tony Luck @ 2025-03-21 23:15 UTC (permalink / raw)
To: Fenghua Yu, Reinette Chatre, Maciej Wieczor-Retman, Peter Newman,
James Morse, Babu Moger, Drew Fustini, Dave Martin
Cc: linux-kernel, patches, Tony Luck
Real version is coming soon ... this is here so the remaining parts
will build (and run ... assuming a 2 socket system that supports RDT
monitoring ... only missing part is that the event counters just
report fixed values).
Real version of this would just add the INTEL_AET_RESCTRL Kconfig
option with dependency checks on
INTEL_VSEC=y && INTEL_AET_TELEMETRY=y && INTEL_AET_DISCOVERY=y
Just for RFC discussion.
Signed-off-by: Tony Luck <tony.luck@intel.com>
---
.../cpu/resctrl/fake_intel_aet_features.h | 73 +++++++++++++++++++
.../cpu/resctrl/fake_intel_aet_features.c | 65 +++++++++++++++++
arch/x86/Kconfig | 1 +
arch/x86/kernel/cpu/resctrl/Makefile | 1 +
drivers/platform/x86/intel/pmt/Kconfig | 3 +
5 files changed, 143 insertions(+)
create mode 100644 arch/x86/kernel/cpu/resctrl/fake_intel_aet_features.h
create mode 100644 arch/x86/kernel/cpu/resctrl/fake_intel_aet_features.c
diff --git a/arch/x86/kernel/cpu/resctrl/fake_intel_aet_features.h b/arch/x86/kernel/cpu/resctrl/fake_intel_aet_features.h
new file mode 100644
index 000000000000..c835c4108abc
--- /dev/null
+++ b/arch/x86/kernel/cpu/resctrl/fake_intel_aet_features.h
@@ -0,0 +1,73 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+/* Bits stolen from OOBMSM VSEC discovery code */
+
+enum pmt_feature_id {
+ FEATURE_INVALID = 0x0,
+ FEATURE_PER_CORE_PERF_TELEM = 0x1,
+ FEATURE_PER_CORE_ENV_TELEM = 0x2,
+ FEATURE_PER_RMID_PERF_TELEM = 0x3,
+ FEATURE_ACCEL_TELEM = 0x4,
+ FEATURE_UNCORE_TELEM = 0x5,
+ FEATURE_CRASH_LOG = 0x6,
+ FEATURE_PETE_LOG = 0x7,
+ FEATURE_TPMI_CTRL = 0x8,
+ FEATURE_RESERVED = 0x9,
+ FEATURE_TRACING = 0xA,
+ FEATURE_PER_RMID_ENERGY_TELEM = 0xB,
+ FEATURE_MAX = 0xB,
+};
+
+/**
+ * struct oobmsm_plat_info - Platform information for a device instance
+ * @cdie_mask: Mask of all compute dies in the partition
+ * @package_id: CPU Package id
+ * @partition: Package partition id when multiple VSEC PCI devices per package
+ * @segment: PCI segment ID
+ * @bus_number: PCI bus number
+ * @device_number: PCI device number
+ * @function_number: PCI function number
+ *
+ * Structure to store platform data for a OOBMSM device instance.
+ */
+struct oobmsm_plat_info {
+ u16 cdie_mask;
+ u8 package_id;
+ u8 partition;
+ u8 segment;
+ u8 bus_number;
+ u8 device_number;
+ u8 function_number;
+};
+
+enum oobmsm_supplier_type {
+ OOBMSM_SUP_PLAT_INFO,
+ OOBMSM_SUP_DISC_INFO,
+ OOBMSM_SUP_S3M_SIMICS,
+ OOBMSM_SUP_TYPE_MAX
+};
+
+struct oobmsm_mapping_supplier {
+ struct device *supplier_dev[OOBMSM_SUP_TYPE_MAX];
+ struct oobmsm_plat_info plat_info;
+ unsigned long features;
+};
+
+struct telemetry_region {
+ struct oobmsm_plat_info plat_info;
+ void __iomem *addr;
+ size_t size;
+ u32 guid;
+ u32 num_rmids;
+};
+
+struct pmt_feature_group {
+ enum pmt_feature_id id;
+ int count;
+ struct kref kref;
+ struct telemetry_region regions[];
+};
+
+struct pmt_feature_group *intel_pmt_get_regions_by_feature(enum pmt_feature_id id);
+
+void intel_pmt_put_feature_group(struct pmt_feature_group *feature_group);
diff --git a/arch/x86/kernel/cpu/resctrl/fake_intel_aet_features.c b/arch/x86/kernel/cpu/resctrl/fake_intel_aet_features.c
new file mode 100644
index 000000000000..b537068d99fb
--- /dev/null
+++ b/arch/x86/kernel/cpu/resctrl/fake_intel_aet_features.c
@@ -0,0 +1,65 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include <linux/cleanup.h>
+#include <linux/minmax.h>
+#include <linux/slab.h>
+#include "fake_intel_aet_features.h"
+#include <linux/intel_vsec.h>
+#include <linux/resctrl.h>
+
+#include "internal.h"
+
+#define ENERGY_QWORDS ((576 * 2) + 3)
+#define PERF_QWORDS ((576 * 7) + 3)
+
+static long pg[4 * ENERGY_QWORDS + 2 * PERF_QWORDS];
+
+static int __init fill(void)
+{
+ u64 val = 0;
+
+ for (int i = 0; i < sizeof(pg); i += sizeof(val)) {
+ pg[i / sizeof(val)] = BIT_ULL(63) + val;
+ val++;
+ }
+ return 0;
+}
+device_initcall(fill);
+
+#define PKG_REGION(_entry, _guid, _addr, _pkg) \
+ [_entry] = { .guid = _guid, .addr = (void __iomem *)_addr, .plat_info = { .package_id = _pkg }}
+
+static struct pmt_feature_group fake_energy = {
+ .count = 4,
+ .regions = {
+ PKG_REGION(0, 0x26696143, &pg[0 * ENERGY_QWORDS], 0),
+ PKG_REGION(1, 0x26696143, &pg[1 * ENERGY_QWORDS], 0),
+ PKG_REGION(2, 0x26696143, &pg[2 * ENERGY_QWORDS], 1),
+ PKG_REGION(3, 0x26696143, &pg[3 * ENERGY_QWORDS], 1)
+ }
+};
+
+static struct pmt_feature_group fake_perf = {
+ .count = 2,
+ .regions = {
+ PKG_REGION(0, 0x26557651, &pg[4 * ENERGY_QWORDS + 0 * PERF_QWORDS], 0),
+ PKG_REGION(1, 0x26557651, &pg[4 * ENERGY_QWORDS + 1 * PERF_QWORDS], 1)
+ }
+};
+
+struct pmt_feature_group *
+intel_pmt_get_regions_by_feature(enum pmt_feature_id id)
+{
+ switch (id) {
+ case FEATURE_PER_RMID_ENERGY_TELEM:
+ return &fake_energy;
+ case FEATURE_PER_RMID_PERF_TELEM:
+ return &fake_perf;
+ default:
+ return ERR_PTR(-ENOENT);
+ }
+ return ERR_PTR(-ENOENT);
+}
+
+void intel_pmt_put_feature_group(struct pmt_feature_group *feature_group)
+{
+}
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index ea29d22a621f..6112cb6cad05 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -504,6 +504,7 @@ config X86_CPU_RESCTRL
bool "x86 CPU resource control support"
depends on X86 && (CPU_SUP_INTEL || CPU_SUP_AMD)
depends on MISC_FILESYSTEMS
+ select INTEL_AET_RESCTRL if X86_64
select ARCH_HAS_CPU_RESCTRL
select RESCTRL_FS
select RESCTRL_FS_PSEUDO_LOCK
diff --git a/arch/x86/kernel/cpu/resctrl/Makefile b/arch/x86/kernel/cpu/resctrl/Makefile
index 909be78ec6da..2c3b09f95127 100644
--- a/arch/x86/kernel/cpu/resctrl/Makefile
+++ b/arch/x86/kernel/cpu/resctrl/Makefile
@@ -2,6 +2,7 @@
obj-$(CONFIG_X86_CPU_RESCTRL) += core.o rdtgroup.o monitor.o
obj-$(CONFIG_X86_CPU_RESCTRL) += ctrlmondata.o
obj-$(CONFIG_RESCTRL_FS_PSEUDO_LOCK) += pseudo_lock.o
+obj-$(CONFIG_INTEL_AET_RESCTRL) += fake_intel_aet_features.o
# To allow define_trace.h's recursive include:
CFLAGS_pseudo_lock.o = -I$(src)
diff --git a/drivers/platform/x86/intel/pmt/Kconfig b/drivers/platform/x86/intel/pmt/Kconfig
index e916fc966221..6d3b1f64efe9 100644
--- a/drivers/platform/x86/intel/pmt/Kconfig
+++ b/drivers/platform/x86/intel/pmt/Kconfig
@@ -38,3 +38,6 @@ config INTEL_PMT_CRASHLOG
To compile this driver as a module, choose M here: the module
will be called intel_pmt_crashlog.
+
+config INTEL_AET_RESCTRL
+ bool
--
2.48.1
^ permalink raw reply related [flat|nested] 37+ messages in thread* Re: [PATCH v2 01/16] x86/rectrl: Fake OOBMSM interface
2025-03-21 23:15 ` [PATCH v2 01/16] x86/rectrl: Fake OOBMSM interface Tony Luck
@ 2025-03-31 16:14 ` Reinette Chatre
2025-03-31 21:09 ` Luck, Tony
0 siblings, 1 reply; 37+ messages in thread
From: Reinette Chatre @ 2025-03-31 16:14 UTC (permalink / raw)
To: Tony Luck, Fenghua Yu, Maciej Wieczor-Retman, Peter Newman,
James Morse, Babu Moger, Drew Fustini, Dave Martin
Cc: linux-kernel, patches
Hi Tony,
(nit in shortlog: rectrl -> resctrl)
On 3/21/25 4:15 PM, Tony Luck wrote:
> Real version is coming soon ... this is here so the remaining parts
> will build (and run ... assuming a 2 socket system that supports RDT
> monitoring ... only missing part is that the event counters just
> report fixed values).
>
> Real version of this would just add the INTEL_AET_RESCTRL Kconfig
> option with dependency checks on
> INTEL_VSEC=y && INTEL_AET_TELEMETRY=y && INTEL_AET_DISCOVERY=y
>
> Just for RFC discussion.
Would appreciate a bit more detail about what OOBMSM provides
to be able to understand this series.
Even though changelog mentions "event counters" I am not able to
recognize any unique events provided by this interface. Instead it
just seems to provide a memory region that is entirely up to resctrl
to interpret based on the "unique identifier" hinted to in cover letter.
I could not find any description that connects the "unique identifier"
to the "guid" used in following patches. I think it will be helpful to
upfront connect the high level "unique identifier" with the "guid" that
the patches use to make this obvious.
Closest information to something that can be used by resctrl is
"num_rmids". Could you please add information on how "num_rmids" relates
to the memory region that is only specified via an addr and size?
>
> Signed-off-by: Tony Luck <tony.luck@intel.com>
> ---
> .../cpu/resctrl/fake_intel_aet_features.h | 73 +++++++++++++++++++
> .../cpu/resctrl/fake_intel_aet_features.c | 65 +++++++++++++++++
> arch/x86/Kconfig | 1 +
> arch/x86/kernel/cpu/resctrl/Makefile | 1 +
> drivers/platform/x86/intel/pmt/Kconfig | 3 +
> 5 files changed, 143 insertions(+)
> create mode 100644 arch/x86/kernel/cpu/resctrl/fake_intel_aet_features.h
> create mode 100644 arch/x86/kernel/cpu/resctrl/fake_intel_aet_features.c
>
> diff --git a/arch/x86/kernel/cpu/resctrl/fake_intel_aet_features.h b/arch/x86/kernel/cpu/resctrl/fake_intel_aet_features.h
> new file mode 100644
> index 000000000000..c835c4108abc
> --- /dev/null
> +++ b/arch/x86/kernel/cpu/resctrl/fake_intel_aet_features.h
> @@ -0,0 +1,73 @@
> +/* SPDX-License-Identifier: GPL-2.0 */
> +
> +/* Bits stolen from OOBMSM VSEC discovery code */
> +
> +enum pmt_feature_id {
> + FEATURE_INVALID = 0x0,
> + FEATURE_PER_CORE_PERF_TELEM = 0x1,
> + FEATURE_PER_CORE_ENV_TELEM = 0x2,
> + FEATURE_PER_RMID_PERF_TELEM = 0x3,
> + FEATURE_ACCEL_TELEM = 0x4,
> + FEATURE_UNCORE_TELEM = 0x5,
> + FEATURE_CRASH_LOG = 0x6,
> + FEATURE_PETE_LOG = 0x7,
> + FEATURE_TPMI_CTRL = 0x8,
> + FEATURE_RESERVED = 0x9,
> + FEATURE_TRACING = 0xA,
> + FEATURE_PER_RMID_ENERGY_TELEM = 0xB,
> + FEATURE_MAX = 0xB,
> +};
> +
> +/**
> + * struct oobmsm_plat_info - Platform information for a device instance
> + * @cdie_mask: Mask of all compute dies in the partition
> + * @package_id: CPU Package id
> + * @partition: Package partition id when multiple VSEC PCI devices per package
> + * @segment: PCI segment ID
> + * @bus_number: PCI bus number
> + * @device_number: PCI device number
> + * @function_number: PCI function number
> + *
> + * Structure to store platform data for a OOBMSM device instance.
> + */
> +struct oobmsm_plat_info {
> + u16 cdie_mask;
> + u8 package_id;
> + u8 partition;
> + u8 segment;
> + u8 bus_number;
> + u8 device_number;
> + u8 function_number;
> +};
> +
> +enum oobmsm_supplier_type {
> + OOBMSM_SUP_PLAT_INFO,
> + OOBMSM_SUP_DISC_INFO,
> + OOBMSM_SUP_S3M_SIMICS,
> + OOBMSM_SUP_TYPE_MAX
> +};
> +
> +struct oobmsm_mapping_supplier {
> + struct device *supplier_dev[OOBMSM_SUP_TYPE_MAX];
> + struct oobmsm_plat_info plat_info;
> + unsigned long features;
> +};
> +
> +struct telemetry_region {
> + struct oobmsm_plat_info plat_info;
> + void __iomem *addr;
> + size_t size;
> + u32 guid;
> + u32 num_rmids;
> +};
Could there be some description of what a "telemetry_region" is and
how the members should be interpreted by resctrl?
> +
> +struct pmt_feature_group {
> + enum pmt_feature_id id;
> + int count;
> + struct kref kref;
> + struct telemetry_region regions[];
> +};
> +
> +struct pmt_feature_group *intel_pmt_get_regions_by_feature(enum pmt_feature_id id);
> +
> +void intel_pmt_put_feature_group(struct pmt_feature_group *feature_group);
> diff --git a/arch/x86/kernel/cpu/resctrl/fake_intel_aet_features.c b/arch/x86/kernel/cpu/resctrl/fake_intel_aet_features.c
> new file mode 100644
> index 000000000000..b537068d99fb
> --- /dev/null
> +++ b/arch/x86/kernel/cpu/resctrl/fake_intel_aet_features.c
> @@ -0,0 +1,65 @@
> +// SPDX-License-Identifier: GPL-2.0-only
> +#include <linux/cleanup.h>
> +#include <linux/minmax.h>
> +#include <linux/slab.h>
> +#include "fake_intel_aet_features.h"
> +#include <linux/intel_vsec.h>
> +#include <linux/resctrl.h>
> +
> +#include "internal.h"
> +
> +#define ENERGY_QWORDS ((576 * 2) + 3)
> +#define PERF_QWORDS ((576 * 7) + 3)
> +
> +static long pg[4 * ENERGY_QWORDS + 2 * PERF_QWORDS];
> +
> +static int __init fill(void)
> +{
> + u64 val = 0;
> +
> + for (int i = 0; i < sizeof(pg); i += sizeof(val)) {
> + pg[i / sizeof(val)] = BIT_ULL(63) + val;
> + val++;
> + }
> + return 0;
> +}
> +device_initcall(fill);
> +
> +#define PKG_REGION(_entry, _guid, _addr, _pkg) \
> + [_entry] = { .guid = _guid, .addr = (void __iomem *)_addr, .plat_info = { .package_id = _pkg }}
> +
> +static struct pmt_feature_group fake_energy = {
> + .count = 4,
> + .regions = {
> + PKG_REGION(0, 0x26696143, &pg[0 * ENERGY_QWORDS], 0),
> + PKG_REGION(1, 0x26696143, &pg[1 * ENERGY_QWORDS], 0),
> + PKG_REGION(2, 0x26696143, &pg[2 * ENERGY_QWORDS], 1),
> + PKG_REGION(3, 0x26696143, &pg[3 * ENERGY_QWORDS], 1)
> + }
> +};
> +
> +static struct pmt_feature_group fake_perf = {
> + .count = 2,
> + .regions = {
> + PKG_REGION(0, 0x26557651, &pg[4 * ENERGY_QWORDS + 0 * PERF_QWORDS], 0),
> + PKG_REGION(1, 0x26557651, &pg[4 * ENERGY_QWORDS + 1 * PERF_QWORDS], 1)
> + }
> +};
Could there be some guidance on how to interpret the hardcoded data? For example,
one group contains two regions and the other four. Was this just done for testing
to ensure implementation supports multiple regions per package or ...?
Is it expected that multiple feature groups could have different number of regions?
I also think initializing the id would be helpful to understand the example better.
> +
> +struct pmt_feature_group *
> +intel_pmt_get_regions_by_feature(enum pmt_feature_id id)
> +{
> + switch (id) {
> + case FEATURE_PER_RMID_ENERGY_TELEM:
> + return &fake_energy;
> + case FEATURE_PER_RMID_PERF_TELEM:
> + return &fake_perf;
> + default:
> + return ERR_PTR(-ENOENT);
> + }
> + return ERR_PTR(-ENOENT);
> +}
> +
> +void intel_pmt_put_feature_group(struct pmt_feature_group *feature_group)
> +{
> +}
> diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
> index ea29d22a621f..6112cb6cad05 100644
> --- a/arch/x86/Kconfig
> +++ b/arch/x86/Kconfig
> @@ -504,6 +504,7 @@ config X86_CPU_RESCTRL
> bool "x86 CPU resource control support"
> depends on X86 && (CPU_SUP_INTEL || CPU_SUP_AMD)
> depends on MISC_FILESYSTEMS
> + select INTEL_AET_RESCTRL if X86_64
I expect something like this will stay (i.e. not part of the "Fake" code).
In this case, should this perhaps only be selected on Intel (CPU_SUP_INTEL)?
(nit: the tab is unexpected)
> select ARCH_HAS_CPU_RESCTRL
> select RESCTRL_FS
> select RESCTRL_FS_PSEUDO_LOCK
> diff --git a/arch/x86/kernel/cpu/resctrl/Makefile b/arch/x86/kernel/cpu/resctrl/Makefile
> index 909be78ec6da..2c3b09f95127 100644
> --- a/arch/x86/kernel/cpu/resctrl/Makefile
> +++ b/arch/x86/kernel/cpu/resctrl/Makefile
> @@ -2,6 +2,7 @@
> obj-$(CONFIG_X86_CPU_RESCTRL) += core.o rdtgroup.o monitor.o
> obj-$(CONFIG_X86_CPU_RESCTRL) += ctrlmondata.o
> obj-$(CONFIG_RESCTRL_FS_PSEUDO_LOCK) += pseudo_lock.o
> +obj-$(CONFIG_INTEL_AET_RESCTRL) += fake_intel_aet_features.o
>
> # To allow define_trace.h's recursive include:
> CFLAGS_pseudo_lock.o = -I$(src)
> diff --git a/drivers/platform/x86/intel/pmt/Kconfig b/drivers/platform/x86/intel/pmt/Kconfig
> index e916fc966221..6d3b1f64efe9 100644
> --- a/drivers/platform/x86/intel/pmt/Kconfig
> +++ b/drivers/platform/x86/intel/pmt/Kconfig
> @@ -38,3 +38,6 @@ config INTEL_PMT_CRASHLOG
>
> To compile this driver as a module, choose M here: the module
> will be called intel_pmt_crashlog.
> +
> +config INTEL_AET_RESCTRL
> + bool
I expect that this will also stay ... if so, could this be expanded to
have needed dependency and also be accompanied by some documentation. Something like
"Architecture selects this when ...." This will make it clear that this is
not something a user will select during kernel build while also explaining
what it is used for.
Reinette
^ permalink raw reply [flat|nested] 37+ messages in thread* Re: [PATCH v2 01/16] x86/rectrl: Fake OOBMSM interface
2025-03-31 16:14 ` Reinette Chatre
@ 2025-03-31 21:09 ` Luck, Tony
0 siblings, 0 replies; 37+ messages in thread
From: Luck, Tony @ 2025-03-31 21:09 UTC (permalink / raw)
To: Reinette Chatre
Cc: Fenghua Yu, Maciej Wieczor-Retman, Peter Newman, James Morse,
Babu Moger, Drew Fustini, Dave Martin, linux-kernel, patches
On Mon, Mar 31, 2025 at 09:14:55AM -0700, Reinette Chatre wrote:
> Hi Tony,
Thanks for the review.
> (nit in shortlog: rectrl -> resctrl)
>
> On 3/21/25 4:15 PM, Tony Luck wrote:
> > Real version is coming soon ... this is here so the remaining parts
> > will build (and run ... assuming a 2 socket system that supports RDT
> > monitoring ... only missing part is that the event counters just
> > report fixed values).
> >
> > Real version of this would just add the INTEL_AET_RESCTRL Kconfig
> > option with dependency checks on
> > INTEL_VSEC=y && INTEL_AET_TELEMETRY=y && INTEL_AET_DISCOVERY=y
> >
> > Just for RFC discussion.
>
> Would appreciate a bit more detail about what OOBMSM provides
> to be able to understand this series.
Short answer is just what you see in this structure:
struct telemetry_region {
struct oobmsm_plat_info plat_info;
void __iomem *addr;
size_t size;
u32 guid;
u32 num_rmids;
};
I've passed on your suggestion for some comments on the
definition to David Box, since this structure will be part
of his patches to enable the discovery of OOBMSM features.
The trail for breadcrumbs from this to event counters is:
1) Lookup the "guid" in XML files at https://github.com/intel/Intel-PMT
2) Each of those lists each event counter in the MMIO region referred
to by the <addr,size> fields in excruciating detail. E.g. for the first
register (at offset 0 from "addr"):
<TELI:description>Accumulated core energy usage across all cores running RMID 0</TELI:description>
<TELI:SampleType>Snapshot</TELI:SampleType>
<TransFormInputs xmlns="http://schemas.intel.com/telemetry/base/common">
<TransFormInput varName="parameter_0">
<sampleGroupIDREF>Container_0</sampleGroupIDREF>
<sampleIDREF>RMID_0_CORE_ENERGY</sampleIDREF>
</TransFormInput>
</TransFormInputs>
<TELI:transformREF>U63.45.18</TELI:transformREF>
</TELI:T_AggregatorSample>
<TELI:T_AggregatorSample sampleName="RMID_0_CORE_ENERGY_VALID" sampleGroup="RMID_0_CORE_ENERGY" datatypeIDREF="tcounter_valid" sampleID="1">
<TELI:description>If set, RMID_0_CORE_ENERGY counter reading is valid</TELI: description>
<TELI:SampleType>Snapshot</TELI:SampleType>
<TransFormInputs xmlns="http://schemas.intel.com/telemetry/base/common">
<TransFormInput varName="parameter_0">
<sampleGroupIDREF>Container_0</sampleGroupIDREF>
<sampleIDREF>RMID_0_CORE_ENERGY_VALID</sampleIDREF>
</TransFormInput>
</TransFormInputs>
<TELI:transformREF>passthru</TELI:transformREF>
</TELI:T_AggregatorSample>
This verbosity repeats for each of the events for RMID 0, then for RMID 1, 2, ...
The important bits for Linux are the name, the type, and the valid bit.
I will add more comments to the Linux structures when they are added
in one of the later patches in this series.
>
> Even though changelog mentions "event counters" I am not able to
> recognize any unique events provided by this interface. Instead it
> just seems to provide a memory region that is entirely up to resctrl
> to interpret based on the "unique identifier" hinted to in cover letter.
> I could not find any description that connects the "unique identifier"
> to the "guid" used in following patches. I think it will be helpful to
> upfront connect the high level "unique identifier" with the "guid" that
> the patches use to make this obvious.
>
> Closest information to something that can be used by resctrl is
> "num_rmids". Could you please add information on how "num_rmids" relates
> to the memory region that is only specified via an addr and size?
>
> >
> > Signed-off-by: Tony Luck <tony.luck@intel.com>
> > ---
> > .../cpu/resctrl/fake_intel_aet_features.h | 73 +++++++++++++++++++
> > .../cpu/resctrl/fake_intel_aet_features.c | 65 +++++++++++++++++
> > arch/x86/Kconfig | 1 +
> > arch/x86/kernel/cpu/resctrl/Makefile | 1 +
> > drivers/platform/x86/intel/pmt/Kconfig | 3 +
> > 5 files changed, 143 insertions(+)
> > create mode 100644 arch/x86/kernel/cpu/resctrl/fake_intel_aet_features.h
> > create mode 100644 arch/x86/kernel/cpu/resctrl/fake_intel_aet_features.c
> >
> > diff --git a/arch/x86/kernel/cpu/resctrl/fake_intel_aet_features.h b/arch/x86/kernel/cpu/resctrl/fake_intel_aet_features.h
> > new file mode 100644
> > index 000000000000..c835c4108abc
> > --- /dev/null
> > +++ b/arch/x86/kernel/cpu/resctrl/fake_intel_aet_features.h
> > @@ -0,0 +1,73 @@
> > +/* SPDX-License-Identifier: GPL-2.0 */
> > +
> > +/* Bits stolen from OOBMSM VSEC discovery code */
> > +
> > +enum pmt_feature_id {
> > + FEATURE_INVALID = 0x0,
> > + FEATURE_PER_CORE_PERF_TELEM = 0x1,
> > + FEATURE_PER_CORE_ENV_TELEM = 0x2,
> > + FEATURE_PER_RMID_PERF_TELEM = 0x3,
> > + FEATURE_ACCEL_TELEM = 0x4,
> > + FEATURE_UNCORE_TELEM = 0x5,
> > + FEATURE_CRASH_LOG = 0x6,
> > + FEATURE_PETE_LOG = 0x7,
> > + FEATURE_TPMI_CTRL = 0x8,
> > + FEATURE_RESERVED = 0x9,
> > + FEATURE_TRACING = 0xA,
> > + FEATURE_PER_RMID_ENERGY_TELEM = 0xB,
> > + FEATURE_MAX = 0xB,
> > +};
> > +
> > +/**
> > + * struct oobmsm_plat_info - Platform information for a device instance
> > + * @cdie_mask: Mask of all compute dies in the partition
> > + * @package_id: CPU Package id
> > + * @partition: Package partition id when multiple VSEC PCI devices per package
> > + * @segment: PCI segment ID
> > + * @bus_number: PCI bus number
> > + * @device_number: PCI device number
> > + * @function_number: PCI function number
> > + *
> > + * Structure to store platform data for a OOBMSM device instance.
> > + */
> > +struct oobmsm_plat_info {
> > + u16 cdie_mask;
> > + u8 package_id;
> > + u8 partition;
> > + u8 segment;
> > + u8 bus_number;
> > + u8 device_number;
> > + u8 function_number;
> > +};
> > +
> > +enum oobmsm_supplier_type {
> > + OOBMSM_SUP_PLAT_INFO,
> > + OOBMSM_SUP_DISC_INFO,
> > + OOBMSM_SUP_S3M_SIMICS,
> > + OOBMSM_SUP_TYPE_MAX
> > +};
> > +
> > +struct oobmsm_mapping_supplier {
> > + struct device *supplier_dev[OOBMSM_SUP_TYPE_MAX];
> > + struct oobmsm_plat_info plat_info;
> > + unsigned long features;
> > +};
> > +
> > +struct telemetry_region {
> > + struct oobmsm_plat_info plat_info;
> > + void __iomem *addr;
> > + size_t size;
> > + u32 guid;
> > + u32 num_rmids;
> > +};
>
> Could there be some description of what a "telemetry_region" is and
> how the members should be interpreted by resctrl?
As mentioned above. I passed this request to David Box.
> > +
> > +struct pmt_feature_group {
> > + enum pmt_feature_id id;
> > + int count;
> > + struct kref kref;
> > + struct telemetry_region regions[];
> > +};
> > +
> > +struct pmt_feature_group *intel_pmt_get_regions_by_feature(enum pmt_feature_id id);
> > +
> > +void intel_pmt_put_feature_group(struct pmt_feature_group *feature_group);
> > diff --git a/arch/x86/kernel/cpu/resctrl/fake_intel_aet_features.c b/arch/x86/kernel/cpu/resctrl/fake_intel_aet_features.c
> > new file mode 100644
> > index 000000000000..b537068d99fb
> > --- /dev/null
> > +++ b/arch/x86/kernel/cpu/resctrl/fake_intel_aet_features.c
> > @@ -0,0 +1,65 @@
> > +// SPDX-License-Identifier: GPL-2.0-only
> > +#include <linux/cleanup.h>
> > +#include <linux/minmax.h>
> > +#include <linux/slab.h>
> > +#include "fake_intel_aet_features.h"
> > +#include <linux/intel_vsec.h>
> > +#include <linux/resctrl.h>
> > +
> > +#include "internal.h"
> > +
> > +#define ENERGY_QWORDS ((576 * 2) + 3)
> > +#define PERF_QWORDS ((576 * 7) + 3)
> > +
> > +static long pg[4 * ENERGY_QWORDS + 2 * PERF_QWORDS];
> > +
> > +static int __init fill(void)
> > +{
> > + u64 val = 0;
> > +
> > + for (int i = 0; i < sizeof(pg); i += sizeof(val)) {
> > + pg[i / sizeof(val)] = BIT_ULL(63) + val;
> > + val++;
> > + }
> > + return 0;
> > +}
> > +device_initcall(fill);
> > +
> > +#define PKG_REGION(_entry, _guid, _addr, _pkg) \
> > + [_entry] = { .guid = _guid, .addr = (void __iomem *)_addr, .plat_info = { .package_id = _pkg }}
> > +
> > +static struct pmt_feature_group fake_energy = {
> > + .count = 4,
> > + .regions = {
> > + PKG_REGION(0, 0x26696143, &pg[0 * ENERGY_QWORDS], 0),
> > + PKG_REGION(1, 0x26696143, &pg[1 * ENERGY_QWORDS], 0),
> > + PKG_REGION(2, 0x26696143, &pg[2 * ENERGY_QWORDS], 1),
> > + PKG_REGION(3, 0x26696143, &pg[3 * ENERGY_QWORDS], 1)
> > + }
> > +};
> > +
> > +static struct pmt_feature_group fake_perf = {
> > + .count = 2,
> > + .regions = {
> > + PKG_REGION(0, 0x26557651, &pg[4 * ENERGY_QWORDS + 0 * PERF_QWORDS], 0),
> > + PKG_REGION(1, 0x26557651, &pg[4 * ENERGY_QWORDS + 1 * PERF_QWORDS], 1)
> > + }
> > +};
>
> Could there be some guidance on how to interpret the hardcoded data? For example,
> one group contains two regions and the other four. Was this just done for testing
> to ensure implementation supports multiple regions per package or ...?
> Is it expected that multiple feature groups could have different number of regions?
> I also think initializing the id would be helpful to understand the example better.
You are correct. The varying number of regions was simply to exercise
the code that needs to aggregate values from multiple regions. The
first implementation of this has homogeneous aggregators all working
for the same events from the same CPUs. But I don't see that as a
requirement. I could envision a system with different aggregators
for different events, perhaps servicing different groups of CPUs on
the same package.
> > +
> > +struct pmt_feature_group *
> > +intel_pmt_get_regions_by_feature(enum pmt_feature_id id)
> > +{
> > + switch (id) {
> > + case FEATURE_PER_RMID_ENERGY_TELEM:
> > + return &fake_energy;
> > + case FEATURE_PER_RMID_PERF_TELEM:
> > + return &fake_perf;
> > + default:
> > + return ERR_PTR(-ENOENT);
> > + }
> > + return ERR_PTR(-ENOENT);
> > +}
> > +
> > +void intel_pmt_put_feature_group(struct pmt_feature_group *feature_group)
> > +{
> > +}
> > diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
> > index ea29d22a621f..6112cb6cad05 100644
> > --- a/arch/x86/Kconfig
> > +++ b/arch/x86/Kconfig
> > @@ -504,6 +504,7 @@ config X86_CPU_RESCTRL
> > bool "x86 CPU resource control support"
> > depends on X86 && (CPU_SUP_INTEL || CPU_SUP_AMD)
> > depends on MISC_FILESYSTEMS
> > + select INTEL_AET_RESCTRL if X86_64
>
> I expect something like this will stay (i.e. not part of the "Fake" code).
> In this case, should this perhaps only be selected on Intel (CPU_SUP_INTEL)?
> (nit: the tab is unexpected)
Yes, this part isn't fake, and is should depend on CPU_SUP_INTEL.
> > select ARCH_HAS_CPU_RESCTRL
> > select RESCTRL_FS
> > select RESCTRL_FS_PSEUDO_LOCK
> > diff --git a/arch/x86/kernel/cpu/resctrl/Makefile b/arch/x86/kernel/cpu/resctrl/Makefile
> > index 909be78ec6da..2c3b09f95127 100644
> > --- a/arch/x86/kernel/cpu/resctrl/Makefile
> > +++ b/arch/x86/kernel/cpu/resctrl/Makefile
> > @@ -2,6 +2,7 @@
> > obj-$(CONFIG_X86_CPU_RESCTRL) += core.o rdtgroup.o monitor.o
> > obj-$(CONFIG_X86_CPU_RESCTRL) += ctrlmondata.o
> > obj-$(CONFIG_RESCTRL_FS_PSEUDO_LOCK) += pseudo_lock.o
> > +obj-$(CONFIG_INTEL_AET_RESCTRL) += fake_intel_aet_features.o
> >
> > # To allow define_trace.h's recursive include:
> > CFLAGS_pseudo_lock.o = -I$(src)
> > diff --git a/drivers/platform/x86/intel/pmt/Kconfig b/drivers/platform/x86/intel/pmt/Kconfig
> > index e916fc966221..6d3b1f64efe9 100644
> > --- a/drivers/platform/x86/intel/pmt/Kconfig
> > +++ b/drivers/platform/x86/intel/pmt/Kconfig
> > @@ -38,3 +38,6 @@ config INTEL_PMT_CRASHLOG
> >
> > To compile this driver as a module, choose M here: the module
> > will be called intel_pmt_crashlog.
> > +
> > +config INTEL_AET_RESCTRL
> > + bool
>
> I expect that this will also stay ... if so, could this be expanded to
> have needed dependency and also be accompanied by some documentation. Something like
> "Architecture selects this when ...." This will make it clear that this is
> not something a user will select during kernel build while also explaining
> what it is used for.
This part will also stay. I will add a comment as you suggest.
> Reinette
-Tony
^ permalink raw reply [flat|nested] 37+ messages in thread
* [PATCH v2 02/16] x86/resctrl: Move L3 initialization out of domain_add_cpu_mon()
2025-03-21 23:15 [PATCH v2 00/16] x86/resctrl telemetry monitoring Tony Luck
2025-03-21 23:15 ` [PATCH v2 01/16] x86/rectrl: Fake OOBMSM interface Tony Luck
@ 2025-03-21 23:15 ` Tony Luck
2025-03-21 23:15 ` [PATCH v2 03/16] x86/resctrl: Refactor domain_remove_cpu_mon() ready for new domain types Tony Luck
` (13 subsequent siblings)
15 siblings, 0 replies; 37+ messages in thread
From: Tony Luck @ 2025-03-21 23:15 UTC (permalink / raw)
To: Fenghua Yu, Reinette Chatre, Maciej Wieczor-Retman, Peter Newman,
James Morse, Babu Moger, Drew Fustini, Dave Martin
Cc: linux-kernel, patches, Tony Luck
To prepare for additional types of monitoring domains, move all the
L3 specific initialization into a helper function.
Signed-off-by: Tony Luck <tony.luck@intel.com>
---
arch/x86/kernel/cpu/resctrl/core.c | 52 +++++++++++++++++-------------
1 file changed, 30 insertions(+), 22 deletions(-)
diff --git a/arch/x86/kernel/cpu/resctrl/core.c b/arch/x86/kernel/cpu/resctrl/core.c
index 6ed0d4f5d6a3..4205aeb4f979 100644
--- a/arch/x86/kernel/cpu/resctrl/core.c
+++ b/arch/x86/kernel/cpu/resctrl/core.c
@@ -492,33 +492,12 @@ static void domain_add_cpu_ctrl(int cpu, struct rdt_resource *r)
}
}
-static void domain_add_cpu_mon(int cpu, struct rdt_resource *r)
+static void setup_l3_mon_domain(int cpu, int id, struct rdt_resource *r, struct list_head *add_pos)
{
- int id = get_domain_id_from_scope(cpu, r->mon_scope);
- struct list_head *add_pos = NULL;
struct rdt_hw_mon_domain *hw_dom;
- struct rdt_domain_hdr *hdr;
struct rdt_mon_domain *d;
int err;
- lockdep_assert_held(&domain_list_lock);
-
- if (id < 0) {
- pr_warn_once("Can't find monitor domain id for CPU:%d scope:%d for resource %s\n",
- cpu, r->mon_scope, r->name);
- return;
- }
-
- hdr = resctrl_find_domain(&r->mon_domains, id, &add_pos);
- if (hdr) {
- if (WARN_ON_ONCE(hdr->type != RESCTRL_MON_DOMAIN))
- return;
- d = container_of(hdr, struct rdt_mon_domain, hdr);
-
- cpumask_set_cpu(cpu, &d->hdr.cpu_mask);
- return;
- }
-
hw_dom = kzalloc_node(sizeof(*hw_dom), GFP_KERNEL, cpu_to_node(cpu));
if (!hw_dom)
return;
@@ -551,6 +530,35 @@ static void domain_add_cpu_mon(int cpu, struct rdt_resource *r)
}
}
+static void domain_add_cpu_mon(int cpu, struct rdt_resource *r)
+{
+ int id = get_domain_id_from_scope(cpu, r->mon_scope);
+ struct list_head *add_pos = NULL;
+ struct rdt_domain_hdr *hdr;
+
+ lockdep_assert_held(&domain_list_lock);
+
+ if (id < 0) {
+ pr_warn_once("Can't find monitor domain id for CPU:%d scope:%d for resource %s\n",
+ cpu, r->mon_scope, r->name);
+ return;
+ }
+
+ hdr = resctrl_find_domain(&r->mon_domains, id, &add_pos);
+ if (hdr) {
+ cpumask_set_cpu(cpu, &hdr->cpu_mask);
+ return;
+ }
+
+ switch (r->rid) {
+ case RDT_RESOURCE_L3:
+ setup_l3_mon_domain(cpu, id, r, add_pos);
+ break;
+ default:
+ WARN_ON_ONCE(1);
+ }
+}
+
static void domain_add_cpu(int cpu, struct rdt_resource *r)
{
if (r->alloc_capable)
--
2.48.1
^ permalink raw reply related [flat|nested] 37+ messages in thread* [PATCH v2 03/16] x86/resctrl: Refactor domain_remove_cpu_mon() ready for new domain types
2025-03-21 23:15 [PATCH v2 00/16] x86/resctrl telemetry monitoring Tony Luck
2025-03-21 23:15 ` [PATCH v2 01/16] x86/rectrl: Fake OOBMSM interface Tony Luck
2025-03-21 23:15 ` [PATCH v2 02/16] x86/resctrl: Move L3 initialization out of domain_add_cpu_mon() Tony Luck
@ 2025-03-21 23:15 ` Tony Luck
2025-03-21 23:15 ` [PATCH v2 04/16] x86/resctrl: Change generic monitor functions to use struct rdt_domain_hdr Tony Luck
` (12 subsequent siblings)
15 siblings, 0 replies; 37+ messages in thread
From: Tony Luck @ 2025-03-21 23:15 UTC (permalink / raw)
To: Fenghua Yu, Reinette Chatre, Maciej Wieczor-Retman, Peter Newman,
James Morse, Babu Moger, Drew Fustini, Dave Martin
Cc: linux-kernel, patches, Tony Luck
Different types of domains require different actions when the last
CPU in the domain is removed.
Refactor to make it easy to add new actions for new types of domains.
Signed-off-by: Tony Luck <tony.luck@intel.com>
---
arch/x86/kernel/cpu/resctrl/core.c | 15 ++++++++-------
1 file changed, 8 insertions(+), 7 deletions(-)
diff --git a/arch/x86/kernel/cpu/resctrl/core.c b/arch/x86/kernel/cpu/resctrl/core.c
index 4205aeb4f979..3c343d0d18fb 100644
--- a/arch/x86/kernel/cpu/resctrl/core.c
+++ b/arch/x86/kernel/cpu/resctrl/core.c
@@ -635,20 +635,21 @@ static void domain_remove_cpu_mon(int cpu, struct rdt_resource *r)
return;
}
- if (WARN_ON_ONCE(hdr->type != RESCTRL_MON_DOMAIN))
+ cpumask_clear_cpu(cpu, &hdr->cpu_mask);
+ if (!cpumask_empty(&hdr->cpu_mask))
return;
d = container_of(hdr, struct rdt_mon_domain, hdr);
- hw_dom = resctrl_to_arch_mon_dom(d);
- cpumask_clear_cpu(cpu, &d->hdr.cpu_mask);
- if (cpumask_empty(&d->hdr.cpu_mask)) {
+ switch (r->rid) {
+ case RDT_RESOURCE_L3:
+ hw_dom = resctrl_to_arch_mon_dom(d);
+
resctrl_offline_mon_domain(r, d);
- list_del_rcu(&d->hdr.list);
+ list_del_rcu(&hdr->list);
synchronize_rcu();
mon_domain_free(hw_dom);
-
- return;
+ break;
}
}
--
2.48.1
^ permalink raw reply related [flat|nested] 37+ messages in thread* [PATCH v2 04/16] x86/resctrl: Change generic monitor functions to use struct rdt_domain_hdr
2025-03-21 23:15 [PATCH v2 00/16] x86/resctrl telemetry monitoring Tony Luck
` (2 preceding siblings ...)
2025-03-21 23:15 ` [PATCH v2 03/16] x86/resctrl: Refactor domain_remove_cpu_mon() ready for new domain types Tony Luck
@ 2025-03-21 23:15 ` Tony Luck
2025-03-31 16:15 ` Reinette Chatre
2025-03-21 23:15 ` [PATCH v2 05/16] x86/resctrl: Add and initialize rdt_resource for package scope core monitor Tony Luck
` (11 subsequent siblings)
15 siblings, 1 reply; 37+ messages in thread
From: Tony Luck @ 2025-03-21 23:15 UTC (permalink / raw)
To: Fenghua Yu, Reinette Chatre, Maciej Wieczor-Retman, Peter Newman,
James Morse, Babu Moger, Drew Fustini, Dave Martin
Cc: linux-kernel, patches, Tony Luck
Functions that don't need the internal details of the rdt_mon_domain
can operate on just the rdt_domain_hdr.
Signed-off-by: Tony Luck <tony.luck@intel.com>
---
include/linux/resctrl.h | 2 +-
arch/x86/kernel/cpu/resctrl/core.c | 2 +-
fs/resctrl/rdtgroup.c | 57 +++++++++++++++++++-----------
3 files changed, 39 insertions(+), 22 deletions(-)
diff --git a/include/linux/resctrl.h b/include/linux/resctrl.h
index 5c7c8bf2c47f..25c3ee78de3d 100644
--- a/include/linux/resctrl.h
+++ b/include/linux/resctrl.h
@@ -413,7 +413,7 @@ int resctrl_arch_update_one(struct rdt_resource *r, struct rdt_ctrl_domain *d,
u32 resctrl_arch_get_config(struct rdt_resource *r, struct rdt_ctrl_domain *d,
u32 closid, enum resctrl_conf_type type);
int resctrl_online_ctrl_domain(struct rdt_resource *r, struct rdt_ctrl_domain *d);
-int resctrl_online_mon_domain(struct rdt_resource *r, struct rdt_mon_domain *d);
+int resctrl_online_mon_domain(struct rdt_resource *r, struct rdt_domain_hdr *hdr);
void resctrl_offline_ctrl_domain(struct rdt_resource *r, struct rdt_ctrl_domain *d);
void resctrl_offline_mon_domain(struct rdt_resource *r, struct rdt_mon_domain *d);
void resctrl_online_cpu(unsigned int cpu);
diff --git a/arch/x86/kernel/cpu/resctrl/core.c b/arch/x86/kernel/cpu/resctrl/core.c
index 3c343d0d18fb..c316268b4442 100644
--- a/arch/x86/kernel/cpu/resctrl/core.c
+++ b/arch/x86/kernel/cpu/resctrl/core.c
@@ -522,7 +522,7 @@ static void setup_l3_mon_domain(int cpu, int id, struct rdt_resource *r, struct
list_add_tail_rcu(&d->hdr.list, add_pos);
- err = resctrl_online_mon_domain(r, d);
+ err = resctrl_online_mon_domain(r, &d->hdr);
if (err) {
list_del_rcu(&d->hdr.list);
synchronize_rcu();
diff --git a/fs/resctrl/rdtgroup.c b/fs/resctrl/rdtgroup.c
index 234ec9dbe5b3..dbfb7d3bc3bc 100644
--- a/fs/resctrl/rdtgroup.c
+++ b/fs/resctrl/rdtgroup.c
@@ -3048,7 +3048,7 @@ static void mon_rmdir_one_subdir(struct rdtgroup *rdtgrp, char *name, char *subn
* when last domain being summed is removed.
*/
static void rmdir_mondata_subdir_allrdtgrp(struct rdt_resource *r,
- struct rdt_mon_domain *d)
+ struct rdt_domain_hdr *hdr)
{
struct rdtgroup *prgrp, *crgrp;
char subname[32];
@@ -3056,9 +3056,15 @@ static void rmdir_mondata_subdir_allrdtgrp(struct rdt_resource *r,
char name[32];
snc_mode = r->mon_scope == RESCTRL_L3_NODE;
- sprintf(name, "mon_%s_%02d", r->name, snc_mode ? d->ci->id : d->hdr.id);
- if (snc_mode)
- sprintf(subname, "mon_sub_%s_%02d", r->name, d->hdr.id);
+ if (snc_mode) {
+ struct rdt_mon_domain *d;
+
+ d = container_of(hdr, struct rdt_mon_domain, hdr);
+ sprintf(name, "mon_%s_%02d", r->name, d->ci->id);
+ sprintf(subname, "mon_sub_%s_%02d", r->name, hdr->id);
+ } else {
+ sprintf(name, "mon_%s_%02d", r->name, hdr->id);
+ }
list_for_each_entry(prgrp, &rdt_all_groups, rdtgroup_list) {
mon_rmdir_one_subdir(prgrp, name, subname);
@@ -3068,11 +3074,12 @@ static void rmdir_mondata_subdir_allrdtgrp(struct rdt_resource *r,
}
}
-static int mon_add_all_files(struct kernfs_node *kn, struct rdt_mon_domain *d,
+static int mon_add_all_files(struct kernfs_node *kn, struct rdt_domain_hdr *hdr,
struct rdt_resource *r, struct rdtgroup *prgrp,
bool do_sum)
{
struct rmid_read rr = {0};
+ struct rdt_mon_domain *d;
struct mon_data *priv;
struct mon_evt *mevt;
int ret;
@@ -3080,6 +3087,8 @@ static int mon_add_all_files(struct kernfs_node *kn, struct rdt_mon_domain *d,
if (WARN_ON(list_empty(&r->evt_list)))
return -EPERM;
+ d = container_of(hdr, struct rdt_mon_domain, hdr);
+
list_for_each_entry(mevt, &r->evt_list, list) {
priv = mon_get_default_kn_priv(r, d, mevt, prgrp, do_sum);
if (WARN_ON_ONCE(!priv))
@@ -3089,7 +3098,7 @@ static int mon_add_all_files(struct kernfs_node *kn, struct rdt_mon_domain *d,
if (ret)
return ret;
- if (!do_sum && resctrl_is_mbm_event(mevt->evtid))
+ if (r->rid == RDT_RESOURCE_L3 && !do_sum && resctrl_is_mbm_event(mevt->evtid))
mon_event_read(&rr, r, d, prgrp, &d->hdr.cpu_mask, mevt->evtid, true);
}
@@ -3097,10 +3106,11 @@ static int mon_add_all_files(struct kernfs_node *kn, struct rdt_mon_domain *d,
}
static int mkdir_mondata_subdir(struct kernfs_node *parent_kn,
- struct rdt_mon_domain *d,
+ struct rdt_domain_hdr *hdr,
struct rdt_resource *r, struct rdtgroup *prgrp)
{
struct kernfs_node *kn, *ckn;
+ struct rdt_mon_domain *d;
char name[32];
bool snc_mode;
int ret = 0;
@@ -3108,7 +3118,12 @@ static int mkdir_mondata_subdir(struct kernfs_node *parent_kn,
lockdep_assert_held(&rdtgroup_mutex);
snc_mode = r->mon_scope == RESCTRL_L3_NODE;
- sprintf(name, "mon_%s_%02d", r->name, snc_mode ? d->ci->id : d->hdr.id);
+ if (snc_mode) {
+ d = container_of(hdr, struct rdt_mon_domain, hdr);
+ sprintf(name, "mon_%s_%02d", r->name, d->ci->id);
+ } else {
+ sprintf(name, "mon_%s_%02d", r->name, hdr->id);
+ }
kn = kernfs_find_and_get(parent_kn, name);
if (kn) {
/*
@@ -3124,7 +3139,7 @@ static int mkdir_mondata_subdir(struct kernfs_node *parent_kn,
ret = rdtgroup_kn_set_ugid(kn);
if (ret)
goto out_destroy;
- ret = mon_add_all_files(kn, d, r, prgrp, snc_mode);
+ ret = mon_add_all_files(kn, hdr, r, prgrp, snc_mode);
if (ret)
goto out_destroy;
}
@@ -3141,7 +3156,7 @@ static int mkdir_mondata_subdir(struct kernfs_node *parent_kn,
if (ret)
goto out_destroy;
- ret = mon_add_all_files(ckn, d, r, prgrp, false);
+ ret = mon_add_all_files(ckn, hdr, r, prgrp, false);
if (ret)
goto out_destroy;
}
@@ -3159,7 +3174,7 @@ static int mkdir_mondata_subdir(struct kernfs_node *parent_kn,
* and "monitor" groups with given domain id.
*/
static void mkdir_mondata_subdir_allrdtgrp(struct rdt_resource *r,
- struct rdt_mon_domain *d)
+ struct rdt_domain_hdr *hdr)
{
struct kernfs_node *parent_kn;
struct rdtgroup *prgrp, *crgrp;
@@ -3170,17 +3185,17 @@ static void mkdir_mondata_subdir_allrdtgrp(struct rdt_resource *r,
* so that mon_get_default_kn_priv() can find the allocated structure
* on subsequent calls.
*/
- mkdir_mondata_subdir(kn_mondata, d, r, &rdtgroup_default);
+ mkdir_mondata_subdir(kn_mondata, hdr, r, &rdtgroup_default);
list_for_each_entry(prgrp, &rdt_all_groups, rdtgroup_list) {
parent_kn = prgrp->mon.mon_data_kn;
if (prgrp != &rdtgroup_default)
- mkdir_mondata_subdir(parent_kn, d, r, prgrp);
+ mkdir_mondata_subdir(parent_kn, hdr, r, prgrp);
head = &prgrp->mon.crdtgrp_list;
list_for_each_entry(crgrp, head, mon.crdtgrp_list) {
parent_kn = crgrp->mon.mon_data_kn;
- mkdir_mondata_subdir(parent_kn, d, r, crgrp);
+ mkdir_mondata_subdir(parent_kn, hdr, r, crgrp);
}
}
}
@@ -3189,14 +3204,14 @@ static int mkdir_mondata_subdir_alldom(struct kernfs_node *parent_kn,
struct rdt_resource *r,
struct rdtgroup *prgrp)
{
- struct rdt_mon_domain *dom;
+ struct rdt_domain_hdr *hdr;
int ret;
/* Walking r->domains, ensure it can't race with cpuhp */
lockdep_assert_cpus_held();
- list_for_each_entry(dom, &r->mon_domains, hdr.list) {
- ret = mkdir_mondata_subdir(parent_kn, dom, r, prgrp);
+ list_for_each_entry(hdr, &r->mon_domains, list) {
+ ret = mkdir_mondata_subdir(parent_kn, hdr, r, prgrp);
if (ret)
return ret;
}
@@ -4055,7 +4070,7 @@ void resctrl_offline_mon_domain(struct rdt_resource *r, struct rdt_mon_domain *d
* per domain monitor data directories.
*/
if (resctrl_mounted && resctrl_arch_mon_capable())
- rmdir_mondata_subdir_allrdtgrp(r, d);
+ rmdir_mondata_subdir_allrdtgrp(r, &d->hdr);
if (resctrl_is_mbm_enabled())
cancel_delayed_work(&d->mbm_over);
@@ -4137,10 +4152,12 @@ int resctrl_online_ctrl_domain(struct rdt_resource *r, struct rdt_ctrl_domain *d
return err;
}
-int resctrl_online_mon_domain(struct rdt_resource *r, struct rdt_mon_domain *d)
+int resctrl_online_mon_domain(struct rdt_resource *r, struct rdt_domain_hdr *hdr)
{
+ struct rdt_mon_domain *d;
int err;
+ d = container_of(hdr, struct rdt_mon_domain, hdr);
mutex_lock(&rdtgroup_mutex);
err = domain_setup_mon_state(r, d);
@@ -4163,7 +4180,7 @@ int resctrl_online_mon_domain(struct rdt_resource *r, struct rdt_mon_domain *d)
* If resctrl is mounted, add per domain monitor data directories.
*/
if (resctrl_mounted && resctrl_arch_mon_capable())
- mkdir_mondata_subdir_allrdtgrp(r, d);
+ mkdir_mondata_subdir_allrdtgrp(r, hdr);
out_unlock:
mutex_unlock(&rdtgroup_mutex);
--
2.48.1
^ permalink raw reply related [flat|nested] 37+ messages in thread* Re: [PATCH v2 04/16] x86/resctrl: Change generic monitor functions to use struct rdt_domain_hdr
2025-03-21 23:15 ` [PATCH v2 04/16] x86/resctrl: Change generic monitor functions to use struct rdt_domain_hdr Tony Luck
@ 2025-03-31 16:15 ` Reinette Chatre
2025-03-31 21:14 ` Luck, Tony
0 siblings, 1 reply; 37+ messages in thread
From: Reinette Chatre @ 2025-03-31 16:15 UTC (permalink / raw)
To: Tony Luck, Fenghua Yu, Maciej Wieczor-Retman, Peter Newman,
James Morse, Babu Moger, Drew Fustini, Dave Martin
Cc: linux-kernel, patches
Hi Tony,
On 3/21/25 4:15 PM, Tony Luck wrote:
> Functions that don't need the internal details of the rdt_mon_domain
> can operate on just the rdt_domain_hdr.
This does not seem accurate. The functions are modified to take rdt_domain_hdr
as parameter but then the functions are modified to extract rdt_mon_domain
based on rdt_domain_hdr .... and proceeds to operate on internals of
rdt_mon_domain in a way that contradicts the changelog.
Considering what comes later this seems risky to me to rely on the
code flow to interpret which structure rdt_domain_hdr forms part of. I think
that it will be safer if rdt_domain_hdr gets an identifier that reflects which
structure it forms part of so that the accessors could be made explicit and
have error checking.
Reinette
^ permalink raw reply [flat|nested] 37+ messages in thread
* Re: [PATCH v2 04/16] x86/resctrl: Change generic monitor functions to use struct rdt_domain_hdr
2025-03-31 16:15 ` Reinette Chatre
@ 2025-03-31 21:14 ` Luck, Tony
0 siblings, 0 replies; 37+ messages in thread
From: Luck, Tony @ 2025-03-31 21:14 UTC (permalink / raw)
To: Reinette Chatre
Cc: Fenghua Yu, Maciej Wieczor-Retman, Peter Newman, James Morse,
Babu Moger, Drew Fustini, Dave Martin, linux-kernel, patches
On Mon, Mar 31, 2025 at 09:15:36AM -0700, Reinette Chatre wrote:
> Hi Tony,
>
> On 3/21/25 4:15 PM, Tony Luck wrote:
> > Functions that don't need the internal details of the rdt_mon_domain
> > can operate on just the rdt_domain_hdr.
>
> This does not seem accurate. The functions are modified to take rdt_domain_hdr
> as parameter but then the functions are modified to extract rdt_mon_domain
> based on rdt_domain_hdr .... and proceeds to operate on internals of
> rdt_mon_domain in a way that contradicts the changelog.
>
> Considering what comes later this seems risky to me to rely on the
> code flow to interpret which structure rdt_domain_hdr forms part of. I think
> that it will be safer if rdt_domain_hdr gets an identifier that reflects which
> structure it forms part of so that the accessors could be made explicit and
> have error checking.
This needs some more thought and cleanup. I ended up with a mix of
places that really just wanted the header. E.g. adding second and subsequent
CPUs to a domain, or deleting any but the last CPU from a domain, only
need to update the cpu_mask. But other places end up clumsily using
container_of() to get to the surrounding mon_domain. With different
types of mon_domain a type field would help sanity checking.
>
> Reinette
-Tony
^ permalink raw reply [flat|nested] 37+ messages in thread
* [PATCH v2 05/16] x86/resctrl: Add and initialize rdt_resource for package scope core monitor
2025-03-21 23:15 [PATCH v2 00/16] x86/resctrl telemetry monitoring Tony Luck
` (3 preceding siblings ...)
2025-03-21 23:15 ` [PATCH v2 04/16] x86/resctrl: Change generic monitor functions to use struct rdt_domain_hdr Tony Luck
@ 2025-03-21 23:15 ` Tony Luck
2025-03-31 16:18 ` Reinette Chatre
2025-03-21 23:15 ` [PATCH v2 06/16] x86/resctrl: Prepare for resource specific event ids Tony Luck
` (10 subsequent siblings)
15 siblings, 1 reply; 37+ messages in thread
From: Tony Luck @ 2025-03-21 23:15 UTC (permalink / raw)
To: Fenghua Yu, Reinette Chatre, Maciej Wieczor-Retman, Peter Newman,
James Morse, Babu Moger, Drew Fustini, Dave Martin
Cc: linux-kernel, patches, Tony Luck
New resource for monitoring core events reported at package level.
Signed-off-by: Tony Luck <tony.luck@intel.com>
---
include/linux/resctrl.h | 1 +
include/linux/resctrl_types.h | 1 +
fs/resctrl/internal.h | 2 ++
arch/x86/kernel/cpu/resctrl/core.c | 11 +++++++++++
fs/resctrl/rdtgroup.c | 2 ++
5 files changed, 17 insertions(+)
diff --git a/include/linux/resctrl.h b/include/linux/resctrl.h
index 25c3ee78de3d..6c895ec220fe 100644
--- a/include/linux/resctrl.h
+++ b/include/linux/resctrl.h
@@ -233,6 +233,7 @@ enum resctrl_scope {
RESCTRL_L2_CACHE = 2,
RESCTRL_L3_CACHE = 3,
RESCTRL_L3_NODE,
+ RESCTRL_PACKAGE,
};
/**
diff --git a/include/linux/resctrl_types.h b/include/linux/resctrl_types.h
index a7faf2cd5406..8f967e03af5a 100644
--- a/include/linux/resctrl_types.h
+++ b/include/linux/resctrl_types.h
@@ -39,6 +39,7 @@ enum resctrl_res_level {
RDT_RESOURCE_L2,
RDT_RESOURCE_MBA,
RDT_RESOURCE_SMBA,
+ RDT_RESOURCE_INTEL_AET,
/* Must be the last */
RDT_NUM_RESOURCES,
diff --git a/fs/resctrl/internal.h b/fs/resctrl/internal.h
index ec3863d18f68..3a100007301d 100644
--- a/fs/resctrl/internal.h
+++ b/fs/resctrl/internal.h
@@ -240,6 +240,8 @@ struct rdtgroup {
#define RFTYPE_DEBUG BIT(10)
+#define RFTYPE_RES_PKG BIT(11)
+
#define RFTYPE_CTRL_INFO (RFTYPE_INFO | RFTYPE_CTRL)
#define RFTYPE_MON_INFO (RFTYPE_INFO | RFTYPE_MON)
diff --git a/arch/x86/kernel/cpu/resctrl/core.c b/arch/x86/kernel/cpu/resctrl/core.c
index c316268b4442..c8cc3104f56c 100644
--- a/arch/x86/kernel/cpu/resctrl/core.c
+++ b/arch/x86/kernel/cpu/resctrl/core.c
@@ -99,6 +99,15 @@ struct rdt_hw_resource rdt_resources_all[RDT_NUM_RESOURCES] = {
.schema_fmt = RESCTRL_SCHEMA_RANGE,
},
},
+ [RDT_RESOURCE_INTEL_AET] =
+ {
+ .r_resctrl = {
+ .rid = RDT_RESOURCE_INTEL_AET,
+ .name = "PKG",
+ .mon_scope = RESCTRL_PACKAGE,
+ .mon_domains = mon_domain_init(RDT_RESOURCE_INTEL_AET),
+ },
+ },
};
u32 resctrl_arch_system_num_rmid_idx(void)
@@ -430,6 +439,8 @@ static int get_domain_id_from_scope(int cpu, enum resctrl_scope scope)
return get_cpu_cacheinfo_id(cpu, scope);
case RESCTRL_L3_NODE:
return cpu_to_node(cpu);
+ case RESCTRL_PACKAGE:
+ return topology_physical_package_id(cpu);
default:
break;
}
diff --git a/fs/resctrl/rdtgroup.c b/fs/resctrl/rdtgroup.c
index dbfb7d3bc3bc..a90291f57330 100644
--- a/fs/resctrl/rdtgroup.c
+++ b/fs/resctrl/rdtgroup.c
@@ -2179,6 +2179,8 @@ static unsigned long fflags_from_resource(struct rdt_resource *r)
case RDT_RESOURCE_MBA:
case RDT_RESOURCE_SMBA:
return RFTYPE_RES_MB;
+ case RDT_RESOURCE_INTEL_AET:
+ return RFTYPE_RES_PKG;
}
return WARN_ON_ONCE(1);
--
2.48.1
^ permalink raw reply related [flat|nested] 37+ messages in thread* Re: [PATCH v2 05/16] x86/resctrl: Add and initialize rdt_resource for package scope core monitor
2025-03-21 23:15 ` [PATCH v2 05/16] x86/resctrl: Add and initialize rdt_resource for package scope core monitor Tony Luck
@ 2025-03-31 16:18 ` Reinette Chatre
2025-03-31 21:22 ` Luck, Tony
0 siblings, 1 reply; 37+ messages in thread
From: Reinette Chatre @ 2025-03-31 16:18 UTC (permalink / raw)
To: Tony Luck, Fenghua Yu, Maciej Wieczor-Retman, Peter Newman,
James Morse, Babu Moger, Drew Fustini, Dave Martin
Cc: linux-kernel, patches
Hi Tony,
On 3/21/25 4:15 PM, Tony Luck wrote:
> New resource for monitoring core events reported at package level.
Could you please add proper (per Documentation/process/maintainer-tip.rst)
changelogs to all patches in the series?
>
> Signed-off-by: Tony Luck <tony.luck@intel.com>
> ---
> include/linux/resctrl.h | 1 +
> include/linux/resctrl_types.h | 1 +
> fs/resctrl/internal.h | 2 ++
> arch/x86/kernel/cpu/resctrl/core.c | 11 +++++++++++
> fs/resctrl/rdtgroup.c | 2 ++
> 5 files changed, 17 insertions(+)
>
> diff --git a/include/linux/resctrl.h b/include/linux/resctrl.h
> index 25c3ee78de3d..6c895ec220fe 100644
> --- a/include/linux/resctrl.h
> +++ b/include/linux/resctrl.h
> @@ -233,6 +233,7 @@ enum resctrl_scope {
> RESCTRL_L2_CACHE = 2,
> RESCTRL_L3_CACHE = 3,
> RESCTRL_L3_NODE,
> + RESCTRL_PACKAGE,
> };
>
> /**
> diff --git a/include/linux/resctrl_types.h b/include/linux/resctrl_types.h
> index a7faf2cd5406..8f967e03af5a 100644
> --- a/include/linux/resctrl_types.h
> +++ b/include/linux/resctrl_types.h
> @@ -39,6 +39,7 @@ enum resctrl_res_level {
> RDT_RESOURCE_L2,
> RDT_RESOURCE_MBA,
> RDT_RESOURCE_SMBA,
> + RDT_RESOURCE_INTEL_AET,
This is fs code so architecture specific code needs to be avoided. Any other
architecture that may need to report similar data would be forced to use this
resource name.
>
> /* Must be the last */
> RDT_NUM_RESOURCES,
> diff --git a/fs/resctrl/internal.h b/fs/resctrl/internal.h
> index ec3863d18f68..3a100007301d 100644
> --- a/fs/resctrl/internal.h
> +++ b/fs/resctrl/internal.h
> @@ -240,6 +240,8 @@ struct rdtgroup {
>
> #define RFTYPE_DEBUG BIT(10)
>
> +#define RFTYPE_RES_PKG BIT(11)
"PKG" is not the resource but instead the scope, no?
> +
> #define RFTYPE_CTRL_INFO (RFTYPE_INFO | RFTYPE_CTRL)
>
> #define RFTYPE_MON_INFO (RFTYPE_INFO | RFTYPE_MON)
> diff --git a/arch/x86/kernel/cpu/resctrl/core.c b/arch/x86/kernel/cpu/resctrl/core.c
> index c316268b4442..c8cc3104f56c 100644
> --- a/arch/x86/kernel/cpu/resctrl/core.c
> +++ b/arch/x86/kernel/cpu/resctrl/core.c
> @@ -99,6 +99,15 @@ struct rdt_hw_resource rdt_resources_all[RDT_NUM_RESOURCES] = {
> .schema_fmt = RESCTRL_SCHEMA_RANGE,
> },
> },
> + [RDT_RESOURCE_INTEL_AET] =
> + {
> + .r_resctrl = {
> + .rid = RDT_RESOURCE_INTEL_AET,
> + .name = "PKG",
The resource name should not be the scope. It should be a name that reflects the
resource being monitored. In this case it is "core"/"cpu"? I understand that this may
create confusion since the data is provided at package scope so perhaps the "resource"
can be "perf" and then all the events can include "core" in their name? If the intention
is to guide user space in how to interpret the domain IDs then we could consider
something like "perf_pkg" or even "phys_pkg_perf" that prepares resctrl for future perf
events that may need reporting at a different scope?
> + .mon_scope = RESCTRL_PACKAGE,
> + .mon_domains = mon_domain_init(RDT_RESOURCE_INTEL_AET),
> + },
> + },
> };
>
> u32 resctrl_arch_system_num_rmid_idx(void)
> @@ -430,6 +439,8 @@ static int get_domain_id_from_scope(int cpu, enum resctrl_scope scope)
> return get_cpu_cacheinfo_id(cpu, scope);
> case RESCTRL_L3_NODE:
> return cpu_to_node(cpu);
> + case RESCTRL_PACKAGE:
> + return topology_physical_package_id(cpu);
> default:
> break;
> }
> diff --git a/fs/resctrl/rdtgroup.c b/fs/resctrl/rdtgroup.c
> index dbfb7d3bc3bc..a90291f57330 100644
> --- a/fs/resctrl/rdtgroup.c
> +++ b/fs/resctrl/rdtgroup.c
> @@ -2179,6 +2179,8 @@ static unsigned long fflags_from_resource(struct rdt_resource *r)
> case RDT_RESOURCE_MBA:
> case RDT_RESOURCE_SMBA:
> return RFTYPE_RES_MB;
> + case RDT_RESOURCE_INTEL_AET:
> + return RFTYPE_RES_PKG;
> }
>
> return WARN_ON_ONCE(1);
Reinette
^ permalink raw reply [flat|nested] 37+ messages in thread* Re: [PATCH v2 05/16] x86/resctrl: Add and initialize rdt_resource for package scope core monitor
2025-03-31 16:18 ` Reinette Chatre
@ 2025-03-31 21:22 ` Luck, Tony
2025-03-31 23:49 ` Reinette Chatre
0 siblings, 1 reply; 37+ messages in thread
From: Luck, Tony @ 2025-03-31 21:22 UTC (permalink / raw)
To: Reinette Chatre
Cc: Fenghua Yu, Maciej Wieczor-Retman, Peter Newman, James Morse,
Babu Moger, Drew Fustini, Dave Martin, linux-kernel, patches
On Mon, Mar 31, 2025 at 09:18:12AM -0700, Reinette Chatre wrote:
> Hi Tony,
>
> On 3/21/25 4:15 PM, Tony Luck wrote:
> > New resource for monitoring core events reported at package level.
>
> Could you please add proper (per Documentation/process/maintainer-tip.rst)
> changelogs to all patches in the series?
Will do.
> >
> > Signed-off-by: Tony Luck <tony.luck@intel.com>
> > ---
> > include/linux/resctrl.h | 1 +
> > include/linux/resctrl_types.h | 1 +
> > fs/resctrl/internal.h | 2 ++
> > arch/x86/kernel/cpu/resctrl/core.c | 11 +++++++++++
> > fs/resctrl/rdtgroup.c | 2 ++
> > 5 files changed, 17 insertions(+)
> >
> > diff --git a/include/linux/resctrl.h b/include/linux/resctrl.h
> > index 25c3ee78de3d..6c895ec220fe 100644
> > --- a/include/linux/resctrl.h
> > +++ b/include/linux/resctrl.h
> > @@ -233,6 +233,7 @@ enum resctrl_scope {
> > RESCTRL_L2_CACHE = 2,
> > RESCTRL_L3_CACHE = 3,
> > RESCTRL_L3_NODE,
> > + RESCTRL_PACKAGE,
> > };
> >
> > /**
> > diff --git a/include/linux/resctrl_types.h b/include/linux/resctrl_types.h
> > index a7faf2cd5406..8f967e03af5a 100644
> > --- a/include/linux/resctrl_types.h
> > +++ b/include/linux/resctrl_types.h
> > @@ -39,6 +39,7 @@ enum resctrl_res_level {
> > RDT_RESOURCE_L2,
> > RDT_RESOURCE_MBA,
> > RDT_RESOURCE_SMBA,
> > + RDT_RESOURCE_INTEL_AET,
>
> This is fs code so architecture specific code needs to be avoided. Any other
> architecture that may need to report similar data would be forced to use this
> resource name.
Maybe RDT_RESOURCE_PERF?
> >
> > /* Must be the last */
> > RDT_NUM_RESOURCES,
> > diff --git a/fs/resctrl/internal.h b/fs/resctrl/internal.h
> > index ec3863d18f68..3a100007301d 100644
> > --- a/fs/resctrl/internal.h
> > +++ b/fs/resctrl/internal.h
> > @@ -240,6 +240,8 @@ struct rdtgroup {
> >
> > #define RFTYPE_DEBUG BIT(10)
> >
> > +#define RFTYPE_RES_PKG BIT(11)
>
> "PKG" is not the resource but instead the scope, no?
Correct. I missed this one when doing some renaming. It should
get a "perf" name (to match what we pick for the RDT_RESOURCE)
> > +
> > #define RFTYPE_CTRL_INFO (RFTYPE_INFO | RFTYPE_CTRL)
> >
> > #define RFTYPE_MON_INFO (RFTYPE_INFO | RFTYPE_MON)
> > diff --git a/arch/x86/kernel/cpu/resctrl/core.c b/arch/x86/kernel/cpu/resctrl/core.c
> > index c316268b4442..c8cc3104f56c 100644
> > --- a/arch/x86/kernel/cpu/resctrl/core.c
> > +++ b/arch/x86/kernel/cpu/resctrl/core.c
> > @@ -99,6 +99,15 @@ struct rdt_hw_resource rdt_resources_all[RDT_NUM_RESOURCES] = {
> > .schema_fmt = RESCTRL_SCHEMA_RANGE,
> > },
> > },
> > + [RDT_RESOURCE_INTEL_AET] =
> > + {
> > + .r_resctrl = {
> > + .rid = RDT_RESOURCE_INTEL_AET,
> > + .name = "PKG",
>
> The resource name should not be the scope. It should be a name that reflects the
> resource being monitored. In this case it is "core"/"cpu"? I understand that this may
> create confusion since the data is provided at package scope so perhaps the "resource"
> can be "perf" and then all the events can include "core" in their name? If the intention
> is to guide user space in how to interpret the domain IDs then we could consider
> something like "perf_pkg" or even "phys_pkg_perf" that prepares resctrl for future perf
> events that may need reporting at a different scope?
"perf_pkg" looks like a good option. Should it be "PERF_PKG". It appears
to the user in the names of the "mon_data/mon_%s_%.2d" directories. Also
in info/%s_MON.
Resctrl uses "L3" in captitals in those places.
> > + .mon_scope = RESCTRL_PACKAGE,
> > + .mon_domains = mon_domain_init(RDT_RESOURCE_INTEL_AET),
> > + },
> > + },
> > };
> >
> > u32 resctrl_arch_system_num_rmid_idx(void)
> > @@ -430,6 +439,8 @@ static int get_domain_id_from_scope(int cpu, enum resctrl_scope scope)
> > return get_cpu_cacheinfo_id(cpu, scope);
> > case RESCTRL_L3_NODE:
> > return cpu_to_node(cpu);
> > + case RESCTRL_PACKAGE:
> > + return topology_physical_package_id(cpu);
> > default:
> > break;
> > }
> > diff --git a/fs/resctrl/rdtgroup.c b/fs/resctrl/rdtgroup.c
> > index dbfb7d3bc3bc..a90291f57330 100644
> > --- a/fs/resctrl/rdtgroup.c
> > +++ b/fs/resctrl/rdtgroup.c
> > @@ -2179,6 +2179,8 @@ static unsigned long fflags_from_resource(struct rdt_resource *r)
> > case RDT_RESOURCE_MBA:
> > case RDT_RESOURCE_SMBA:
> > return RFTYPE_RES_MB;
> > + case RDT_RESOURCE_INTEL_AET:
> > + return RFTYPE_RES_PKG;
> > }
> >
> > return WARN_ON_ONCE(1);
>
>
> Reinette
^ permalink raw reply [flat|nested] 37+ messages in thread* Re: [PATCH v2 05/16] x86/resctrl: Add and initialize rdt_resource for package scope core monitor
2025-03-31 21:22 ` Luck, Tony
@ 2025-03-31 23:49 ` Reinette Chatre
0 siblings, 0 replies; 37+ messages in thread
From: Reinette Chatre @ 2025-03-31 23:49 UTC (permalink / raw)
To: Luck, Tony
Cc: Fenghua Yu, Maciej Wieczor-Retman, Peter Newman, James Morse,
Babu Moger, Drew Fustini, Dave Martin, linux-kernel, patches
Hi Tony,
On 3/31/25 2:22 PM, Luck, Tony wrote:
> On Mon, Mar 31, 2025 at 09:18:12AM -0700, Reinette Chatre wrote:
>> Hi Tony,
>>
>> On 3/21/25 4:15 PM, Tony Luck wrote:
>>> New resource for monitoring core events reported at package level.
>>
>> Could you please add proper (per Documentation/process/maintainer-tip.rst)
>> changelogs to all patches in the series?
>
> Will do.
>
>>>
>>> Signed-off-by: Tony Luck <tony.luck@intel.com>
>>> ---
>>> include/linux/resctrl.h | 1 +
>>> include/linux/resctrl_types.h | 1 +
>>> fs/resctrl/internal.h | 2 ++
>>> arch/x86/kernel/cpu/resctrl/core.c | 11 +++++++++++
>>> fs/resctrl/rdtgroup.c | 2 ++
>>> 5 files changed, 17 insertions(+)
>>>
>>> diff --git a/include/linux/resctrl.h b/include/linux/resctrl.h
>>> index 25c3ee78de3d..6c895ec220fe 100644
>>> --- a/include/linux/resctrl.h
>>> +++ b/include/linux/resctrl.h
>>> @@ -233,6 +233,7 @@ enum resctrl_scope {
>>> RESCTRL_L2_CACHE = 2,
>>> RESCTRL_L3_CACHE = 3,
>>> RESCTRL_L3_NODE,
>>> + RESCTRL_PACKAGE,
>>> };
>>>
>>> /**
>>> diff --git a/include/linux/resctrl_types.h b/include/linux/resctrl_types.h
>>> index a7faf2cd5406..8f967e03af5a 100644
>>> --- a/include/linux/resctrl_types.h
>>> +++ b/include/linux/resctrl_types.h
>>> @@ -39,6 +39,7 @@ enum resctrl_res_level {
>>> RDT_RESOURCE_L2,
>>> RDT_RESOURCE_MBA,
>>> RDT_RESOURCE_SMBA,
>>> + RDT_RESOURCE_INTEL_AET,
>>
>> This is fs code so architecture specific code needs to be avoided. Any other
>> architecture that may need to report similar data would be forced to use this
>> resource name.
>
> Maybe RDT_RESOURCE_PERF?
>
Would be ideal if it matches the name of the resource (below).
>>>
>>> /* Must be the last */
>>> RDT_NUM_RESOURCES,
>>> diff --git a/fs/resctrl/internal.h b/fs/resctrl/internal.h
>>> index ec3863d18f68..3a100007301d 100644
>>> --- a/fs/resctrl/internal.h
>>> +++ b/fs/resctrl/internal.h
>>> @@ -240,6 +240,8 @@ struct rdtgroup {
>>>
>>> #define RFTYPE_DEBUG BIT(10)
>>>
>>> +#define RFTYPE_RES_PKG BIT(11)
>>
>> "PKG" is not the resource but instead the scope, no?
>
> Correct. I missed this one when doing some renaming. It should
> get a "perf" name (to match what we pick for the RDT_RESOURCE)
>
>>> +
>>> #define RFTYPE_CTRL_INFO (RFTYPE_INFO | RFTYPE_CTRL)
>>>
>>> #define RFTYPE_MON_INFO (RFTYPE_INFO | RFTYPE_MON)
>>> diff --git a/arch/x86/kernel/cpu/resctrl/core.c b/arch/x86/kernel/cpu/resctrl/core.c
>>> index c316268b4442..c8cc3104f56c 100644
>>> --- a/arch/x86/kernel/cpu/resctrl/core.c
>>> +++ b/arch/x86/kernel/cpu/resctrl/core.c
>>> @@ -99,6 +99,15 @@ struct rdt_hw_resource rdt_resources_all[RDT_NUM_RESOURCES] = {
>>> .schema_fmt = RESCTRL_SCHEMA_RANGE,
>>> },
>>> },
>>> + [RDT_RESOURCE_INTEL_AET] =
>>> + {
>>> + .r_resctrl = {
>>> + .rid = RDT_RESOURCE_INTEL_AET,
>>> + .name = "PKG",
>>
>> The resource name should not be the scope. It should be a name that reflects the
>> resource being monitored. In this case it is "core"/"cpu"? I understand that this may
>> create confusion since the data is provided at package scope so perhaps the "resource"
>> can be "perf" and then all the events can include "core" in their name? If the intention
>> is to guide user space in how to interpret the domain IDs then we could consider
>> something like "perf_pkg" or even "phys_pkg_perf" that prepares resctrl for future perf
>> events that may need reporting at a different scope?
>
> "perf_pkg" looks like a good option. Should it be "PERF_PKG". It appears
> to the user in the names of the "mon_data/mon_%s_%.2d" directories. Also
> in info/%s_MON.
>
> Resctrl uses "L3" in captitals in those places.
I do not know where the capitalization custom originated but PERF_PKG does seem like a good start.
Reinette
^ permalink raw reply [flat|nested] 37+ messages in thread
* [PATCH v2 06/16] x86/resctrl: Prepare for resource specific event ids
2025-03-21 23:15 [PATCH v2 00/16] x86/resctrl telemetry monitoring Tony Luck
` (4 preceding siblings ...)
2025-03-21 23:15 ` [PATCH v2 05/16] x86/resctrl: Add and initialize rdt_resource for package scope core monitor Tony Luck
@ 2025-03-21 23:15 ` Tony Luck
2025-03-21 23:15 ` [PATCH v2 07/16] x86/resctrl: Add initialization hook for Intel PMT events Tony Luck
` (9 subsequent siblings)
15 siblings, 0 replies; 37+ messages in thread
From: Tony Luck @ 2025-03-21 23:15 UTC (permalink / raw)
To: Fenghua Yu, Reinette Chatre, Maciej Wieczor-Retman, Peter Newman,
James Morse, Babu Moger, Drew Fustini, Dave Martin
Cc: linux-kernel, patches, Tony Luck
Event ids ought to be specific to each resource. Keep enum resctrl_event_id
for event ids in the RDT_RESOURCE_L3. But change the type in generic
areas to unsigned int so that new resources can have their own event
ids.
Signed-off-by: Tony Luck <tony.luck@intel.com>
---
fs/resctrl/internal.h | 10 +++++-----
1 file changed, 5 insertions(+), 5 deletions(-)
diff --git a/fs/resctrl/internal.h b/fs/resctrl/internal.h
index 3a100007301d..422f36573db7 100644
--- a/fs/resctrl/internal.h
+++ b/fs/resctrl/internal.h
@@ -69,13 +69,13 @@ static inline struct rdt_fs_context *rdt_fc2context(struct fs_context *fc)
/**
* struct mon_evt - Entry in the event list of a resource
- * @evtid: event id
+ * @evtid: per resource event id
* @name: name of the event
* @configurable: true if the event is configurable
* @list: entry in &rdt_resource->evt_list
*/
struct mon_evt {
- enum resctrl_event_id evtid;
+ unsigned int evtid;
char *name;
bool configurable;
struct list_head list;
@@ -84,7 +84,7 @@ struct mon_evt {
/**
* struct mon_data - Monitoring details for each event file.
* @rid: Resource id associated with the event file.
- * @evtid: Event id associated with the event file.
+ * @evtid: Per resource event id associated with the event file.
* @sum: Set when event must be summed across multiple
* domains.
* @domid: When @sum is zero this is the domain to which
@@ -97,7 +97,7 @@ struct mon_evt {
*/
struct mon_data {
unsigned int rid;
- enum resctrl_event_id evtid;
+ unsigned int evtid;
unsigned int sum;
unsigned int domid;
};
@@ -124,7 +124,7 @@ struct rmid_read {
struct rdtgroup *rgrp;
struct rdt_resource *r;
struct rdt_mon_domain *d;
- enum resctrl_event_id evtid;
+ unsigned int evtid;
bool first;
struct cacheinfo *ci;
int err;
--
2.48.1
^ permalink raw reply related [flat|nested] 37+ messages in thread* [PATCH v2 07/16] x86/resctrl: Add initialization hook for Intel PMT events
2025-03-21 23:15 [PATCH v2 00/16] x86/resctrl telemetry monitoring Tony Luck
` (5 preceding siblings ...)
2025-03-21 23:15 ` [PATCH v2 06/16] x86/resctrl: Prepare for resource specific event ids Tony Luck
@ 2025-03-21 23:15 ` Tony Luck
2025-03-31 16:20 ` Reinette Chatre
2025-03-21 23:15 ` [PATCH v2 08/16] x86/resctrl: Add Intel PMT domain specific code Tony Luck
` (8 subsequent siblings)
15 siblings, 1 reply; 37+ messages in thread
From: Tony Luck @ 2025-03-21 23:15 UTC (permalink / raw)
To: Fenghua Yu, Reinette Chatre, Maciej Wieczor-Retman, Peter Newman,
James Morse, Babu Moger, Drew Fustini, Dave Martin
Cc: linux-kernel, patches, Tony Luck
Call the OOBMSM discovery code to find out if there are any
event groups that match unique identifiers understood by resctrl.
Note that initiialzation must happen in two phases because the
OOBMSM VSEC discovery process is not complete at resctrl
"lateinit()" initialization time. So there is an initial hook
that assumes that Intel PMT will exist, called early so that
package scoped domain groups are initialized.
At first mount the remainder of initialization is done. If there
are no Intel PMT events, the package domain lists are removed.
Move definition of struct mon_evt to <linux/resctrl_types.h>
Events for specific systems to be added by a separate patch.
Signed-off-by: Tony Luck <tony.luck@intel.com>
---
include/linux/resctrl.h | 14 ++
include/linux/resctrl_types.h | 14 ++
arch/x86/kernel/cpu/resctrl/internal.h | 6 +
fs/resctrl/internal.h | 14 --
arch/x86/kernel/cpu/resctrl/core.c | 9 +-
arch/x86/kernel/cpu/resctrl/intel_aet.c | 211 ++++++++++++++++++++++++
fs/resctrl/rdtgroup.c | 3 +
arch/x86/kernel/cpu/resctrl/Makefile | 1 +
8 files changed, 255 insertions(+), 17 deletions(-)
create mode 100644 arch/x86/kernel/cpu/resctrl/intel_aet.c
diff --git a/include/linux/resctrl.h b/include/linux/resctrl.h
index 6c895ec220fe..999e0802a26e 100644
--- a/include/linux/resctrl.h
+++ b/include/linux/resctrl.h
@@ -170,6 +170,14 @@ struct rdt_mon_domain {
int cqm_work_cpu;
};
+/**
+ * struct rdt_core_mon_domain - CPUs sharing an Intel-PMT-scoped resctrl monitor resource
+ * @hdr: common header for different domain types
+ */
+struct rdt_core_mon_domain {
+ struct rdt_domain_hdr hdr;
+};
+
/**
* struct resctrl_cache - Cache allocation related data
* @cbm_len: Length of the cache bit mask
@@ -522,4 +530,10 @@ extern unsigned int resctrl_rmid_realloc_limit;
int resctrl_init(void);
void resctrl_exit(void);
+#ifdef CONFIG_INTEL_AET_RESCTRL
+void rdt_get_intel_aet_mount(void);
+#else
+static inline void rdt_get_intel_aet_mount(void) { }
+#endif
+
#endif /* _RESCTRL_H */
diff --git a/include/linux/resctrl_types.h b/include/linux/resctrl_types.h
index 8f967e03af5a..d56fcd082d2c 100644
--- a/include/linux/resctrl_types.h
+++ b/include/linux/resctrl_types.h
@@ -57,4 +57,18 @@ enum resctrl_event_id {
#define QOS_NUM_EVENTS (QOS_L3_MBM_LOCAL_EVENT_ID + 1)
+/**
+ * struct mon_evt - Entry in the event list of a resource
+ * @evtid: per resource event id
+ * @name: name of the event
+ * @configurable: true if the event is configurable
+ * @list: entry in &rdt_resource->evt_list
+ */
+struct mon_evt {
+ unsigned int evtid;
+ char *name;
+ bool configurable;
+ struct list_head list;
+};
+
#endif /* __LINUX_RESCTRL_TYPES_H */
diff --git a/arch/x86/kernel/cpu/resctrl/internal.h b/arch/x86/kernel/cpu/resctrl/internal.h
index 521db28efb3f..ada402c7678b 100644
--- a/arch/x86/kernel/cpu/resctrl/internal.h
+++ b/arch/x86/kernel/cpu/resctrl/internal.h
@@ -168,6 +168,12 @@ void rdt_ctrl_update(void *arg);
int rdt_get_mon_l3_config(struct rdt_resource *r);
+#ifdef CONFIG_INTEL_AET_RESCTRL
+int rdt_get_intel_aet_mon_config(void);
+#else
+static inline int rdt_get_intel_aet_mon_config(void) { return 0; }
+#endif
+
bool rdt_cpu_has(int flag);
void __init intel_rdt_mbm_apply_quirk(void);
diff --git a/fs/resctrl/internal.h b/fs/resctrl/internal.h
index 422f36573db7..f5a698b49e97 100644
--- a/fs/resctrl/internal.h
+++ b/fs/resctrl/internal.h
@@ -67,20 +67,6 @@ static inline struct rdt_fs_context *rdt_fc2context(struct fs_context *fc)
return container_of(kfc, struct rdt_fs_context, kfc);
}
-/**
- * struct mon_evt - Entry in the event list of a resource
- * @evtid: per resource event id
- * @name: name of the event
- * @configurable: true if the event is configurable
- * @list: entry in &rdt_resource->evt_list
- */
-struct mon_evt {
- unsigned int evtid;
- char *name;
- bool configurable;
- struct list_head list;
-};
-
/**
* struct mon_data - Monitoring details for each event file.
* @rid: Resource id associated with the event file.
diff --git a/arch/x86/kernel/cpu/resctrl/core.c b/arch/x86/kernel/cpu/resctrl/core.c
index c8cc3104f56c..1ab0f5eec244 100644
--- a/arch/x86/kernel/cpu/resctrl/core.c
+++ b/arch/x86/kernel/cpu/resctrl/core.c
@@ -882,6 +882,7 @@ static __init bool get_rdt_alloc_resources(void)
static __init bool get_rdt_mon_resources(void)
{
struct rdt_resource *r = &rdt_resources_all[RDT_RESOURCE_L3].r_resctrl;
+ int ret1 = -EINVAL, ret2;
if (rdt_cpu_has(X86_FEATURE_CQM_OCCUP_LLC))
rdt_mon_features |= (1 << QOS_L3_OCCUP_EVENT_ID);
@@ -890,10 +891,12 @@ static __init bool get_rdt_mon_resources(void)
if (rdt_cpu_has(X86_FEATURE_CQM_MBM_LOCAL))
rdt_mon_features |= (1 << QOS_L3_MBM_LOCAL_EVENT_ID);
- if (!rdt_mon_features)
- return false;
+ if (rdt_mon_features)
+ ret1 = rdt_get_mon_l3_config(r);
+
+ ret2 = rdt_get_intel_aet_mon_config();
- return !rdt_get_mon_l3_config(r);
+ return ret1 == 0 || ret2;
}
static __init void __check_quirks_intel(void)
diff --git a/arch/x86/kernel/cpu/resctrl/intel_aet.c b/arch/x86/kernel/cpu/resctrl/intel_aet.c
new file mode 100644
index 000000000000..9a8ccb62b4ab
--- /dev/null
+++ b/arch/x86/kernel/cpu/resctrl/intel_aet.c
@@ -0,0 +1,211 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Resource Director Technology(RDT)
+ * - Intel Application Energy Telemetry
+ *
+ * Copyright (C) 2025 Intel Corporation
+ *
+ * Author:
+ * Tony Luck <tony.luck@intel.com>
+ */
+
+#define pr_fmt(fmt) "resctrl: " fmt
+
+#include <linux/cpu.h>
+#include <linux/cleanup.h>
+#include <linux/slab.h>
+#include "fake_intel_aet_features.h"
+#include <linux/intel_vsec.h>
+#include <asm/resctrl.h>
+
+#include "internal.h"
+
+static struct pmt_feature_group *feat_energy;
+static struct pmt_feature_group *feat_perf;
+
+/* All telemetry events from Intel CPUs */
+enum pmt_event_id {
+ PMT_EVENT_ENERGY,
+ PMT_EVENT_ACTIVITY,
+ PMT_EVENT_STALLS_LLC_HIT,
+ PMT_EVENT_C1_RES,
+ PMT_EVENT_UNHALTED_CORE_CYCLES,
+ PMT_EVENT_STALLS_LLC_MISS,
+ PMT_EVENT_AUTO_C6_RES,
+ PMT_EVENT_UNHALTED_REF_CYCLES,
+ PMT_EVENT_UOPS_RETIRED,
+
+ PMT_NUM_EVENTS
+};
+
+/**
+ * enum evt_type - Type for values for each event.
+ * @EVT_U64: Integer up to 64 bits
+ * @EVT_U46_18: Fixed point binary, 46 bits for integer, 18 binary place bits
+ */
+enum evt_type {
+ EVT_U64,
+ EVT_U46_18,
+};
+
+#define EVT(id, evtname, offset, t) \
+ { \
+ .evt = { \
+ .evtid = id, \
+ .name = evtname \
+ }, \
+ .evt_offset = offset, \
+ .evt_type = t \
+ }
+
+/**
+ * struct pmt_event - Telemetry event.
+ * @evt: Resctrl event
+ * @evt_offset: MMIO offset of counter
+ * @evt_type: Type
+ */
+struct pmt_event {
+ struct mon_evt evt;
+ int evt_offset;
+ enum evt_type evt_type;
+};
+
+/**
+ * struct telem_entry - Summarized form from XML telemetry description
+ * @name: Name for this group of events
+ * @guid: Unique ID for this group
+ * @num_rmids: Number of RMIDS supported
+ * @stride: Number of bytes of counters for each RMID
+ * @overflow_counter_off: Offset od overflow count
+ * @last_overflow_tstamp_off: Offset of overflow timestamp
+ * @last_update_tstamp_off: Offset of last update timestamp
+ * @active: Marks this group as active on current CPU
+ * @evts: Telemetry events in this group
+ */
+struct telem_entry {
+ char *name;
+ int guid;
+ int num_rmids;
+ int stride;
+ int overflow_counter_off;
+ int last_overflow_tstamp_off;
+ int last_update_tstamp_off;
+ bool active;
+ struct pmt_event evts[];
+};
+
+/* All known telemetry event groups */
+static struct telem_entry *telem_entry[] = {
+ NULL
+};
+
+/* Per-package event groups active on this machine */
+static struct pkg_info {
+ int count;
+ struct telemetry_region *regions;
+} *pkg_info;
+
+/*
+ * Scan a feature group looking for guids recognized by this code
+ * and update the per-package counts of known groups.
+ */
+static bool count_events(struct pkg_info *pkg, int max_pkgs, struct pmt_feature_group *p)
+{
+ struct telem_entry **tentry;
+ bool found = false;
+
+ for (int i = 0; i < p->count; i++) {
+ struct telemetry_region *tr = &p->regions[i];
+
+ for (tentry = telem_entry; *tentry; tentry++) {
+ if (tr->guid == (*tentry)->guid) {
+ if (tr->plat_info.package_id > max_pkgs) {
+ pr_warn_once("Bad package %d\n", tr->plat_info.package_id);
+ continue;
+ }
+ found = true;
+ (*tentry)->active = true;
+ pkg[tr->plat_info.package_id].count++;
+ break;
+ }
+ }
+ }
+
+ return found;
+}
+
+DEFINE_FREE(intel_pmt_put_feature_group, struct pmt_feature_group *, \
+ if (!IS_ERR_OR_NULL(_T)) \
+ intel_pmt_put_feature_group(_T))
+
+/*
+ * Ask OOBMSM discovery driver for all the RMID based telemetry groups
+ * that it supports.
+ */
+static bool get_events(void)
+{
+ struct pmt_feature_group *p1 __free(intel_pmt_put_feature_group) = NULL;
+ struct pmt_feature_group *p2 __free(intel_pmt_put_feature_group) = NULL;
+ int num_pkgs = topology_max_packages();
+ struct pkg_info *pkg __free(kfree) = NULL;
+
+ pkg = kmalloc_array(num_pkgs, sizeof(*pkg_info), GFP_KERNEL | __GFP_ZERO);
+ if (!pkg)
+ return false;
+
+ p1 = intel_pmt_get_regions_by_feature(FEATURE_PER_RMID_ENERGY_TELEM);
+ p2 = intel_pmt_get_regions_by_feature(FEATURE_PER_RMID_PERF_TELEM);
+
+ if (IS_ERR_VALUE(p1) && IS_ERR_VALUE(p1))
+ return false;
+
+ if (!IS_ERR_VALUE(p1))
+ if (!count_events(pkg, num_pkgs, p1))
+ intel_pmt_put_feature_group(no_free_ptr(p1));
+ if (!IS_ERR_VALUE(p2))
+ if (!count_events(pkg, num_pkgs, p2))
+ intel_pmt_put_feature_group(no_free_ptr(p2));
+
+ if (!IS_ERR_OR_NULL(p1))
+ feat_energy = no_free_ptr(p1);
+ if (!IS_ERR_OR_NULL(p2))
+ feat_perf = no_free_ptr(p2);
+ pkg_info = no_free_ptr(pkg);
+
+ return true;
+}
+
+/*
+ * Early initialization. Cannot do much here because OOBMSM has not
+ * completed enumeration of telemetry event groups.
+ */
+int rdt_get_intel_aet_mon_config(void)
+{
+ struct rdt_resource *r = &rdt_resources_all[RDT_RESOURCE_INTEL_AET].r_resctrl;
+
+ INIT_LIST_HEAD(&r->evt_list);
+
+ return 1;
+}
+
+/*
+ * Late (first mount) initialization. Safe to ask OOBMSM which telemetry
+ * event groups are supported.
+ */
+void rdt_get_intel_aet_mount(void)
+{
+ struct rdt_resource *r = &rdt_resources_all[RDT_RESOURCE_INTEL_AET].r_resctrl;
+ struct rdt_core_mon_domain *d, *tmp;
+ static int do_one_time;
+
+ if (do_one_time)
+ return;
+
+ do_one_time = 1;
+
+ if (!get_events()) {
+ list_for_each_entry_safe(d, tmp, &r->mon_domains, hdr.list)
+ kfree(d);
+ r->mon_capable = false;
+ }
+}
diff --git a/fs/resctrl/rdtgroup.c b/fs/resctrl/rdtgroup.c
index a90291f57330..4833dfa08ce3 100644
--- a/fs/resctrl/rdtgroup.c
+++ b/fs/resctrl/rdtgroup.c
@@ -2572,6 +2572,9 @@ static int rdt_get_tree(struct fs_context *fc)
cpus_read_lock();
mutex_lock(&rdtgroup_mutex);
+
+ rdt_get_intel_aet_mount();
+
/*
* resctrl file system can only be mounted once.
*/
diff --git a/arch/x86/kernel/cpu/resctrl/Makefile b/arch/x86/kernel/cpu/resctrl/Makefile
index 2c3b09f95127..a47e1c214087 100644
--- a/arch/x86/kernel/cpu/resctrl/Makefile
+++ b/arch/x86/kernel/cpu/resctrl/Makefile
@@ -2,6 +2,7 @@
obj-$(CONFIG_X86_CPU_RESCTRL) += core.o rdtgroup.o monitor.o
obj-$(CONFIG_X86_CPU_RESCTRL) += ctrlmondata.o
obj-$(CONFIG_RESCTRL_FS_PSEUDO_LOCK) += pseudo_lock.o
+obj-$(CONFIG_INTEL_AET_RESCTRL) += intel_aet.o
obj-$(CONFIG_INTEL_AET_RESCTRL) += fake_intel_aet_features.o
# To allow define_trace.h's recursive include:
--
2.48.1
^ permalink raw reply related [flat|nested] 37+ messages in thread* Re: [PATCH v2 07/16] x86/resctrl: Add initialization hook for Intel PMT events
2025-03-21 23:15 ` [PATCH v2 07/16] x86/resctrl: Add initialization hook for Intel PMT events Tony Luck
@ 2025-03-31 16:20 ` Reinette Chatre
2025-03-31 21:53 ` Luck, Tony
0 siblings, 1 reply; 37+ messages in thread
From: Reinette Chatre @ 2025-03-31 16:20 UTC (permalink / raw)
To: Tony Luck, Fenghua Yu, Maciej Wieczor-Retman, Peter Newman,
James Morse, Babu Moger, Drew Fustini, Dave Martin
Cc: linux-kernel, patches
Hi Tony,
On 3/21/25 4:15 PM, Tony Luck wrote:
> Call the OOBMSM discovery code to find out if there are any
> event groups that match unique identifiers understood by resctrl.
>
> Note that initiialzation must happen in two phases because the
"initiialzation" -> "initialization"
> OOBMSM VSEC discovery process is not complete at resctrl
> "lateinit()" initialization time. So there is an initial hook
> that assumes that Intel PMT will exist, called early so that
> package scoped domain groups are initialized.
>
> At first mount the remainder of initialization is done. If there
> are no Intel PMT events, the package domain lists are removed.
>
> Move definition of struct mon_evt to <linux/resctrl_types.h>
Why not include/resctrl.h?
>
> Events for specific systems to be added by a separate patch.
>
> Signed-off-by: Tony Luck <tony.luck@intel.com>
> ---
> include/linux/resctrl.h | 14 ++
> include/linux/resctrl_types.h | 14 ++
> arch/x86/kernel/cpu/resctrl/internal.h | 6 +
> fs/resctrl/internal.h | 14 --
> arch/x86/kernel/cpu/resctrl/core.c | 9 +-
> arch/x86/kernel/cpu/resctrl/intel_aet.c | 211 ++++++++++++++++++++++++
> fs/resctrl/rdtgroup.c | 3 +
> arch/x86/kernel/cpu/resctrl/Makefile | 1 +
> 8 files changed, 255 insertions(+), 17 deletions(-)
> create mode 100644 arch/x86/kernel/cpu/resctrl/intel_aet.c
>
> diff --git a/include/linux/resctrl.h b/include/linux/resctrl.h
> index 6c895ec220fe..999e0802a26e 100644
> --- a/include/linux/resctrl.h
> +++ b/include/linux/resctrl.h
> @@ -170,6 +170,14 @@ struct rdt_mon_domain {
> int cqm_work_cpu;
> };
>
> +/**
> + * struct rdt_core_mon_domain - CPUs sharing an Intel-PMT-scoped resctrl monitor resource
Please no arch specific references in fs code. I think it will help to explain and consume this
work if patches are split into fs and arch patches.
> + * @hdr: common header for different domain types
> + */
> +struct rdt_core_mon_domain {
> + struct rdt_domain_hdr hdr;
> +};
> +
> /**
> * struct resctrl_cache - Cache allocation related data
> * @cbm_len: Length of the cache bit mask
> @@ -522,4 +530,10 @@ extern unsigned int resctrl_rmid_realloc_limit;
> int resctrl_init(void);
> void resctrl_exit(void);
>
> +#ifdef CONFIG_INTEL_AET_RESCTRL
> +void rdt_get_intel_aet_mount(void);
Please no arch specific helpers in fs code. This could instead be a
generic "resctrl_arch_*" helper that resctrl fs calls at beginning of
fs mount for architectures to do what is needed.
> +#else
> +static inline void rdt_get_intel_aet_mount(void) { }
> +#endif
> +
> #endif /* _RESCTRL_H */
> diff --git a/include/linux/resctrl_types.h b/include/linux/resctrl_types.h
> index 8f967e03af5a..d56fcd082d2c 100644
> --- a/include/linux/resctrl_types.h
> +++ b/include/linux/resctrl_types.h
> @@ -57,4 +57,18 @@ enum resctrl_event_id {
>
> #define QOS_NUM_EVENTS (QOS_L3_MBM_LOCAL_EVENT_ID + 1)
>
> +/**
> + * struct mon_evt - Entry in the event list of a resource
> + * @evtid: per resource event id
> + * @name: name of the event
> + * @configurable: true if the event is configurable
> + * @list: entry in &rdt_resource->evt_list
> + */
> +struct mon_evt {
> + unsigned int evtid;
> + char *name;
> + bool configurable;
> + struct list_head list;
> +};
> +
> #endif /* __LINUX_RESCTRL_TYPES_H */
> diff --git a/arch/x86/kernel/cpu/resctrl/internal.h b/arch/x86/kernel/cpu/resctrl/internal.h
> index 521db28efb3f..ada402c7678b 100644
> --- a/arch/x86/kernel/cpu/resctrl/internal.h
> +++ b/arch/x86/kernel/cpu/resctrl/internal.h
> @@ -168,6 +168,12 @@ void rdt_ctrl_update(void *arg);
>
> int rdt_get_mon_l3_config(struct rdt_resource *r);
>
> +#ifdef CONFIG_INTEL_AET_RESCTRL
> +int rdt_get_intel_aet_mon_config(void);
> +#else
> +static inline int rdt_get_intel_aet_mon_config(void) { return 0; }
> +#endif
> +
> bool rdt_cpu_has(int flag);
>
> void __init intel_rdt_mbm_apply_quirk(void);
> diff --git a/fs/resctrl/internal.h b/fs/resctrl/internal.h
> index 422f36573db7..f5a698b49e97 100644
> --- a/fs/resctrl/internal.h
> +++ b/fs/resctrl/internal.h
> @@ -67,20 +67,6 @@ static inline struct rdt_fs_context *rdt_fc2context(struct fs_context *fc)
> return container_of(kfc, struct rdt_fs_context, kfc);
> }
>
> -/**
> - * struct mon_evt - Entry in the event list of a resource
> - * @evtid: per resource event id
> - * @name: name of the event
> - * @configurable: true if the event is configurable
> - * @list: entry in &rdt_resource->evt_list
> - */
> -struct mon_evt {
> - unsigned int evtid;
> - char *name;
> - bool configurable;
> - struct list_head list;
> -};
> -
> /**
> * struct mon_data - Monitoring details for each event file.
> * @rid: Resource id associated with the event file.
> diff --git a/arch/x86/kernel/cpu/resctrl/core.c b/arch/x86/kernel/cpu/resctrl/core.c
> index c8cc3104f56c..1ab0f5eec244 100644
> --- a/arch/x86/kernel/cpu/resctrl/core.c
> +++ b/arch/x86/kernel/cpu/resctrl/core.c
> @@ -882,6 +882,7 @@ static __init bool get_rdt_alloc_resources(void)
> static __init bool get_rdt_mon_resources(void)
> {
> struct rdt_resource *r = &rdt_resources_all[RDT_RESOURCE_L3].r_resctrl;
> + int ret1 = -EINVAL, ret2;
>
> if (rdt_cpu_has(X86_FEATURE_CQM_OCCUP_LLC))
> rdt_mon_features |= (1 << QOS_L3_OCCUP_EVENT_ID);
> @@ -890,10 +891,12 @@ static __init bool get_rdt_mon_resources(void)
> if (rdt_cpu_has(X86_FEATURE_CQM_MBM_LOCAL))
> rdt_mon_features |= (1 << QOS_L3_MBM_LOCAL_EVENT_ID);
>
> - if (!rdt_mon_features)
> - return false;
> + if (rdt_mon_features)
> + ret1 = rdt_get_mon_l3_config(r);
> +
> + ret2 = rdt_get_intel_aet_mon_config();
>
> - return !rdt_get_mon_l3_config(r);
> + return ret1 == 0 || ret2;
> }
>
> static __init void __check_quirks_intel(void)
> diff --git a/arch/x86/kernel/cpu/resctrl/intel_aet.c b/arch/x86/kernel/cpu/resctrl/intel_aet.c
> new file mode 100644
> index 000000000000..9a8ccb62b4ab
> --- /dev/null
> +++ b/arch/x86/kernel/cpu/resctrl/intel_aet.c
> @@ -0,0 +1,211 @@
> +// SPDX-License-Identifier: GPL-2.0-only
> +/*
> + * Resource Director Technology(RDT)
> + * - Intel Application Energy Telemetry
> + *
> + * Copyright (C) 2025 Intel Corporation
> + *
> + * Author:
> + * Tony Luck <tony.luck@intel.com>
> + */
> +
> +#define pr_fmt(fmt) "resctrl: " fmt
> +
> +#include <linux/cpu.h>
> +#include <linux/cleanup.h>
> +#include <linux/slab.h>
> +#include "fake_intel_aet_features.h"
> +#include <linux/intel_vsec.h>
> +#include <asm/resctrl.h>
> +
> +#include "internal.h"
> +
> +static struct pmt_feature_group *feat_energy;
> +static struct pmt_feature_group *feat_perf;
> +
> +/* All telemetry events from Intel CPUs */
> +enum pmt_event_id {
> + PMT_EVENT_ENERGY,
> + PMT_EVENT_ACTIVITY,
> + PMT_EVENT_STALLS_LLC_HIT,
> + PMT_EVENT_C1_RES,
> + PMT_EVENT_UNHALTED_CORE_CYCLES,
> + PMT_EVENT_STALLS_LLC_MISS,
> + PMT_EVENT_AUTO_C6_RES,
> + PMT_EVENT_UNHALTED_REF_CYCLES,
> + PMT_EVENT_UOPS_RETIRED,
> +
> + PMT_NUM_EVENTS
> +};
I expect the above to become part of resctrl fs. Actually, I think
all properties of the new events, the id, name and how to display it,
should be part of resctrl fs.
We do not want other architectures to create their own display names for
the same events. I expect that this will require more plumbing between
arch and fs code to communicate which events are supported, similar to
what exists for the L3 events (for example, resctrl_arch_is_mbm_total_enabled()).
This may result in struct rdt_core_mon_domain to no longer be empty but instead
for resctrl to use it to manage state of which events are enabled or not. Theoretically
all could be managed by the architecture but I think that could result in inconsistent
error codes to user space.
> +
> +/**
> + * enum evt_type - Type for values for each event.
> + * @EVT_U64: Integer up to 64 bits
> + * @EVT_U46_18: Fixed point binary, 46 bits for integer, 18 binary place bits
> + */
> +enum evt_type {
> + EVT_U64,
> + EVT_U46_18,
> +};
> +
> +#define EVT(id, evtname, offset, t) \
> + { \
> + .evt = { \
> + .evtid = id, \
> + .name = evtname \
> + }, \
> + .evt_offset = offset, \
> + .evt_type = t \
> + }
> +
> +/**
> + * struct pmt_event - Telemetry event.
> + * @evt: Resctrl event
> + * @evt_offset: MMIO offset of counter
> + * @evt_type: Type
> + */
> +struct pmt_event {
> + struct mon_evt evt;
> + int evt_offset;
> + enum evt_type evt_type;
> +};
> +
> +/**
> + * struct telem_entry - Summarized form from XML telemetry description
It is not clear to me how useful it is to document that this is
"Summarized form from XML telemetry description". Either more detail should
be added to help reader understand what XML is being talked about or
the description should be a summary of what this data structure represents.
> + * @name: Name for this group of events
> + * @guid: Unique ID for this group
> + * @num_rmids: Number of RMIDS supported
> + * @stride: Number of bytes of counters for each RMID
> + * @overflow_counter_off: Offset od overflow count
od -> of
> + * @last_overflow_tstamp_off: Offset of overflow timestamp
> + * @last_update_tstamp_off: Offset of last update timestamp
> + * @active: Marks this group as active on current CPU
Could you please elaborate what "on current CPU" implies with the events
being "per package"?
> + * @evts: Telemetry events in this group
Since this is an array, could this comment also describe how the number of
entries are determined?
> + */
> +struct telem_entry {
> + char *name;
> + int guid;
> + int num_rmids;
> + int stride;
> + int overflow_counter_off;
> + int last_overflow_tstamp_off;
> + int last_update_tstamp_off;
> + bool active;
> + struct pmt_event evts[];
> +};
> +
> +/* All known telemetry event groups */
> +static struct telem_entry *telem_entry[] = {
> + NULL
> +};
> +
> +/* Per-package event groups active on this machine */
> +static struct pkg_info {
> + int count;
> + struct telemetry_region *regions;
> +} *pkg_info;
> +
> +/*
> + * Scan a feature group looking for guids recognized by this code
"this code" can be dropped
> + * and update the per-package counts of known groups.
> + */
> +static bool count_events(struct pkg_info *pkg, int max_pkgs, struct pmt_feature_group *p)
> +{
> + struct telem_entry **tentry;
> + bool found = false;
> +
> + for (int i = 0; i < p->count; i++) {
> + struct telemetry_region *tr = &p->regions[i];
> +
> + for (tentry = telem_entry; *tentry; tentry++) {
> + if (tr->guid == (*tentry)->guid) {
> + if (tr->plat_info.package_id > max_pkgs) {
> + pr_warn_once("Bad package %d\n", tr->plat_info.package_id);
> + continue;
> + }
> + found = true;
> + (*tentry)->active = true;
> + pkg[tr->plat_info.package_id].count++;
There seems to be some duplicate information between the structures. For example,
telem_entry::num_rmids and the num_rmids from the pmt_feature_group. Are these
expected to be identical? Since the num_rmids from telem_entry are hardcoded and
the ones from pmt_feature_group discovered then this sounds like opportunity to
add some sanity checking.
Similarly, could there be a check here to ensure that the size of the memory region
provided matches the expected size based on all the hardcoded properties?
> + break;
> + }
> + }
> + }
> +
> + return found;
> +}
> +
> +DEFINE_FREE(intel_pmt_put_feature_group, struct pmt_feature_group *, \
> + if (!IS_ERR_OR_NULL(_T)) \
> + intel_pmt_put_feature_group(_T))
> +
> +/*
> + * Ask OOBMSM discovery driver for all the RMID based telemetry groups
> + * that it supports.
> + */
> +static bool get_events(void)
> +{
> + struct pmt_feature_group *p1 __free(intel_pmt_put_feature_group) = NULL;
> + struct pmt_feature_group *p2 __free(intel_pmt_put_feature_group) = NULL;
> + int num_pkgs = topology_max_packages();
> + struct pkg_info *pkg __free(kfree) = NULL;
> +
> + pkg = kmalloc_array(num_pkgs, sizeof(*pkg_info), GFP_KERNEL | __GFP_ZERO);
kcalloc() ?
> + if (!pkg)
> + return false;
> +
> + p1 = intel_pmt_get_regions_by_feature(FEATURE_PER_RMID_ENERGY_TELEM);
> + p2 = intel_pmt_get_regions_by_feature(FEATURE_PER_RMID_PERF_TELEM);
> +
> + if (IS_ERR_VALUE(p1) && IS_ERR_VALUE(p1))
> + return false;
> +
> + if (!IS_ERR_VALUE(p1))
> + if (!count_events(pkg, num_pkgs, p1))
> + intel_pmt_put_feature_group(no_free_ptr(p1));
This seems to defeat the purpose of the cleanup handler.
> + if (!IS_ERR_VALUE(p2))
> + if (!count_events(pkg, num_pkgs, p2))
> + intel_pmt_put_feature_group(no_free_ptr(p2));
> +
> + if (!IS_ERR_OR_NULL(p1))
> + feat_energy = no_free_ptr(p1);
> + if (!IS_ERR_OR_NULL(p2))
> + feat_perf = no_free_ptr(p2);
> + pkg_info = no_free_ptr(pkg);
> +
> + return true;
> +}
> +
> +/*
> + * Early initialization. Cannot do much here because OOBMSM has not
> + * completed enumeration of telemetry event groups.
> + */
> +int rdt_get_intel_aet_mon_config(void)
> +{
> + struct rdt_resource *r = &rdt_resources_all[RDT_RESOURCE_INTEL_AET].r_resctrl;
> +
> + INIT_LIST_HEAD(&r->evt_list);
> +
> + return 1;
> +}
> +
> +/*
> + * Late (first mount) initialization. Safe to ask OOBMSM which telemetry
> + * event groups are supported.
> + */
> +void rdt_get_intel_aet_mount(void)
> +{
> + struct rdt_resource *r = &rdt_resources_all[RDT_RESOURCE_INTEL_AET].r_resctrl;
> + struct rdt_core_mon_domain *d, *tmp;
> + static int do_one_time;
> +
> + if (do_one_time)
> + return;
> +
> + do_one_time = 1;
> +
> + if (!get_events()) {
> + list_for_each_entry_safe(d, tmp, &r->mon_domains, hdr.list)
> + kfree(d);
> + r->mon_capable = false;
> + }
> +}
> diff --git a/fs/resctrl/rdtgroup.c b/fs/resctrl/rdtgroup.c
> index a90291f57330..4833dfa08ce3 100644
> --- a/fs/resctrl/rdtgroup.c
> +++ b/fs/resctrl/rdtgroup.c
> @@ -2572,6 +2572,9 @@ static int rdt_get_tree(struct fs_context *fc)
>
> cpus_read_lock();
> mutex_lock(&rdtgroup_mutex);
> +
> + rdt_get_intel_aet_mount();
> +
> /*
> * resctrl file system can only be mounted once.
> */
> diff --git a/arch/x86/kernel/cpu/resctrl/Makefile b/arch/x86/kernel/cpu/resctrl/Makefile
> index 2c3b09f95127..a47e1c214087 100644
> --- a/arch/x86/kernel/cpu/resctrl/Makefile
> +++ b/arch/x86/kernel/cpu/resctrl/Makefile
> @@ -2,6 +2,7 @@
> obj-$(CONFIG_X86_CPU_RESCTRL) += core.o rdtgroup.o monitor.o
> obj-$(CONFIG_X86_CPU_RESCTRL) += ctrlmondata.o
> obj-$(CONFIG_RESCTRL_FS_PSEUDO_LOCK) += pseudo_lock.o
> +obj-$(CONFIG_INTEL_AET_RESCTRL) += intel_aet.o
> obj-$(CONFIG_INTEL_AET_RESCTRL) += fake_intel_aet_features.o
>
> # To allow define_trace.h's recursive include:
Reinette
^ permalink raw reply [flat|nested] 37+ messages in thread* Re: [PATCH v2 07/16] x86/resctrl: Add initialization hook for Intel PMT events
2025-03-31 16:20 ` Reinette Chatre
@ 2025-03-31 21:53 ` Luck, Tony
2025-04-01 0:07 ` Reinette Chatre
0 siblings, 1 reply; 37+ messages in thread
From: Luck, Tony @ 2025-03-31 21:53 UTC (permalink / raw)
To: Reinette Chatre
Cc: Fenghua Yu, Maciej Wieczor-Retman, Peter Newman, James Morse,
Babu Moger, Drew Fustini, Dave Martin, linux-kernel, patches
On Mon, Mar 31, 2025 at 09:20:07AM -0700, Reinette Chatre wrote:
> Hi Tony,
>
> On 3/21/25 4:15 PM, Tony Luck wrote:
> > Call the OOBMSM discovery code to find out if there are any
> > event groups that match unique identifiers understood by resctrl.
> >
> > Note that initiialzation must happen in two phases because the
>
> "initiialzation" -> "initialization"
Wil fix.
> > OOBMSM VSEC discovery process is not complete at resctrl
> > "lateinit()" initialization time. So there is an initial hook
> > that assumes that Intel PMT will exist, called early so that
> > package scoped domain groups are initialized.
> >
> > At first mount the remainder of initialization is done. If there
> > are no Intel PMT events, the package domain lists are removed.
> >
> > Move definition of struct mon_evt to <linux/resctrl_types.h>
>
> Why not include/resctrl.h?
I put in in resctrl_types.h because it is a type definition?
I'm not sure of the criteria James used to decide what goes
in resctrl_types.h and what goes in resctrl.h
> >
> > Events for specific systems to be added by a separate patch.
> >
> > Signed-off-by: Tony Luck <tony.luck@intel.com>
> > ---
> > include/linux/resctrl.h | 14 ++
> > include/linux/resctrl_types.h | 14 ++
> > arch/x86/kernel/cpu/resctrl/internal.h | 6 +
> > fs/resctrl/internal.h | 14 --
> > arch/x86/kernel/cpu/resctrl/core.c | 9 +-
> > arch/x86/kernel/cpu/resctrl/intel_aet.c | 211 ++++++++++++++++++++++++
> > fs/resctrl/rdtgroup.c | 3 +
> > arch/x86/kernel/cpu/resctrl/Makefile | 1 +
> > 8 files changed, 255 insertions(+), 17 deletions(-)
> > create mode 100644 arch/x86/kernel/cpu/resctrl/intel_aet.c
> >
> > diff --git a/include/linux/resctrl.h b/include/linux/resctrl.h
> > index 6c895ec220fe..999e0802a26e 100644
> > --- a/include/linux/resctrl.h
> > +++ b/include/linux/resctrl.h
> > @@ -170,6 +170,14 @@ struct rdt_mon_domain {
> > int cqm_work_cpu;
> > };
> >
> > +/**
> > + * struct rdt_core_mon_domain - CPUs sharing an Intel-PMT-scoped resctrl monitor resource
>
> Please no arch specific references in fs code. I think it will help to explain and consume this
> work if patches are split into fs and arch patches.
Domain add/remove needs more thought on the hooks between FS and ARCH
layers. Splitting the patches into "new infrastructure for FS" and
"new ARCH implememtations" will help me get something cleaner.
> > + * @hdr: common header for different domain types
> > + */
> > +struct rdt_core_mon_domain {
> > + struct rdt_domain_hdr hdr;
> > +};
> > +
> > /**
> > * struct resctrl_cache - Cache allocation related data
> > * @cbm_len: Length of the cache bit mask
> > @@ -522,4 +530,10 @@ extern unsigned int resctrl_rmid_realloc_limit;
> > int resctrl_init(void);
> > void resctrl_exit(void);
> >
> > +#ifdef CONFIG_INTEL_AET_RESCTRL
> > +void rdt_get_intel_aet_mount(void);
>
> Please no arch specific helpers in fs code. This could instead be a
> generic "resctrl_arch_*" helper that resctrl fs calls at beginning of
> fs mount for architectures to do what is needed.
Agreed.
>
> > +#else
> > +static inline void rdt_get_intel_aet_mount(void) { }
> > +#endif
> > +
> > #endif /* _RESCTRL_H */
> > diff --git a/include/linux/resctrl_types.h b/include/linux/resctrl_types.h
> > index 8f967e03af5a..d56fcd082d2c 100644
> > --- a/include/linux/resctrl_types.h
> > +++ b/include/linux/resctrl_types.h
> > @@ -57,4 +57,18 @@ enum resctrl_event_id {
> >
> > #define QOS_NUM_EVENTS (QOS_L3_MBM_LOCAL_EVENT_ID + 1)
> >
> > +/**
> > + * struct mon_evt - Entry in the event list of a resource
> > + * @evtid: per resource event id
> > + * @name: name of the event
> > + * @configurable: true if the event is configurable
> > + * @list: entry in &rdt_resource->evt_list
> > + */
> > +struct mon_evt {
> > + unsigned int evtid;
> > + char *name;
> > + bool configurable;
> > + struct list_head list;
> > +};
> > +
> > #endif /* __LINUX_RESCTRL_TYPES_H */
> > diff --git a/arch/x86/kernel/cpu/resctrl/internal.h b/arch/x86/kernel/cpu/resctrl/internal.h
> > index 521db28efb3f..ada402c7678b 100644
> > --- a/arch/x86/kernel/cpu/resctrl/internal.h
> > +++ b/arch/x86/kernel/cpu/resctrl/internal.h
> > @@ -168,6 +168,12 @@ void rdt_ctrl_update(void *arg);
> >
> > int rdt_get_mon_l3_config(struct rdt_resource *r);
> >
> > +#ifdef CONFIG_INTEL_AET_RESCTRL
> > +int rdt_get_intel_aet_mon_config(void);
> > +#else
> > +static inline int rdt_get_intel_aet_mon_config(void) { return 0; }
> > +#endif
> > +
> > bool rdt_cpu_has(int flag);
> >
> > void __init intel_rdt_mbm_apply_quirk(void);
> > diff --git a/fs/resctrl/internal.h b/fs/resctrl/internal.h
> > index 422f36573db7..f5a698b49e97 100644
> > --- a/fs/resctrl/internal.h
> > +++ b/fs/resctrl/internal.h
> > @@ -67,20 +67,6 @@ static inline struct rdt_fs_context *rdt_fc2context(struct fs_context *fc)
> > return container_of(kfc, struct rdt_fs_context, kfc);
> > }
> >
> > -/**
> > - * struct mon_evt - Entry in the event list of a resource
> > - * @evtid: per resource event id
> > - * @name: name of the event
> > - * @configurable: true if the event is configurable
> > - * @list: entry in &rdt_resource->evt_list
> > - */
> > -struct mon_evt {
> > - unsigned int evtid;
> > - char *name;
> > - bool configurable;
> > - struct list_head list;
> > -};
> > -
> > /**
> > * struct mon_data - Monitoring details for each event file.
> > * @rid: Resource id associated with the event file.
> > diff --git a/arch/x86/kernel/cpu/resctrl/core.c b/arch/x86/kernel/cpu/resctrl/core.c
> > index c8cc3104f56c..1ab0f5eec244 100644
> > --- a/arch/x86/kernel/cpu/resctrl/core.c
> > +++ b/arch/x86/kernel/cpu/resctrl/core.c
> > @@ -882,6 +882,7 @@ static __init bool get_rdt_alloc_resources(void)
> > static __init bool get_rdt_mon_resources(void)
> > {
> > struct rdt_resource *r = &rdt_resources_all[RDT_RESOURCE_L3].r_resctrl;
> > + int ret1 = -EINVAL, ret2;
> >
> > if (rdt_cpu_has(X86_FEATURE_CQM_OCCUP_LLC))
> > rdt_mon_features |= (1 << QOS_L3_OCCUP_EVENT_ID);
> > @@ -890,10 +891,12 @@ static __init bool get_rdt_mon_resources(void)
> > if (rdt_cpu_has(X86_FEATURE_CQM_MBM_LOCAL))
> > rdt_mon_features |= (1 << QOS_L3_MBM_LOCAL_EVENT_ID);
> >
> > - if (!rdt_mon_features)
> > - return false;
> > + if (rdt_mon_features)
> > + ret1 = rdt_get_mon_l3_config(r);
> > +
> > + ret2 = rdt_get_intel_aet_mon_config();
> >
> > - return !rdt_get_mon_l3_config(r);
> > + return ret1 == 0 || ret2;
> > }
> >
> > static __init void __check_quirks_intel(void)
> > diff --git a/arch/x86/kernel/cpu/resctrl/intel_aet.c b/arch/x86/kernel/cpu/resctrl/intel_aet.c
> > new file mode 100644
> > index 000000000000..9a8ccb62b4ab
> > --- /dev/null
> > +++ b/arch/x86/kernel/cpu/resctrl/intel_aet.c
> > @@ -0,0 +1,211 @@
> > +// SPDX-License-Identifier: GPL-2.0-only
> > +/*
> > + * Resource Director Technology(RDT)
> > + * - Intel Application Energy Telemetry
> > + *
> > + * Copyright (C) 2025 Intel Corporation
> > + *
> > + * Author:
> > + * Tony Luck <tony.luck@intel.com>
> > + */
> > +
> > +#define pr_fmt(fmt) "resctrl: " fmt
> > +
> > +#include <linux/cpu.h>
> > +#include <linux/cleanup.h>
> > +#include <linux/slab.h>
> > +#include "fake_intel_aet_features.h"
> > +#include <linux/intel_vsec.h>
> > +#include <asm/resctrl.h>
> > +
> > +#include "internal.h"
> > +
> > +static struct pmt_feature_group *feat_energy;
> > +static struct pmt_feature_group *feat_perf;
> > +
> > +/* All telemetry events from Intel CPUs */
> > +enum pmt_event_id {
> > + PMT_EVENT_ENERGY,
> > + PMT_EVENT_ACTIVITY,
> > + PMT_EVENT_STALLS_LLC_HIT,
> > + PMT_EVENT_C1_RES,
> > + PMT_EVENT_UNHALTED_CORE_CYCLES,
> > + PMT_EVENT_STALLS_LLC_MISS,
> > + PMT_EVENT_AUTO_C6_RES,
> > + PMT_EVENT_UNHALTED_REF_CYCLES,
> > + PMT_EVENT_UOPS_RETIRED,
> > +
> > + PMT_NUM_EVENTS
> > +};
>
> I expect the above to become part of resctrl fs. Actually, I think
> all properties of the new events, the id, name and how to display it,
> should be part of resctrl fs.
I'm not convinced about the amount of re-use that there will be
for these events. I took a quick look at the current plan for a
processor that follows Clearwater Forest and I see 22 events, only
3 of them match events in the above list.
> We do not want other architectures to create their own display names for
> the same events. I expect that this will require more plumbing between
> arch and fs code to communicate which events are supported, similar to
> what exists for the L3 events (for example, resctrl_arch_is_mbm_total_enabled()).
Supported monitor events are indicated by setting bits in "rdt_mon_features"
currently "unsigned int" but could become "long" or a longer bitmap if
we really want the FS layer to keep track of every possible event for
every architecture.
>
> This may result in struct rdt_core_mon_domain to no longer be empty but instead
> for resctrl to use it to manage state of which events are enabled or not. Theoretically
> all could be managed by the architecture but I think that could result in inconsistent
> error codes to user space.
rdt_core_mon_domain seems like the wrong level (unless we expect to have
different enabled events in different domains). rdt_resource seems
plausible ... or just expand "rdt_mon_features".
> > +
> > +/**
> > + * enum evt_type - Type for values for each event.
> > + * @EVT_U64: Integer up to 64 bits
> > + * @EVT_U46_18: Fixed point binary, 46 bits for integer, 18 binary place bits
> > + */
> > +enum evt_type {
> > + EVT_U64,
> > + EVT_U46_18,
> > +};
> > +
> > +#define EVT(id, evtname, offset, t) \
> > + { \
> > + .evt = { \
> > + .evtid = id, \
> > + .name = evtname \
> > + }, \
> > + .evt_offset = offset, \
> > + .evt_type = t \
> > + }
> > +
> > +/**
> > + * struct pmt_event - Telemetry event.
> > + * @evt: Resctrl event
> > + * @evt_offset: MMIO offset of counter
> > + * @evt_type: Type
> > + */
> > +struct pmt_event {
> > + struct mon_evt evt;
> > + int evt_offset;
> > + enum evt_type evt_type;
> > +};
> > +
> > +/**
> > + * struct telem_entry - Summarized form from XML telemetry description
>
> It is not clear to me how useful it is to document that this is
> "Summarized form from XML telemetry description". Either more detail should
> be added to help reader understand what XML is being talked about or
> the description should be a summary of what this data structure represents.
More detail here is the right direction.
> > + * @name: Name for this group of events
> > + * @guid: Unique ID for this group
> > + * @num_rmids: Number of RMIDS supported
> > + * @stride: Number of bytes of counters for each RMID
> > + * @overflow_counter_off: Offset od overflow count
>
> od -> of
>
> > + * @last_overflow_tstamp_off: Offset of overflow timestamp
> > + * @last_update_tstamp_off: Offset of last update timestamp
> > + * @active: Marks this group as active on current CPU
>
> Could you please elaborate what "on current CPU" implies with the events
> being "per package"?
I should have said "on current system" rather than using the overworked
"CPU" word.
> > + * @evts: Telemetry events in this group
>
> Since this is an array, could this comment also describe how the number of
> entries are determined?
Will add comment that it is terminated with a null entry.
>
> > + */
> > +struct telem_entry {
> > + char *name;
> > + int guid;
> > + int num_rmids;
> > + int stride;
> > + int overflow_counter_off;
> > + int last_overflow_tstamp_off;
> > + int last_update_tstamp_off;
> > + bool active;
> > + struct pmt_event evts[];
> > +};
> > +
> > +/* All known telemetry event groups */
> > +static struct telem_entry *telem_entry[] = {
> > + NULL
> > +};
> > +
> > +/* Per-package event groups active on this machine */
> > +static struct pkg_info {
> > + int count;
> > + struct telemetry_region *regions;
> > +} *pkg_info;
> > +
> > +/*
> > + * Scan a feature group looking for guids recognized by this code
>
> "this code" can be dropped
Agreed.
> > + * and update the per-package counts of known groups.
> > + */
> > +static bool count_events(struct pkg_info *pkg, int max_pkgs, struct pmt_feature_group *p)
> > +{
> > + struct telem_entry **tentry;
> > + bool found = false;
> > +
> > + for (int i = 0; i < p->count; i++) {
> > + struct telemetry_region *tr = &p->regions[i];
> > +
> > + for (tentry = telem_entry; *tentry; tentry++) {
> > + if (tr->guid == (*tentry)->guid) {
> > + if (tr->plat_info.package_id > max_pkgs) {
> > + pr_warn_once("Bad package %d\n", tr->plat_info.package_id);
> > + continue;
> > + }
> > + found = true;
> > + (*tentry)->active = true;
> > + pkg[tr->plat_info.package_id].count++;
>
> There seems to be some duplicate information between the structures. For example,
> telem_entry::num_rmids and the num_rmids from the pmt_feature_group. Are these
> expected to be identical? Since the num_rmids from telem_entry are hardcoded and
> the ones from pmt_feature_group discovered then this sounds like opportunity to
> add some sanity checking.
> Similarly, could there be a check here to ensure that the size of the memory region
> provided matches the expected size based on all the hardcoded properties?
I should also sanity check against CPUID(0xF,0x0).EBX
>
> > + break;
> > + }
> > + }
> > + }
> > +
> > + return found;
> > +}
> > +
> > +DEFINE_FREE(intel_pmt_put_feature_group, struct pmt_feature_group *, \
> > + if (!IS_ERR_OR_NULL(_T)) \
> > + intel_pmt_put_feature_group(_T))
> > +
> > +/*
> > + * Ask OOBMSM discovery driver for all the RMID based telemetry groups
> > + * that it supports.
> > + */
> > +static bool get_events(void)
> > +{
> > + struct pmt_feature_group *p1 __free(intel_pmt_put_feature_group) = NULL;
> > + struct pmt_feature_group *p2 __free(intel_pmt_put_feature_group) = NULL;
> > + int num_pkgs = topology_max_packages();
> > + struct pkg_info *pkg __free(kfree) = NULL;
> > +
> > + pkg = kmalloc_array(num_pkgs, sizeof(*pkg_info), GFP_KERNEL | __GFP_ZERO);
>
> kcalloc() ?
Yes.
> > + if (!pkg)
> > + return false;
> > +
> > + p1 = intel_pmt_get_regions_by_feature(FEATURE_PER_RMID_ENERGY_TELEM);
> > + p2 = intel_pmt_get_regions_by_feature(FEATURE_PER_RMID_PERF_TELEM);
> > +
> > + if (IS_ERR_VALUE(p1) && IS_ERR_VALUE(p1))
> > + return false;
> > +
> > + if (!IS_ERR_VALUE(p1))
> > + if (!count_events(pkg, num_pkgs, p1))
> > + intel_pmt_put_feature_group(no_free_ptr(p1));
>
> This seems to defeat the purpose of the cleanup handler.
Maybe this isn't needed. I'll have to dig through the various cases
of return values, and whether the .guid matches a known value.
> > + if (!IS_ERR_VALUE(p2))
> > + if (!count_events(pkg, num_pkgs, p2))
> > + intel_pmt_put_feature_group(no_free_ptr(p2));
> > +
> > + if (!IS_ERR_OR_NULL(p1))
> > + feat_energy = no_free_ptr(p1);
> > + if (!IS_ERR_OR_NULL(p2))
> > + feat_perf = no_free_ptr(p2);
> > + pkg_info = no_free_ptr(pkg);
> > +
> > + return true;
> > +}
> > +
> > +/*
> > + * Early initialization. Cannot do much here because OOBMSM has not
> > + * completed enumeration of telemetry event groups.
> > + */
> > +int rdt_get_intel_aet_mon_config(void)
> > +{
> > + struct rdt_resource *r = &rdt_resources_all[RDT_RESOURCE_INTEL_AET].r_resctrl;
> > +
> > + INIT_LIST_HEAD(&r->evt_list);
> > +
> > + return 1;
> > +}
> > +
> > +/*
> > + * Late (first mount) initialization. Safe to ask OOBMSM which telemetry
> > + * event groups are supported.
> > + */
> > +void rdt_get_intel_aet_mount(void)
> > +{
> > + struct rdt_resource *r = &rdt_resources_all[RDT_RESOURCE_INTEL_AET].r_resctrl;
> > + struct rdt_core_mon_domain *d, *tmp;
> > + static int do_one_time;
> > +
> > + if (do_one_time)
> > + return;
> > +
> > + do_one_time = 1;
> > +
> > + if (!get_events()) {
> > + list_for_each_entry_safe(d, tmp, &r->mon_domains, hdr.list)
> > + kfree(d);
> > + r->mon_capable = false;
> > + }
> > +}
> > diff --git a/fs/resctrl/rdtgroup.c b/fs/resctrl/rdtgroup.c
> > index a90291f57330..4833dfa08ce3 100644
> > --- a/fs/resctrl/rdtgroup.c
> > +++ b/fs/resctrl/rdtgroup.c
> > @@ -2572,6 +2572,9 @@ static int rdt_get_tree(struct fs_context *fc)
> >
> > cpus_read_lock();
> > mutex_lock(&rdtgroup_mutex);
> > +
> > + rdt_get_intel_aet_mount();
> > +
> > /*
> > * resctrl file system can only be mounted once.
> > */
> > diff --git a/arch/x86/kernel/cpu/resctrl/Makefile b/arch/x86/kernel/cpu/resctrl/Makefile
> > index 2c3b09f95127..a47e1c214087 100644
> > --- a/arch/x86/kernel/cpu/resctrl/Makefile
> > +++ b/arch/x86/kernel/cpu/resctrl/Makefile
> > @@ -2,6 +2,7 @@
> > obj-$(CONFIG_X86_CPU_RESCTRL) += core.o rdtgroup.o monitor.o
> > obj-$(CONFIG_X86_CPU_RESCTRL) += ctrlmondata.o
> > obj-$(CONFIG_RESCTRL_FS_PSEUDO_LOCK) += pseudo_lock.o
> > +obj-$(CONFIG_INTEL_AET_RESCTRL) += intel_aet.o
> > obj-$(CONFIG_INTEL_AET_RESCTRL) += fake_intel_aet_features.o
> >
> > # To allow define_trace.h's recursive include:
>
> Reinette
-Tony
^ permalink raw reply [flat|nested] 37+ messages in thread* Re: [PATCH v2 07/16] x86/resctrl: Add initialization hook for Intel PMT events
2025-03-31 21:53 ` Luck, Tony
@ 2025-04-01 0:07 ` Reinette Chatre
0 siblings, 0 replies; 37+ messages in thread
From: Reinette Chatre @ 2025-04-01 0:07 UTC (permalink / raw)
To: Luck, Tony
Cc: Fenghua Yu, Maciej Wieczor-Retman, Peter Newman, James Morse,
Babu Moger, Drew Fustini, Dave Martin, linux-kernel, patches
Hi Tony,
On 3/31/25 2:53 PM, Luck, Tony wrote:
> On Mon, Mar 31, 2025 at 09:20:07AM -0700, Reinette Chatre wrote:
>> Hi Tony,
>>
>> On 3/21/25 4:15 PM, Tony Luck wrote:
>>> Call the OOBMSM discovery code to find out if there are any
>>> event groups that match unique identifiers understood by resctrl.
>>>
>>> Note that initiialzation must happen in two phases because the
>>
>> "initiialzation" -> "initialization"
>
> Wil fix.
>
>>> OOBMSM VSEC discovery process is not complete at resctrl
>>> "lateinit()" initialization time. So there is an initial hook
>>> that assumes that Intel PMT will exist, called early so that
>>> package scoped domain groups are initialized.
>>>
>>> At first mount the remainder of initialization is done. If there
>>> are no Intel PMT events, the package domain lists are removed.
>>>
>>> Move definition of struct mon_evt to <linux/resctrl_types.h>
>>
>> Why not include/resctrl.h?
>
> I put in in resctrl_types.h because it is a type definition?
> I'm not sure of the criteria James used to decide what goes
> in resctrl_types.h and what goes in resctrl.h
Changelog of
commit f16adbaf9272 ("x86/resctrl: Move resctrl types to a separate header")
explains it well.
Essentially I expect a new entry in resctrl_types.h to have a user in
arch/x86/include/asm/resctrl.h and since cover letter shows no changes to
this file the addition is unexpected to me.
>>> static __init void __check_quirks_intel(void)
>>> diff --git a/arch/x86/kernel/cpu/resctrl/intel_aet.c b/arch/x86/kernel/cpu/resctrl/intel_aet.c
>>> new file mode 100644
>>> index 000000000000..9a8ccb62b4ab
>>> --- /dev/null
>>> +++ b/arch/x86/kernel/cpu/resctrl/intel_aet.c
>>> @@ -0,0 +1,211 @@
>>> +// SPDX-License-Identifier: GPL-2.0-only
>>> +/*
>>> + * Resource Director Technology(RDT)
>>> + * - Intel Application Energy Telemetry
>>> + *
>>> + * Copyright (C) 2025 Intel Corporation
>>> + *
>>> + * Author:
>>> + * Tony Luck <tony.luck@intel.com>
>>> + */
>>> +
>>> +#define pr_fmt(fmt) "resctrl: " fmt
>>> +
>>> +#include <linux/cpu.h>
>>> +#include <linux/cleanup.h>
>>> +#include <linux/slab.h>
>>> +#include "fake_intel_aet_features.h"
>>> +#include <linux/intel_vsec.h>
>>> +#include <asm/resctrl.h>
>>> +
>>> +#include "internal.h"
>>> +
>>> +static struct pmt_feature_group *feat_energy;
>>> +static struct pmt_feature_group *feat_perf;
>>> +
>>> +/* All telemetry events from Intel CPUs */
>>> +enum pmt_event_id {
>>> + PMT_EVENT_ENERGY,
>>> + PMT_EVENT_ACTIVITY,
>>> + PMT_EVENT_STALLS_LLC_HIT,
>>> + PMT_EVENT_C1_RES,
>>> + PMT_EVENT_UNHALTED_CORE_CYCLES,
>>> + PMT_EVENT_STALLS_LLC_MISS,
>>> + PMT_EVENT_AUTO_C6_RES,
>>> + PMT_EVENT_UNHALTED_REF_CYCLES,
>>> + PMT_EVENT_UOPS_RETIRED,
>>> +
>>> + PMT_NUM_EVENTS
>>> +};
>>
>> I expect the above to become part of resctrl fs. Actually, I think
>> all properties of the new events, the id, name and how to display it,
>> should be part of resctrl fs.
>
> I'm not convinced about the amount of re-use that there will be
> for these events. I took a quick look at the current plan for a
> processor that follows Clearwater Forest and I see 22 events, only
> 3 of them match events in the above list.
>
This is not about re-use but instead these events becoming part of resctrl fs ABI.
>> We do not want other architectures to create their own display names for
>> the same events. I expect that this will require more plumbing between
>> arch and fs code to communicate which events are supported, similar to
>> what exists for the L3 events (for example, resctrl_arch_is_mbm_total_enabled()).
>
> Supported monitor events are indicated by setting bits in "rdt_mon_features"
> currently "unsigned int" but could become "long" or a longer bitmap if
> we really want the FS layer to keep track of every possible event for
> every architecture.
That sounds fine. rdt_mon_features is x86 arch code and that can change as needed.
Previous discussions ended up with rdt_mon_features staying with x86 arch code
and accessed from resctrl fs with helpers.
>>
>> This may result in struct rdt_core_mon_domain to no longer be empty but instead
>> for resctrl to use it to manage state of which events are enabled or not. Theoretically
>> all could be managed by the architecture but I think that could result in inconsistent
>> error codes to user space.
>
> rdt_core_mon_domain seems like the wrong level (unless we expect to have
> different enabled events in different domains). rdt_resource seems
> plausible ... or just expand "rdt_mon_features".
Understood on the domain level. This work introduced quite a few new data structures that
I have not yet digested. I cannot tell at this time if new resctrl fs helpers will have
easier time with unique perf related data structure or if x86's internal rdt_mon_features
need changes in addition to all these data structures.
Reinette
^ permalink raw reply [flat|nested] 37+ messages in thread
* [PATCH v2 08/16] x86/resctrl: Add Intel PMT domain specific code
2025-03-21 23:15 [PATCH v2 00/16] x86/resctrl telemetry monitoring Tony Luck
` (6 preceding siblings ...)
2025-03-21 23:15 ` [PATCH v2 07/16] x86/resctrl: Add initialization hook for Intel PMT events Tony Luck
@ 2025-03-21 23:15 ` Tony Luck
2025-03-21 23:15 ` [PATCH v2 09/16] x86/resctrl: Add detailed descriptions for Clearwater Forest events Tony Luck
` (7 subsequent siblings)
15 siblings, 0 replies; 37+ messages in thread
From: Tony Luck @ 2025-03-21 23:15 UTC (permalink / raw)
To: Fenghua Yu, Reinette Chatre, Maciej Wieczor-Retman, Peter Newman,
James Morse, Babu Moger, Drew Fustini, Dave Martin
Cc: linux-kernel, patches, Tony Luck
Domain specific initialization and tear down. Very simple as
there are no domain specific elements that need to be allocated
or initiailazed. Just the domain itself.
Signed-off-by: Tony Luck <tony.luck@intel.com>
---
arch/x86/kernel/cpu/resctrl/core.c | 32 ++++++++++++++++++++++++++++++
fs/resctrl/rdtgroup.c | 11 ++++++++--
2 files changed, 41 insertions(+), 2 deletions(-)
diff --git a/arch/x86/kernel/cpu/resctrl/core.c b/arch/x86/kernel/cpu/resctrl/core.c
index 1ab0f5eec244..2adf40d8de32 100644
--- a/arch/x86/kernel/cpu/resctrl/core.c
+++ b/arch/x86/kernel/cpu/resctrl/core.c
@@ -541,6 +541,29 @@ static void setup_l3_mon_domain(int cpu, int id, struct rdt_resource *r, struct
}
}
+static void setup_intel_aet_mon_domain(int cpu, int id, struct rdt_resource *r,
+ struct list_head *add_pos)
+{
+ struct rdt_core_mon_domain *d;
+ int err;
+
+ d = kzalloc_node(sizeof(*d), GFP_KERNEL, cpu_to_node(cpu));
+ if (!d)
+ return;
+
+ d->hdr.id = id;
+ d->hdr.type = RESCTRL_MON_DOMAIN;
+ cpumask_set_cpu(cpu, &d->hdr.cpu_mask);
+ list_add_tail_rcu(&d->hdr.list, add_pos);
+
+ err = resctrl_online_mon_domain(r, &d->hdr);
+ if (err) {
+ list_del_rcu(&d->hdr.list);
+ synchronize_rcu();
+ kfree(d);
+ }
+}
+
static void domain_add_cpu_mon(int cpu, struct rdt_resource *r)
{
int id = get_domain_id_from_scope(cpu, r->mon_scope);
@@ -565,6 +588,9 @@ static void domain_add_cpu_mon(int cpu, struct rdt_resource *r)
case RDT_RESOURCE_L3:
setup_l3_mon_domain(cpu, id, r, add_pos);
break;
+ case RDT_RESOURCE_INTEL_AET:
+ setup_intel_aet_mon_domain(cpu, id, r, add_pos);
+ break;
default:
WARN_ON_ONCE(1);
}
@@ -661,6 +687,12 @@ static void domain_remove_cpu_mon(int cpu, struct rdt_resource *r)
synchronize_rcu();
mon_domain_free(hw_dom);
break;
+ case RDT_RESOURCE_INTEL_AET:
+ resctrl_offline_mon_domain(r, d);
+ list_del_rcu(&hdr->list);
+ synchronize_rcu();
+ kfree(container_of(hdr, struct rdt_core_mon_domain, hdr));
+ break;
}
}
diff --git a/fs/resctrl/rdtgroup.c b/fs/resctrl/rdtgroup.c
index 4833dfa08ce3..d3919642aa9b 100644
--- a/fs/resctrl/rdtgroup.c
+++ b/fs/resctrl/rdtgroup.c
@@ -4077,6 +4077,9 @@ void resctrl_offline_mon_domain(struct rdt_resource *r, struct rdt_mon_domain *d
if (resctrl_mounted && resctrl_arch_mon_capable())
rmdir_mondata_subdir_allrdtgrp(r, &d->hdr);
+ if (r->rid == RDT_RESOURCE_INTEL_AET)
+ goto done;
+
if (resctrl_is_mbm_enabled())
cancel_delayed_work(&d->mbm_over);
if (resctrl_arch_is_llc_occupancy_enabled() && has_busy_rmid(d)) {
@@ -4093,7 +4096,7 @@ void resctrl_offline_mon_domain(struct rdt_resource *r, struct rdt_mon_domain *d
}
domain_destroy_mon_state(d);
-
+done:
mutex_unlock(&rdtgroup_mutex);
}
@@ -4160,11 +4163,14 @@ int resctrl_online_ctrl_domain(struct rdt_resource *r, struct rdt_ctrl_domain *d
int resctrl_online_mon_domain(struct rdt_resource *r, struct rdt_domain_hdr *hdr)
{
struct rdt_mon_domain *d;
- int err;
+ int err = 0;
d = container_of(hdr, struct rdt_mon_domain, hdr);
mutex_lock(&rdtgroup_mutex);
+ if (r->rid == RDT_RESOURCE_INTEL_AET)
+ goto do_mkdir;
+
err = domain_setup_mon_state(r, d);
if (err)
goto out_unlock;
@@ -4178,6 +4184,7 @@ int resctrl_online_mon_domain(struct rdt_resource *r, struct rdt_domain_hdr *hdr
if (resctrl_arch_is_llc_occupancy_enabled())
INIT_DELAYED_WORK(&d->cqm_limbo, cqm_handle_limbo);
+do_mkdir:
/*
* If the filesystem is not mounted then only the default resource group
* exists. Creation of its directories is deferred until mount time
--
2.48.1
^ permalink raw reply related [flat|nested] 37+ messages in thread* [PATCH v2 09/16] x86/resctrl: Add detailed descriptions for Clearwater Forest events
2025-03-21 23:15 [PATCH v2 00/16] x86/resctrl telemetry monitoring Tony Luck
` (7 preceding siblings ...)
2025-03-21 23:15 ` [PATCH v2 08/16] x86/resctrl: Add Intel PMT domain specific code Tony Luck
@ 2025-03-21 23:15 ` Tony Luck
2025-03-31 16:21 ` Reinette Chatre
2025-03-21 23:16 ` [PATCH v2 10/16] x86/resctrl: Allocate per-package structures for known events Tony Luck
` (6 subsequent siblings)
15 siblings, 1 reply; 37+ messages in thread
From: Tony Luck @ 2025-03-21 23:15 UTC (permalink / raw)
To: Fenghua Yu, Reinette Chatre, Maciej Wieczor-Retman, Peter Newman,
James Morse, Babu Moger, Drew Fustini, Dave Martin
Cc: linux-kernel, patches, Tony Luck
There are two event groups one for energy reporting and another
for "perf" events.
See the XML description files in https://github.com/intel/Intel-PMT
in the xml/CWF/OOBMSM/{RMID-ENERGY,RMID-PERF}/ for the detailed
descriptions that were used to derive these descriptions.
Signed-off-by: Tony Luck <tony.luck@intel.com>
---
arch/x86/kernel/cpu/resctrl/intel_aet.c | 54 +++++++++++++++++++++++++
1 file changed, 54 insertions(+)
diff --git a/arch/x86/kernel/cpu/resctrl/intel_aet.c b/arch/x86/kernel/cpu/resctrl/intel_aet.c
index 9a8ccb62b4ab..67862e81b9e0 100644
--- a/arch/x86/kernel/cpu/resctrl/intel_aet.c
+++ b/arch/x86/kernel/cpu/resctrl/intel_aet.c
@@ -94,8 +94,62 @@ struct telem_entry {
struct pmt_event evts[];
};
+/*
+ * Known events from Intel Clearwater Forest CPU.
+ */
+#define CWF_NUM_RMIDS 576
+#define CWF_ENERGY_GUID 0x26696143
+#define CWF_PERF_GUID 0x26557651
+#define CWF_ENERGY_STRIDE 0x10
+#define CWF_PERF_STRIDE 0x38
+
+/*
+ * https://github.com/intel/Intel-PMT
+ * xml/CWF/OOBMSM/RMID-ENERGY *.xml
+ */
+static struct telem_entry cwf_energy = {
+ .name = "energy",
+ .guid = CWF_ENERGY_GUID,
+ .num_rmids = CWF_NUM_RMIDS,
+ .stride = CWF_ENERGY_STRIDE,
+ .overflow_counter_off = CWF_NUM_RMIDS * CWF_ENERGY_STRIDE,
+ .last_overflow_tstamp_off = CWF_NUM_RMIDS * CWF_ENERGY_STRIDE + 8,
+ .last_update_tstamp_off = CWF_NUM_RMIDS * CWF_ENERGY_STRIDE + 16,
+ .evts = {
+ EVT(PMT_EVENT_ENERGY, "core_energy", 0x0, EVT_U46_18),
+ EVT(PMT_EVENT_ACTIVITY, "activity", 0x8, EVT_U46_18),
+ { }
+ }
+};
+
+/*
+ * https://github.com/intel/Intel-PMT
+ * xml/CWF/OOBMSM/RMID-PERF *.xml
+ */
+static struct telem_entry cwf_perf = {
+ .name = "perf",
+ .guid = CWF_PERF_GUID,
+ .num_rmids = CWF_NUM_RMIDS,
+ .stride = CWF_PERF_STRIDE,
+ .overflow_counter_off = CWF_NUM_RMIDS * CWF_PERF_STRIDE,
+ .last_overflow_tstamp_off = CWF_NUM_RMIDS * CWF_PERF_STRIDE + 8,
+ .last_update_tstamp_off = CWF_NUM_RMIDS * CWF_PERF_STRIDE + 16,
+ .evts = {
+ EVT(PMT_EVENT_STALLS_LLC_HIT, "stalls_llc_hit", 0x0, EVT_U64),
+ EVT(PMT_EVENT_C1_RES, "c1_res", 0x8, EVT_U64),
+ EVT(PMT_EVENT_UNHALTED_CORE_CYCLES, "unhalted_core_cycles", 0x10, EVT_U64),
+ EVT(PMT_EVENT_STALLS_LLC_MISS, "stalls_llc_miss", 0x18, EVT_U64),
+ EVT(PMT_EVENT_AUTO_C6_RES, "c6_res", 0x20, EVT_U64),
+ EVT(PMT_EVENT_UNHALTED_REF_CYCLES, "unhalted_ref_cycles", 0x28, EVT_U64),
+ EVT(PMT_EVENT_UOPS_RETIRED, "uops_retired", 0x30, EVT_U64),
+ { }
+ }
+};
+
/* All known telemetry event groups */
static struct telem_entry *telem_entry[] = {
+ &cwf_energy,
+ &cwf_perf,
NULL
};
--
2.48.1
^ permalink raw reply related [flat|nested] 37+ messages in thread* Re: [PATCH v2 09/16] x86/resctrl: Add detailed descriptions for Clearwater Forest events
2025-03-21 23:15 ` [PATCH v2 09/16] x86/resctrl: Add detailed descriptions for Clearwater Forest events Tony Luck
@ 2025-03-31 16:21 ` Reinette Chatre
2025-03-31 22:07 ` Luck, Tony
0 siblings, 1 reply; 37+ messages in thread
From: Reinette Chatre @ 2025-03-31 16:21 UTC (permalink / raw)
To: Tony Luck, Fenghua Yu, Maciej Wieczor-Retman, Peter Newman,
James Morse, Babu Moger, Drew Fustini, Dave Martin
Cc: linux-kernel, patches
Hi Tony,
On 3/21/25 4:15 PM, Tony Luck wrote:
> There are two event groups one for energy reporting and another
> for "perf" events.
>
> See the XML description files in https://github.com/intel/Intel-PMT
> in the xml/CWF/OOBMSM/{RMID-ENERGY,RMID-PERF}/ for the detailed
> descriptions that were used to derive these descriptions.
It is unexpected to me that this is made model specific while the
implementation is built around a guid. What will happen when
a new system using the same event layout arrives? Will the url
above be duplicated for this new system's acronym and contain
duplicate data? How will the resctrl support change? If I understand
correctly resctrl will not need to be changed but instead the "CWF"
events will just automatically be used for this new hypothetical
system? This makes me think that this should not be so CWF specific.
Reinette
^ permalink raw reply [flat|nested] 37+ messages in thread* Re: [PATCH v2 09/16] x86/resctrl: Add detailed descriptions for Clearwater Forest events
2025-03-31 16:21 ` Reinette Chatre
@ 2025-03-31 22:07 ` Luck, Tony
2025-04-01 0:13 ` Reinette Chatre
0 siblings, 1 reply; 37+ messages in thread
From: Luck, Tony @ 2025-03-31 22:07 UTC (permalink / raw)
To: Reinette Chatre
Cc: Fenghua Yu, Maciej Wieczor-Retman, Peter Newman, James Morse,
Babu Moger, Drew Fustini, Dave Martin, linux-kernel, patches
On Mon, Mar 31, 2025 at 09:21:11AM -0700, Reinette Chatre wrote:
> Hi Tony,
>
> On 3/21/25 4:15 PM, Tony Luck wrote:
> > There are two event groups one for energy reporting and another
> > for "perf" events.
> >
> > See the XML description files in https://github.com/intel/Intel-PMT
> > in the xml/CWF/OOBMSM/{RMID-ENERGY,RMID-PERF}/ for the detailed
> > descriptions that were used to derive these descriptions.
>
> It is unexpected to me that this is made model specific while the
> implementation is built around a guid. What will happen when
> a new system using the same event layout arrives? Will the url
> above be duplicated for this new system's acronym and contain
> duplicate data? How will the resctrl support change? If I understand
> correctly resctrl will not need to be changed but instead the "CWF"
> events will just automatically be used for this new hypothetical
> system? This makes me think that this should not be so CWF specific.
I was told that we might expect to see new guid values to describe
different event register layouts for the same CPU model. The event
aggregators are all firmware driven. So a BIOS update could make changes.
So I've left open the option to add additional structure defintions for
Clearwater Forest with some future firmware update.
If a future processor uses the exact same layout with the same guid value,
then no Linux update would be needed. We'd just have the slight oddity
that a structure named "cwf_*" would match and be used.
Next system to implement these telemetry events has a very different
list of supported events.
> Reinette
-Tony
^ permalink raw reply [flat|nested] 37+ messages in thread* Re: [PATCH v2 09/16] x86/resctrl: Add detailed descriptions for Clearwater Forest events
2025-03-31 22:07 ` Luck, Tony
@ 2025-04-01 0:13 ` Reinette Chatre
0 siblings, 0 replies; 37+ messages in thread
From: Reinette Chatre @ 2025-04-01 0:13 UTC (permalink / raw)
To: Luck, Tony
Cc: Fenghua Yu, Maciej Wieczor-Retman, Peter Newman, James Morse,
Babu Moger, Drew Fustini, Dave Martin, linux-kernel, patches
Hi Tony,
On 3/31/25 3:07 PM, Luck, Tony wrote:
> On Mon, Mar 31, 2025 at 09:21:11AM -0700, Reinette Chatre wrote:
>> Hi Tony,
>>
>> On 3/21/25 4:15 PM, Tony Luck wrote:
>>> There are two event groups one for energy reporting and another
>>> for "perf" events.
>>>
>>> See the XML description files in https://github.com/intel/Intel-PMT
>>> in the xml/CWF/OOBMSM/{RMID-ENERGY,RMID-PERF}/ for the detailed
>>> descriptions that were used to derive these descriptions.
>>
>> It is unexpected to me that this is made model specific while the
>> implementation is built around a guid. What will happen when
>> a new system using the same event layout arrives? Will the url
>> above be duplicated for this new system's acronym and contain
>> duplicate data? How will the resctrl support change? If I understand
>> correctly resctrl will not need to be changed but instead the "CWF"
>> events will just automatically be used for this new hypothetical
>> system? This makes me think that this should not be so CWF specific.
>
> I was told that we might expect to see new guid values to describe
> different event register layouts for the same CPU model. The event
> aggregators are all firmware driven. So a BIOS update could make changes.
Sounds like this supports my point. Naming these data structures based on
the CPU would create naming challenges when this scenario arrives, no?
>
> So I've left open the option to add additional structure defintions for
> Clearwater Forest with some future firmware update.
Why not use the guid as part of the naming to make clear that CPU model
does not dictate the layout?
>
> If a future processor uses the exact same layout with the same guid value,
> then no Linux update would be needed. We'd just have the slight oddity
> that a structure named "cwf_*" would match and be used.
>
> Next system to implement these telemetry events has a very different
> list of supported events.
>
Reinette
^ permalink raw reply [flat|nested] 37+ messages in thread
* [PATCH v2 10/16] x86/resctrl: Allocate per-package structures for known events
2025-03-21 23:15 [PATCH v2 00/16] x86/resctrl telemetry monitoring Tony Luck
` (8 preceding siblings ...)
2025-03-21 23:15 ` [PATCH v2 09/16] x86/resctrl: Add detailed descriptions for Clearwater Forest events Tony Luck
@ 2025-03-21 23:16 ` Tony Luck
2025-03-31 16:21 ` Reinette Chatre
2025-03-21 23:16 ` [PATCH v2 11/16] x86/resctrl: Link known events onto RDT_RESOURCE_INTEL_AET.evt_list Tony Luck
` (5 subsequent siblings)
15 siblings, 1 reply; 37+ messages in thread
From: Tony Luck @ 2025-03-21 23:16 UTC (permalink / raw)
To: Fenghua Yu, Reinette Chatre, Maciej Wieczor-Retman, Peter Newman,
James Morse, Babu Moger, Drew Fustini, Dave Martin
Cc: linux-kernel, patches, Tony Luck
Use the per-package counts of known events to allocate arrays to
make a copy of just the known events.
Add hook into resctrl_exit() to cleanup.
Signed-off-by: Tony Luck <tony.luck@intel.com>
---
arch/x86/kernel/cpu/resctrl/internal.h | 2 +
arch/x86/kernel/cpu/resctrl/core.c | 2 +
arch/x86/kernel/cpu/resctrl/intel_aet.c | 60 ++++++++++++++++++++++++-
3 files changed, 63 insertions(+), 1 deletion(-)
diff --git a/arch/x86/kernel/cpu/resctrl/internal.h b/arch/x86/kernel/cpu/resctrl/internal.h
index ada402c7678b..2503a24e4177 100644
--- a/arch/x86/kernel/cpu/resctrl/internal.h
+++ b/arch/x86/kernel/cpu/resctrl/internal.h
@@ -170,8 +170,10 @@ int rdt_get_mon_l3_config(struct rdt_resource *r);
#ifdef CONFIG_INTEL_AET_RESCTRL
int rdt_get_intel_aet_mon_config(void);
+void rdt_intel_aet_exit(void);
#else
static inline int rdt_get_intel_aet_mon_config(void) { return 0; }
+static inline void rdt_intel_aet_exit(void) { };
#endif
bool rdt_cpu_has(int flag);
diff --git a/arch/x86/kernel/cpu/resctrl/core.c b/arch/x86/kernel/cpu/resctrl/core.c
index 2adf40d8de32..d011c095aafa 100644
--- a/arch/x86/kernel/cpu/resctrl/core.c
+++ b/arch/x86/kernel/cpu/resctrl/core.c
@@ -1095,6 +1095,8 @@ static void __exit resctrl_arch_exit(void)
{
cpuhp_remove_state(rdt_online);
+ rdt_intel_aet_exit();
+
resctrl_exit();
}
diff --git a/arch/x86/kernel/cpu/resctrl/intel_aet.c b/arch/x86/kernel/cpu/resctrl/intel_aet.c
index 67862e81b9e0..e2d8eab997fc 100644
--- a/arch/x86/kernel/cpu/resctrl/intel_aet.c
+++ b/arch/x86/kernel/cpu/resctrl/intel_aet.c
@@ -188,6 +188,26 @@ static bool count_events(struct pkg_info *pkg, int max_pkgs, struct pmt_feature_
return found;
}
+static int setup(struct pkg_info *pkg, int pkgnum, struct pmt_feature_group *p, int slot)
+{
+ struct telem_entry **tentry;
+
+ for (int i = 0; i < p->count; i++) {
+ for (tentry = telem_entry; *tentry; tentry++) {
+ if (!(*tentry)->active)
+ continue;
+ if (pkgnum != p->regions[i].plat_info.package_id)
+ continue;
+ if (p->regions[i].guid != (*tentry)->guid)
+ continue;
+
+ pkg[pkgnum].regions[slot++] = p->regions[i];
+ }
+ }
+
+ return slot;
+}
+
DEFINE_FREE(intel_pmt_put_feature_group, struct pmt_feature_group *, \
if (!IS_ERR_OR_NULL(_T)) \
intel_pmt_put_feature_group(_T))
@@ -202,6 +222,8 @@ static bool get_events(void)
struct pmt_feature_group *p2 __free(intel_pmt_put_feature_group) = NULL;
int num_pkgs = topology_max_packages();
struct pkg_info *pkg __free(kfree) = NULL;
+ bool found_known_features = false;
+ int i, slot;
pkg = kmalloc_array(num_pkgs, sizeof(*pkg_info), GFP_KERNEL | __GFP_ZERO);
if (!pkg)
@@ -220,13 +242,32 @@ static bool get_events(void)
if (!count_events(pkg, num_pkgs, p2))
intel_pmt_put_feature_group(no_free_ptr(p2));
+ for (i = 0; i < num_pkgs; i++) {
+ if (!pkg[i].count)
+ continue;
+ found_known_features = true;
+ pkg[i].regions = kmalloc_array(pkg[i].count, sizeof(*pkg[i].regions), GFP_KERNEL);
+ if (!pkg[i].regions)
+ goto fail;
+
+ slot = 0;
+ if (!IS_ERR_VALUE(p1))
+ slot = setup(pkg, i, p1, slot);
+ if (!IS_ERR_VALUE(p2))
+ slot = setup(pkg, i, p2, slot);
+ }
+
if (!IS_ERR_OR_NULL(p1))
feat_energy = no_free_ptr(p1);
if (!IS_ERR_OR_NULL(p2))
feat_perf = no_free_ptr(p2);
pkg_info = no_free_ptr(pkg);
- return true;
+ return found_known_features;
+fail:
+ while (--i > 0)
+ kfree(pkg[i].regions);
+ return false;
}
/*
@@ -242,6 +283,23 @@ int rdt_get_intel_aet_mon_config(void)
return 1;
}
+/* Clean up when resctrl shuts down completely */
+void rdt_intel_aet_exit(void)
+{
+ int num_pkgs = topology_max_packages();
+
+ if (pkg_info) {
+ for (int i = 0; i < num_pkgs; i++)
+ kfree(pkg_info[i].regions);
+ kfree(pkg_info);
+ }
+
+ if (feat_energy)
+ intel_pmt_put_feature_group(feat_energy);
+ if (feat_perf)
+ intel_pmt_put_feature_group(feat_perf);
+}
+
/*
* Late (first mount) initialization. Safe to ask OOBMSM which telemetry
* event groups are supported.
--
2.48.1
^ permalink raw reply related [flat|nested] 37+ messages in thread* Re: [PATCH v2 10/16] x86/resctrl: Allocate per-package structures for known events
2025-03-21 23:16 ` [PATCH v2 10/16] x86/resctrl: Allocate per-package structures for known events Tony Luck
@ 2025-03-31 16:21 ` Reinette Chatre
2025-03-31 22:23 ` Luck, Tony
0 siblings, 1 reply; 37+ messages in thread
From: Reinette Chatre @ 2025-03-31 16:21 UTC (permalink / raw)
To: Tony Luck, Fenghua Yu, Maciej Wieczor-Retman, Peter Newman,
James Morse, Babu Moger, Drew Fustini, Dave Martin
Cc: linux-kernel, patches
hi Tony,
On 3/21/25 4:16 PM, Tony Luck wrote:
> Use the per-package counts of known events to allocate arrays to
> make a copy of just the known events.
>
> Add hook into resctrl_exit() to cleanup.
(above is not done in patch)
>
> Signed-off-by: Tony Luck <tony.luck@intel.com>
> ---
> arch/x86/kernel/cpu/resctrl/internal.h | 2 +
> arch/x86/kernel/cpu/resctrl/core.c | 2 +
> arch/x86/kernel/cpu/resctrl/intel_aet.c | 60 ++++++++++++++++++++++++-
> 3 files changed, 63 insertions(+), 1 deletion(-)
>
> diff --git a/arch/x86/kernel/cpu/resctrl/internal.h b/arch/x86/kernel/cpu/resctrl/internal.h
> index ada402c7678b..2503a24e4177 100644
> --- a/arch/x86/kernel/cpu/resctrl/internal.h
> +++ b/arch/x86/kernel/cpu/resctrl/internal.h
> @@ -170,8 +170,10 @@ int rdt_get_mon_l3_config(struct rdt_resource *r);
>
> #ifdef CONFIG_INTEL_AET_RESCTRL
> int rdt_get_intel_aet_mon_config(void);
> +void rdt_intel_aet_exit(void);
> #else
> static inline int rdt_get_intel_aet_mon_config(void) { return 0; }
> +static inline void rdt_intel_aet_exit(void) { };
> #endif
>
> bool rdt_cpu_has(int flag);
> diff --git a/arch/x86/kernel/cpu/resctrl/core.c b/arch/x86/kernel/cpu/resctrl/core.c
> index 2adf40d8de32..d011c095aafa 100644
> --- a/arch/x86/kernel/cpu/resctrl/core.c
> +++ b/arch/x86/kernel/cpu/resctrl/core.c
> @@ -1095,6 +1095,8 @@ static void __exit resctrl_arch_exit(void)
> {
> cpuhp_remove_state(rdt_online);
>
> + rdt_intel_aet_exit();
> +
> resctrl_exit();
> }
>
> diff --git a/arch/x86/kernel/cpu/resctrl/intel_aet.c b/arch/x86/kernel/cpu/resctrl/intel_aet.c
> index 67862e81b9e0..e2d8eab997fc 100644
> --- a/arch/x86/kernel/cpu/resctrl/intel_aet.c
> +++ b/arch/x86/kernel/cpu/resctrl/intel_aet.c
> @@ -188,6 +188,26 @@ static bool count_events(struct pkg_info *pkg, int max_pkgs, struct pmt_feature_
> return found;
> }
>
> +static int setup(struct pkg_info *pkg, int pkgnum, struct pmt_feature_group *p, int slot)
Could you please add a comment to this function to explain what it does?
> +{
> + struct telem_entry **tentry;
> +
> + for (int i = 0; i < p->count; i++) {
> + for (tentry = telem_entry; *tentry; tentry++) {
> + if (!(*tentry)->active)
> + continue;
> + if (pkgnum != p->regions[i].plat_info.package_id)
> + continue;
> + if (p->regions[i].guid != (*tentry)->guid)
> + continue;
> +
> + pkg[pkgnum].regions[slot++] = p->regions[i];
> + }
> + }
> +
> + return slot;
> +}
> +
> DEFINE_FREE(intel_pmt_put_feature_group, struct pmt_feature_group *, \
> if (!IS_ERR_OR_NULL(_T)) \
> intel_pmt_put_feature_group(_T))
> @@ -202,6 +222,8 @@ static bool get_events(void)
> struct pmt_feature_group *p2 __free(intel_pmt_put_feature_group) = NULL;
> int num_pkgs = topology_max_packages();
> struct pkg_info *pkg __free(kfree) = NULL;
> + bool found_known_features = false;
> + int i, slot;
>
> pkg = kmalloc_array(num_pkgs, sizeof(*pkg_info), GFP_KERNEL | __GFP_ZERO);
> if (!pkg)
> @@ -220,13 +242,32 @@ static bool get_events(void)
> if (!count_events(pkg, num_pkgs, p2))
> intel_pmt_put_feature_group(no_free_ptr(p2));
>
> + for (i = 0; i < num_pkgs; i++) {
> + if (!pkg[i].count)
> + continue;
> + found_known_features = true;
> + pkg[i].regions = kmalloc_array(pkg[i].count, sizeof(*pkg[i].regions), GFP_KERNEL);
> + if (!pkg[i].regions)
> + goto fail;
> +
> + slot = 0;
> + if (!IS_ERR_VALUE(p1))
> + slot = setup(pkg, i, p1, slot);
> + if (!IS_ERR_VALUE(p2))
> + slot = setup(pkg, i, p2, slot);
> + }
> +
> if (!IS_ERR_OR_NULL(p1))
> feat_energy = no_free_ptr(p1);
> if (!IS_ERR_OR_NULL(p2))
> feat_perf = no_free_ptr(p2);
> pkg_info = no_free_ptr(pkg);
>
> - return true;
> + return found_known_features;
> +fail:
include/linux/cleanup.h has this to say about mixing goto and free helpers:
"convert all resources that need a "goto" cleanup to scope-based cleanup, or convert
none of them"
> + while (--i > 0)
> + kfree(pkg[i].regions);
> + return false;
> }
>
> /*
> @@ -242,6 +283,23 @@ int rdt_get_intel_aet_mon_config(void)
> return 1;
> }
>
> +/* Clean up when resctrl shuts down completely */
> +void rdt_intel_aet_exit(void)
> +{
> + int num_pkgs = topology_max_packages();
> +
> + if (pkg_info) {
> + for (int i = 0; i < num_pkgs; i++)
> + kfree(pkg_info[i].regions);
> + kfree(pkg_info);
> + }
> +
> + if (feat_energy)
> + intel_pmt_put_feature_group(feat_energy);
> + if (feat_perf)
> + intel_pmt_put_feature_group(feat_perf);
> +}
> +
> /*
> * Late (first mount) initialization. Safe to ask OOBMSM which telemetry
> * event groups are supported.
Reinette
^ permalink raw reply [flat|nested] 37+ messages in thread* Re: [PATCH v2 10/16] x86/resctrl: Allocate per-package structures for known events
2025-03-31 16:21 ` Reinette Chatre
@ 2025-03-31 22:23 ` Luck, Tony
2025-04-01 0:22 ` Luck, Tony
0 siblings, 1 reply; 37+ messages in thread
From: Luck, Tony @ 2025-03-31 22:23 UTC (permalink / raw)
To: Reinette Chatre
Cc: Fenghua Yu, Maciej Wieczor-Retman, Peter Newman, James Morse,
Babu Moger, Drew Fustini, Dave Martin, linux-kernel, patches
On Mon, Mar 31, 2025 at 09:21:49AM -0700, Reinette Chatre wrote:
> hi Tony,
>
> On 3/21/25 4:16 PM, Tony Luck wrote:
> > Use the per-package counts of known events to allocate arrays to
> > make a copy of just the known events.
> >
> > Add hook into resctrl_exit() to cleanup.
>
> (above is not done in patch)
I think I shuffled it to a different patch. I will track it
down and move this comment to the right place.
> >
> > Signed-off-by: Tony Luck <tony.luck@intel.com>
> > ---
> > arch/x86/kernel/cpu/resctrl/internal.h | 2 +
> > arch/x86/kernel/cpu/resctrl/core.c | 2 +
> > arch/x86/kernel/cpu/resctrl/intel_aet.c | 60 ++++++++++++++++++++++++-
> > 3 files changed, 63 insertions(+), 1 deletion(-)
> >
> > diff --git a/arch/x86/kernel/cpu/resctrl/internal.h b/arch/x86/kernel/cpu/resctrl/internal.h
> > index ada402c7678b..2503a24e4177 100644
> > --- a/arch/x86/kernel/cpu/resctrl/internal.h
> > +++ b/arch/x86/kernel/cpu/resctrl/internal.h
> > @@ -170,8 +170,10 @@ int rdt_get_mon_l3_config(struct rdt_resource *r);
> >
> > #ifdef CONFIG_INTEL_AET_RESCTRL
> > int rdt_get_intel_aet_mon_config(void);
> > +void rdt_intel_aet_exit(void);
> > #else
> > static inline int rdt_get_intel_aet_mon_config(void) { return 0; }
> > +static inline void rdt_intel_aet_exit(void) { };
> > #endif
> >
> > bool rdt_cpu_has(int flag);
> > diff --git a/arch/x86/kernel/cpu/resctrl/core.c b/arch/x86/kernel/cpu/resctrl/core.c
> > index 2adf40d8de32..d011c095aafa 100644
> > --- a/arch/x86/kernel/cpu/resctrl/core.c
> > +++ b/arch/x86/kernel/cpu/resctrl/core.c
> > @@ -1095,6 +1095,8 @@ static void __exit resctrl_arch_exit(void)
> > {
> > cpuhp_remove_state(rdt_online);
> >
> > + rdt_intel_aet_exit();
> > +
> > resctrl_exit();
> > }
> >
> > diff --git a/arch/x86/kernel/cpu/resctrl/intel_aet.c b/arch/x86/kernel/cpu/resctrl/intel_aet.c
> > index 67862e81b9e0..e2d8eab997fc 100644
> > --- a/arch/x86/kernel/cpu/resctrl/intel_aet.c
> > +++ b/arch/x86/kernel/cpu/resctrl/intel_aet.c
> > @@ -188,6 +188,26 @@ static bool count_events(struct pkg_info *pkg, int max_pkgs, struct pmt_feature_
> > return found;
> > }
> >
> > +static int setup(struct pkg_info *pkg, int pkgnum, struct pmt_feature_group *p, int slot)
>
> Could you please add a comment to this function to explain what it does?
Indeed yes, it really needs some description.
>
> > +{
> > + struct telem_entry **tentry;
> > +
> > + for (int i = 0; i < p->count; i++) {
> > + for (tentry = telem_entry; *tentry; tentry++) {
> > + if (!(*tentry)->active)
> > + continue;
> > + if (pkgnum != p->regions[i].plat_info.package_id)
> > + continue;
> > + if (p->regions[i].guid != (*tentry)->guid)
> > + continue;
> > +
> > + pkg[pkgnum].regions[slot++] = p->regions[i];
> > + }
> > + }
> > +
> > + return slot;
> > +}
> > +
> > DEFINE_FREE(intel_pmt_put_feature_group, struct pmt_feature_group *, \
> > if (!IS_ERR_OR_NULL(_T)) \
> > intel_pmt_put_feature_group(_T))
> > @@ -202,6 +222,8 @@ static bool get_events(void)
> > struct pmt_feature_group *p2 __free(intel_pmt_put_feature_group) = NULL;
> > int num_pkgs = topology_max_packages();
> > struct pkg_info *pkg __free(kfree) = NULL;
> > + bool found_known_features = false;
> > + int i, slot;
> >
> > pkg = kmalloc_array(num_pkgs, sizeof(*pkg_info), GFP_KERNEL | __GFP_ZERO);
> > if (!pkg)
> > @@ -220,13 +242,32 @@ static bool get_events(void)
> > if (!count_events(pkg, num_pkgs, p2))
> > intel_pmt_put_feature_group(no_free_ptr(p2));
> >
> > + for (i = 0; i < num_pkgs; i++) {
> > + if (!pkg[i].count)
> > + continue;
> > + found_known_features = true;
> > + pkg[i].regions = kmalloc_array(pkg[i].count, sizeof(*pkg[i].regions), GFP_KERNEL);
> > + if (!pkg[i].regions)
> > + goto fail;
> > +
> > + slot = 0;
> > + if (!IS_ERR_VALUE(p1))
> > + slot = setup(pkg, i, p1, slot);
> > + if (!IS_ERR_VALUE(p2))
> > + slot = setup(pkg, i, p2, slot);
> > + }
> > +
> > if (!IS_ERR_OR_NULL(p1))
> > feat_energy = no_free_ptr(p1);
> > if (!IS_ERR_OR_NULL(p2))
> > feat_perf = no_free_ptr(p2);
> > pkg_info = no_free_ptr(pkg);
> >
> > - return true;
> > + return found_known_features;
> > +fail:
>
> include/linux/cleanup.h has this to say about mixing goto and free helpers:
> "convert all resources that need a "goto" cleanup to scope-based cleanup, or convert
> none of them"
Seems an awkward restriction for this case. "pkg" is a pointer to
a dynamic array, and each of the elements of the array might have
been initialized by another allocation. "pkg" is under control of
the __free() cleanup function.
Maybe I could define a custom cleanup (syntax of multi-statement
action to be figured out):
DEFINE_FREE(pkg_free, struct pkg_info *,
if (_T)
for (int i = 0; i < num_pkgs; i++)
kfree(_T[i].regions);
kfree(_T)
)
struct pkg_info *pkg __free(pkg_free) = NULL;
>
> > + while (--i > 0)
> > + kfree(pkg[i].regions);
> > + return false;
> > }
> >
> > /*
> > @@ -242,6 +283,23 @@ int rdt_get_intel_aet_mon_config(void)
> > return 1;
> > }
> >
> > +/* Clean up when resctrl shuts down completely */
> > +void rdt_intel_aet_exit(void)
> > +{
> > + int num_pkgs = topology_max_packages();
> > +
> > + if (pkg_info) {
> > + for (int i = 0; i < num_pkgs; i++)
> > + kfree(pkg_info[i].regions);
> > + kfree(pkg_info);
> > + }
> > +
> > + if (feat_energy)
> > + intel_pmt_put_feature_group(feat_energy);
> > + if (feat_perf)
> > + intel_pmt_put_feature_group(feat_perf);
> > +}
> > +
> > /*
> > * Late (first mount) initialization. Safe to ask OOBMSM which telemetry
> > * event groups are supported.
>
> Reinette
^ permalink raw reply [flat|nested] 37+ messages in thread* Re: [PATCH v2 10/16] x86/resctrl: Allocate per-package structures for known events
2025-03-31 22:23 ` Luck, Tony
@ 2025-04-01 0:22 ` Luck, Tony
0 siblings, 0 replies; 37+ messages in thread
From: Luck, Tony @ 2025-04-01 0:22 UTC (permalink / raw)
To: Reinette Chatre
Cc: Fenghua Yu, Maciej Wieczor-Retman, Peter Newman, James Morse,
Babu Moger, Drew Fustini, Dave Martin, linux-kernel, patches
On Mon, Mar 31, 2025 at 03:23:53PM -0700, Luck, Tony wrote:
> On Mon, Mar 31, 2025 at 09:21:49AM -0700, Reinette Chatre wrote:
> > include/linux/cleanup.h has this to say about mixing goto and free helpers:
> > "convert all resources that need a "goto" cleanup to scope-based cleanup, or convert
> > none of them"
>
> Seems an awkward restriction for this case. "pkg" is a pointer to
> a dynamic array, and each of the elements of the array might have
> been initialized by another allocation. "pkg" is under control of
> the __free() cleanup function.
>
> Maybe I could define a custom cleanup (syntax of multi-statement
> action to be figured out):
>
> DEFINE_FREE(pkg_free, struct pkg_info *,
> if (_T)
> for (int i = 0; i < num_pkgs; i++)
> kfree(_T[i].regions);
> kfree(_T)
> )
>
> struct pkg_info *pkg __free(pkg_free) = NULL;
Tried this out and it works (without any odd syntax for the multi-line
action (though I did miss that num_pkgs was a local variable, thankfully
simply initialized from topology_max_packages().
DEFINE_FREE(free_pkg_info, struct pkg_info *, \
if (_T) \
for (int i = 0; i < topology_max_packages(); i++) \
kfree(_T[i].regions); \
kfree(_T))
...
struct pkg_info *pkg __free(free_pkg_info) = NULL;
-Tony
^ permalink raw reply [flat|nested] 37+ messages in thread
* [PATCH v2 11/16] x86/resctrl: Link known events onto RDT_RESOURCE_INTEL_AET.evt_list
2025-03-21 23:15 [PATCH v2 00/16] x86/resctrl telemetry monitoring Tony Luck
` (9 preceding siblings ...)
2025-03-21 23:16 ` [PATCH v2 10/16] x86/resctrl: Allocate per-package structures for known events Tony Luck
@ 2025-03-21 23:16 ` Tony Luck
2025-03-31 16:23 ` Reinette Chatre
2025-03-21 23:16 ` [PATCH v2 12/16] x86/resctrl: Build lookup table for package events Tony Luck
` (4 subsequent siblings)
15 siblings, 1 reply; 37+ messages in thread
From: Tony Luck @ 2025-03-21 23:16 UTC (permalink / raw)
To: Fenghua Yu, Reinette Chatre, Maciej Wieczor-Retman, Peter Newman,
James Morse, Babu Moger, Drew Fustini, Dave Martin
Cc: linux-kernel, patches, Tony Luck
Core code uses this list to populate "mon_data" directories.
Signed-off-by: Tony Luck <tony.luck@intel.com>
---
arch/x86/kernel/cpu/resctrl/intel_aet.c | 23 ++++++++++++++++++++++-
1 file changed, 22 insertions(+), 1 deletion(-)
diff --git a/arch/x86/kernel/cpu/resctrl/intel_aet.c b/arch/x86/kernel/cpu/resctrl/intel_aet.c
index e2d8eab997fc..9ac912742ef1 100644
--- a/arch/x86/kernel/cpu/resctrl/intel_aet.c
+++ b/arch/x86/kernel/cpu/resctrl/intel_aet.c
@@ -13,6 +13,7 @@
#include <linux/cpu.h>
#include <linux/cleanup.h>
+#include <linux/minmax.h>
#include <linux/slab.h>
#include "fake_intel_aet_features.h"
#include <linux/intel_vsec.h>
@@ -308,14 +309,34 @@ void rdt_get_intel_aet_mount(void)
{
struct rdt_resource *r = &rdt_resources_all[RDT_RESOURCE_INTEL_AET].r_resctrl;
struct rdt_core_mon_domain *d, *tmp;
+ struct telem_entry **tentry;
static int do_one_time;
+ struct mon_evt *evt;
+ bool ret = false;
if (do_one_time)
return;
do_one_time = 1;
- if (!get_events()) {
+ if (!get_events())
+ goto done;
+
+ for (tentry = telem_entry; *tentry; tentry++) {
+ if (!(*tentry)->active)
+ continue;
+ for (int i = 0; (*tentry)->evts[i].evt.name; i++) {
+ evt = &(*tentry)->evts[i].evt;
+ list_add_tail(&evt->list, &r->evt_list);
+ ret = true;
+ }
+ if (!r->num_rmid)
+ r->num_rmid = (*tentry)->num_rmids;
+ else
+ r->num_rmid = min(r->num_rmid, (*tentry)->num_rmids);
+ }
+done:
+ if (!ret) {
list_for_each_entry_safe(d, tmp, &r->mon_domains, hdr.list)
kfree(d);
r->mon_capable = false;
--
2.48.1
^ permalink raw reply related [flat|nested] 37+ messages in thread* Re: [PATCH v2 11/16] x86/resctrl: Link known events onto RDT_RESOURCE_INTEL_AET.evt_list
2025-03-21 23:16 ` [PATCH v2 11/16] x86/resctrl: Link known events onto RDT_RESOURCE_INTEL_AET.evt_list Tony Luck
@ 2025-03-31 16:23 ` Reinette Chatre
2025-03-31 22:29 ` Luck, Tony
0 siblings, 1 reply; 37+ messages in thread
From: Reinette Chatre @ 2025-03-31 16:23 UTC (permalink / raw)
To: Tony Luck, Fenghua Yu, Maciej Wieczor-Retman, Peter Newman,
James Morse, Babu Moger, Drew Fustini, Dave Martin
Cc: linux-kernel, patches
Hi Tony,
On 3/21/25 4:16 PM, Tony Luck wrote:
> Core code uses this list to populate "mon_data" directories.
>
> Signed-off-by: Tony Luck <tony.luck@intel.com>
> ---
> arch/x86/kernel/cpu/resctrl/intel_aet.c | 23 ++++++++++++++++++++++-
> 1 file changed, 22 insertions(+), 1 deletion(-)
>
> diff --git a/arch/x86/kernel/cpu/resctrl/intel_aet.c b/arch/x86/kernel/cpu/resctrl/intel_aet.c
> index e2d8eab997fc..9ac912742ef1 100644
> --- a/arch/x86/kernel/cpu/resctrl/intel_aet.c
> +++ b/arch/x86/kernel/cpu/resctrl/intel_aet.c
> @@ -13,6 +13,7 @@
>
> #include <linux/cpu.h>
> #include <linux/cleanup.h>
> +#include <linux/minmax.h>
> #include <linux/slab.h>
> #include "fake_intel_aet_features.h"
> #include <linux/intel_vsec.h>
> @@ -308,14 +309,34 @@ void rdt_get_intel_aet_mount(void)
> {
> struct rdt_resource *r = &rdt_resources_all[RDT_RESOURCE_INTEL_AET].r_resctrl;
> struct rdt_core_mon_domain *d, *tmp;
> + struct telem_entry **tentry;
> static int do_one_time;
> + struct mon_evt *evt;
> + bool ret = false;
>
> if (do_one_time)
> return;
>
> do_one_time = 1;
>
> - if (!get_events()) {
> + if (!get_events())
> + goto done;
> +
> + for (tentry = telem_entry; *tentry; tentry++) {
> + if (!(*tentry)->active)
> + continue;
> + for (int i = 0; (*tentry)->evts[i].evt.name; i++) {
> + evt = &(*tentry)->evts[i].evt;
> + list_add_tail(&evt->list, &r->evt_list);
> + ret = true;
> + }
Architecture code should not be doing this. I expect this will be something
similar to l3_mon_evt_init() done by fs code after the architecture had
opportunity to configure which events are supported.
I wonder if resctrl_mon_resource_init() could be moved to rdt_get_tree()
to be called after the new (yet to be named) "resctrl_arch_for_arch_to_do_needed_on_resctrl_mount()"
resctrl_mon_resource_init() could be enhanced to do any needed resctrl fs
initialization for this new feature. This will include being able to
learn the accurate counts of rmid supported by the system to be able to
create monitor groups that can be supported by all monitoring resources?
> + if (!r->num_rmid)
> + r->num_rmid = (*tentry)->num_rmids;
> + else
> + r->num_rmid = min(r->num_rmid, (*tentry)->num_rmids);
> + }
> +done:
> + if (!ret) {
> list_for_each_entry_safe(d, tmp, &r->mon_domains, hdr.list)
> kfree(d);
> r->mon_capable = false;
Reinette
^ permalink raw reply [flat|nested] 37+ messages in thread* Re: [PATCH v2 11/16] x86/resctrl: Link known events onto RDT_RESOURCE_INTEL_AET.evt_list
2025-03-31 16:23 ` Reinette Chatre
@ 2025-03-31 22:29 ` Luck, Tony
0 siblings, 0 replies; 37+ messages in thread
From: Luck, Tony @ 2025-03-31 22:29 UTC (permalink / raw)
To: Reinette Chatre
Cc: Fenghua Yu, Maciej Wieczor-Retman, Peter Newman, James Morse,
Babu Moger, Drew Fustini, Dave Martin, linux-kernel, patches
On Mon, Mar 31, 2025 at 09:23:14AM -0700, Reinette Chatre wrote:
> Hi Tony,
>
> On 3/21/25 4:16 PM, Tony Luck wrote:
> > Core code uses this list to populate "mon_data" directories.
> >
> > Signed-off-by: Tony Luck <tony.luck@intel.com>
> > ---
> > arch/x86/kernel/cpu/resctrl/intel_aet.c | 23 ++++++++++++++++++++++-
> > 1 file changed, 22 insertions(+), 1 deletion(-)
> >
> > diff --git a/arch/x86/kernel/cpu/resctrl/intel_aet.c b/arch/x86/kernel/cpu/resctrl/intel_aet.c
> > index e2d8eab997fc..9ac912742ef1 100644
> > --- a/arch/x86/kernel/cpu/resctrl/intel_aet.c
> > +++ b/arch/x86/kernel/cpu/resctrl/intel_aet.c
> > @@ -13,6 +13,7 @@
> >
> > #include <linux/cpu.h>
> > #include <linux/cleanup.h>
> > +#include <linux/minmax.h>
> > #include <linux/slab.h>
> > #include "fake_intel_aet_features.h"
> > #include <linux/intel_vsec.h>
> > @@ -308,14 +309,34 @@ void rdt_get_intel_aet_mount(void)
> > {
> > struct rdt_resource *r = &rdt_resources_all[RDT_RESOURCE_INTEL_AET].r_resctrl;
> > struct rdt_core_mon_domain *d, *tmp;
> > + struct telem_entry **tentry;
> > static int do_one_time;
> > + struct mon_evt *evt;
> > + bool ret = false;
> >
> > if (do_one_time)
> > return;
> >
> > do_one_time = 1;
> >
> > - if (!get_events()) {
> > + if (!get_events())
> > + goto done;
> > +
> > + for (tentry = telem_entry; *tentry; tentry++) {
> > + if (!(*tentry)->active)
> > + continue;
> > + for (int i = 0; (*tentry)->evts[i].evt.name; i++) {
> > + evt = &(*tentry)->evts[i].evt;
> > + list_add_tail(&evt->list, &r->evt_list);
> > + ret = true;
> > + }
>
> Architecture code should not be doing this. I expect this will be something
> similar to l3_mon_evt_init() done by fs code after the architecture had
> opportunity to configure which events are supported.
If we do put all possible events into the architecture code, then it can
do something like:
for_each_set_bit(evt, &rdt_mon_features, NUM_EVENTS)
list_add(...);
My speculative patch:
https://lore.kernel.org/all/20250314202609.5753-1-tony.luck@intel.com/
did this for the existing events, so could build on it.
> I wonder if resctrl_mon_resource_init() could be moved to rdt_get_tree()
> to be called after the new (yet to be named) "resctrl_arch_for_arch_to_do_needed_on_resctrl_mount()"
> resctrl_mon_resource_init() could be enhanced to do any needed resctrl fs
> initialization for this new feature. This will include being able to
> learn the accurate counts of rmid supported by the system to be able to
> create monitor groups that can be supported by all monitoring resources?
>
> > + if (!r->num_rmid)
> > + r->num_rmid = (*tentry)->num_rmids;
> > + else
> > + r->num_rmid = min(r->num_rmid, (*tentry)->num_rmids);
> > + }
> > +done:
> > + if (!ret) {
> > list_for_each_entry_safe(d, tmp, &r->mon_domains, hdr.list)
> > kfree(d);
> > r->mon_capable = false;
>
> Reinette
-Tony
^ permalink raw reply [flat|nested] 37+ messages in thread
* [PATCH v2 12/16] x86/resctrl: Build lookup table for package events
2025-03-21 23:15 [PATCH v2 00/16] x86/resctrl telemetry monitoring Tony Luck
` (10 preceding siblings ...)
2025-03-21 23:16 ` [PATCH v2 11/16] x86/resctrl: Link known events onto RDT_RESOURCE_INTEL_AET.evt_list Tony Luck
@ 2025-03-21 23:16 ` Tony Luck
2025-03-21 23:16 ` [PATCH v2 13/16] x86/resctrl: Add code to display core telemetry events Tony Luck
` (3 subsequent siblings)
15 siblings, 0 replies; 37+ messages in thread
From: Tony Luck @ 2025-03-21 23:16 UTC (permalink / raw)
To: Fenghua Yu, Reinette Chatre, Maciej Wieczor-Retman, Peter Newman,
James Morse, Babu Moger, Drew Fustini, Dave Martin
Cc: linux-kernel, patches, Tony Luck
The resctrl filesystem saves the evt_type in the private data pointer
of the kernfs_node of each file in the mon_data directories.
To print the values for each file the show() function will need to map
from this evtid to:
num_rmids - to make sure data for this file exists
guid - to pick the pmt_event(s) for each package
stride/offset - to compute MMIO offset for this RMID/event
Build a lookup table for each event to save searching through
lists and add macros for ease of use.
Signed-off-by: Tony Luck <tony.luck@intel.com>
---
arch/x86/kernel/cpu/resctrl/intel_aet.c | 15 +++++++++++++++
1 file changed, 15 insertions(+)
diff --git a/arch/x86/kernel/cpu/resctrl/intel_aet.c b/arch/x86/kernel/cpu/resctrl/intel_aet.c
index 9ac912742ef1..bab8e4de26b3 100644
--- a/arch/x86/kernel/cpu/resctrl/intel_aet.c
+++ b/arch/x86/kernel/cpu/resctrl/intel_aet.c
@@ -39,6 +39,18 @@ enum pmt_event_id {
PMT_NUM_EVENTS
};
+static struct evtinfo {
+ struct telem_entry *telem_entry;
+ struct pmt_event *pmt_event;
+} evtinfo[PMT_NUM_EVENTS];
+
+#define EVT_NUM_RMIDS(evtid) (evtinfo[evtid].telem_entry->num_rmids)
+#define EVT_STRIDE(evtid) (evtinfo[evtid].telem_entry->stride)
+#define EVT_GUID(evtid) (evtinfo[evtid].telem_entry->guid)
+
+#define EVT_OFFSET(evtid) (evtinfo[evtid].pmt_event->evt_offset)
+#define EVT_TYPE(evtid) (evtinfo[evtid].pmt_event->evt_type)
+
/**
* enum evt_type - Type for values for each event.
* @EVT_U64: Integer up to 64 bits
@@ -328,6 +340,9 @@ void rdt_get_intel_aet_mount(void)
for (int i = 0; (*tentry)->evts[i].evt.name; i++) {
evt = &(*tentry)->evts[i].evt;
list_add_tail(&evt->list, &r->evt_list);
+
+ evtinfo[evt->evtid].telem_entry = *tentry;
+ evtinfo[evt->evtid].pmt_event = &(*tentry)->evts[i];
ret = true;
}
if (!r->num_rmid)
--
2.48.1
^ permalink raw reply related [flat|nested] 37+ messages in thread* [PATCH v2 13/16] x86/resctrl: Add code to display core telemetry events
2025-03-21 23:15 [PATCH v2 00/16] x86/resctrl telemetry monitoring Tony Luck
` (11 preceding siblings ...)
2025-03-21 23:16 ` [PATCH v2 12/16] x86/resctrl: Build lookup table for package events Tony Luck
@ 2025-03-21 23:16 ` Tony Luck
2025-03-31 16:23 ` Reinette Chatre
2025-03-21 23:16 ` [PATCH v2 14/16] x86/resctrl: Add status files to info/PKG_MON Tony Luck
` (2 subsequent siblings)
15 siblings, 1 reply; 37+ messages in thread
From: Tony Luck @ 2025-03-21 23:16 UTC (permalink / raw)
To: Fenghua Yu, Reinette Chatre, Maciej Wieczor-Retman, Peter Newman,
James Morse, Babu Moger, Drew Fustini, Dave Martin
Cc: linux-kernel, patches, Tony Luck
These can be read from any CPU. Rely on the smp_call*() functions
picking the current CPU when given a free choice from cpu_online_mask.
There may be multiple devices tracking each package, so scan all of them
and add up counters.
Output format depends on the data type. Either a 63 bit integer, or a
fixed point decimal.
Signed-off-by: Tony Luck <tony.luck@intel.com>
---
include/linux/resctrl.h | 3 ++
fs/resctrl/internal.h | 4 +-
arch/x86/kernel/cpu/resctrl/intel_aet.c | 53 +++++++++++++++++++++++++
fs/resctrl/ctrlmondata.c | 23 ++++++++++-
fs/resctrl/monitor.c | 23 +++++++++--
5 files changed, 100 insertions(+), 6 deletions(-)
diff --git a/include/linux/resctrl.h b/include/linux/resctrl.h
index 999e0802a26e..e900764393f4 100644
--- a/include/linux/resctrl.h
+++ b/include/linux/resctrl.h
@@ -532,8 +532,11 @@ void resctrl_exit(void);
#ifdef CONFIG_INTEL_AET_RESCTRL
void rdt_get_intel_aet_mount(void);
+bool intel_aet_read_event(int domid, int rmid, int evtid, u64 *val, bool *fptype);
#else
static inline void rdt_get_intel_aet_mount(void) { }
+static inline bool intel_aet_read_event(int domid, int rmid, int evtid, u64 *val,
+ bool *fptype) { return false; }
#endif
#endif /* _RESCTRL_H */
diff --git a/fs/resctrl/internal.h b/fs/resctrl/internal.h
index f5a698b49e97..4d65a781034e 100644
--- a/fs/resctrl/internal.h
+++ b/fs/resctrl/internal.h
@@ -98,6 +98,7 @@ struct mon_data {
* domains in @r sharing L3 @ci.id
* @evtid: Which monitor event to read.
* @first: Initialize MBM counter when true.
+ * @fptype:If true indicates @val is in 46.18 fixed point format
* @ci: Cacheinfo for L3. Only set when @d is NULL. Used when summing domains.
* @err: Error encountered when reading counter.
* @val: Returned value of event counter. If @rgrp is a parent resource group,
@@ -112,6 +113,7 @@ struct rmid_read {
struct rdt_mon_domain *d;
unsigned int evtid;
bool first;
+ bool fptype;
struct cacheinfo *ci;
int err;
u64 val;
@@ -343,7 +345,7 @@ int rdtgroup_mondata_show(struct seq_file *m, void *arg);
void mon_event_read(struct rmid_read *rr, struct rdt_resource *r,
struct rdt_mon_domain *d, struct rdtgroup *rdtgrp,
- cpumask_t *cpumask, int evtid, int first);
+ const cpumask_t *cpumask, int evtid, int first);
int resctrl_mon_resource_init(void);
diff --git a/arch/x86/kernel/cpu/resctrl/intel_aet.c b/arch/x86/kernel/cpu/resctrl/intel_aet.c
index bab8e4de26b3..41ebb2ee9b41 100644
--- a/arch/x86/kernel/cpu/resctrl/intel_aet.c
+++ b/arch/x86/kernel/cpu/resctrl/intel_aet.c
@@ -357,3 +357,56 @@ void rdt_get_intel_aet_mount(void)
r->mon_capable = false;
}
}
+
+#define VALID_BIT BIT_ULL(63)
+#define DATA_BITS GENMASK_ULL(62, 0)
+
+/*
+ * Walk the array of telemetry groups on a specific package.
+ * Read and sum values for a specific counter (described by
+ * guid and offset).
+ * Return failure (~0x0ull) if any counter isn't valid.
+ */
+static u64 scan_pmt_devs(int package, int guid, int offset)
+{
+ u64 rval, val;
+ int ndev = 0;
+
+ rval = 0;
+
+ for (int i = 0; i < pkg_info[package].count; i++) {
+ if (pkg_info[package].regions[i].guid != guid)
+ continue;
+ ndev++;
+ val = readq(pkg_info[package].regions[i].addr + offset);
+
+ if (!(val & VALID_BIT))
+ return ~0ull;
+ rval += val & DATA_BITS;
+ }
+
+ return ndev ? rval : ~0ull;
+}
+
+/*
+ * Read counter for an event on a domain (summing all aggregators
+ * on the domain).
+ */
+bool intel_aet_read_event(int domid, int rmid, int evtid, u64 *val, bool *fptype)
+{
+ u64 evtcount;
+ int offset;
+
+ if (rmid >= EVT_NUM_RMIDS(evtid))
+ return false;
+
+ offset = rmid * EVT_STRIDE(evtid);
+ offset += EVT_OFFSET(evtid);
+ evtcount = scan_pmt_devs(domid, EVT_GUID(evtid), offset);
+ *fptype = evtid == PMT_EVENT_ENERGY || evtid == PMT_EVENT_ACTIVITY;
+
+ if (evtcount != ~0ull || *val == 0)
+ *val += evtcount;
+
+ return evtcount != ~0ull;
+}
diff --git a/fs/resctrl/ctrlmondata.c b/fs/resctrl/ctrlmondata.c
index d56b78450a99..5612f5f64574 100644
--- a/fs/resctrl/ctrlmondata.c
+++ b/fs/resctrl/ctrlmondata.c
@@ -548,7 +548,7 @@ struct rdt_domain_hdr *resctrl_find_domain(struct list_head *h, int id,
void mon_event_read(struct rmid_read *rr, struct rdt_resource *r,
struct rdt_mon_domain *d, struct rdtgroup *rdtgrp,
- cpumask_t *cpumask, int evtid, int first)
+ const cpumask_t *cpumask, int evtid, int first)
{
int cpu;
@@ -585,6 +585,21 @@ void mon_event_read(struct rmid_read *rr, struct rdt_resource *r,
resctrl_arch_mon_ctx_free(r, evtid, rr->arch_mon_ctx);
}
+#define NUM_FRAC_BITS 18
+#define FRAC_MASK GENMASK(NUM_FRAC_BITS - 1, 0)
+
+static void show_fp_value(struct seq_file *m, u64 val)
+{
+ u64 frac;
+
+ frac = val & FRAC_MASK;
+ frac = frac * 1000000;
+ frac += 1ul << (NUM_FRAC_BITS - 1);
+ frac >>= NUM_FRAC_BITS;
+
+ seq_printf(m, "%llu.%06llu\n", val >> NUM_FRAC_BITS, frac);
+}
+
int rdtgroup_mondata_show(struct seq_file *m, void *arg)
{
struct kernfs_open_file *of = m->private;
@@ -594,6 +609,7 @@ int rdtgroup_mondata_show(struct seq_file *m, void *arg)
u32 resid, evtid, domid;
struct rdtgroup *rdtgrp;
struct rdt_resource *r;
+ const cpumask_t *mask;
struct mon_data *md;
int ret = 0;
@@ -642,7 +658,8 @@ int rdtgroup_mondata_show(struct seq_file *m, void *arg)
goto out;
}
d = container_of(hdr, struct rdt_mon_domain, hdr);
- mon_event_read(&rr, r, d, rdtgrp, &d->hdr.cpu_mask, evtid, false);
+ mask = (resid == RDT_RESOURCE_L3) ? &d->hdr.cpu_mask : cpu_online_mask;
+ mon_event_read(&rr, r, d, rdtgrp, mask, evtid, false);
}
checkresult:
@@ -651,6 +668,8 @@ int rdtgroup_mondata_show(struct seq_file *m, void *arg)
seq_puts(m, "Error\n");
else if (rr.err == -EINVAL)
seq_puts(m, "Unavailable\n");
+ else if (rr.fptype)
+ show_fp_value(m, rr.val);
else
seq_printf(m, "%llu\n", rr.val);
diff --git a/fs/resctrl/monitor.c b/fs/resctrl/monitor.c
index 3fe21dcf0fde..f4049ae5344c 100644
--- a/fs/resctrl/monitor.c
+++ b/fs/resctrl/monitor.c
@@ -447,6 +447,24 @@ static void mbm_bw_count(u32 closid, u32 rmid, struct rmid_read *rr)
m->prev_bw = cur_bw;
}
+static int mon_event_count_one_group(int closid, int rmid, struct rmid_read *rr)
+{
+ bool ret;
+
+ switch (rr->r->rid) {
+ case RDT_RESOURCE_L3:
+ rr->fptype = false;
+ return __mon_event_count(closid, rmid, rr);
+ case RDT_RESOURCE_INTEL_AET:
+ ret = intel_aet_read_event(rr->d->hdr.id, rmid, rr->evtid, &rr->val, &rr->fptype);
+ if (!ret)
+ rr->err = -EINVAL;
+ return ret ? 0 : -EINVAL;
+ }
+
+ return -EINVAL;
+}
+
/*
* This is scheduled by mon_event_read() to read the CQM/MBM counters
* on a domain.
@@ -460,7 +478,7 @@ void mon_event_count(void *info)
rdtgrp = rr->rgrp;
- ret = __mon_event_count(rdtgrp->closid, rdtgrp->mon.rmid, rr);
+ ret = mon_event_count_one_group(rdtgrp->closid, rdtgrp->mon.rmid, rr);
/*
* For Ctrl groups read data from child monitor groups and
@@ -471,8 +489,7 @@ void mon_event_count(void *info)
if (rdtgrp->type == RDTCTRL_GROUP) {
list_for_each_entry(entry, head, mon.crdtgrp_list) {
- if (__mon_event_count(entry->closid, entry->mon.rmid,
- rr) == 0)
+ if (mon_event_count_one_group(entry->closid, entry->mon.rmid, rr) == 0)
ret = 0;
}
}
--
2.48.1
^ permalink raw reply related [flat|nested] 37+ messages in thread* Re: [PATCH v2 13/16] x86/resctrl: Add code to display core telemetry events
2025-03-21 23:16 ` [PATCH v2 13/16] x86/resctrl: Add code to display core telemetry events Tony Luck
@ 2025-03-31 16:23 ` Reinette Chatre
2025-03-31 22:42 ` Luck, Tony
0 siblings, 1 reply; 37+ messages in thread
From: Reinette Chatre @ 2025-03-31 16:23 UTC (permalink / raw)
To: Tony Luck, Fenghua Yu, Maciej Wieczor-Retman, Peter Newman,
James Morse, Babu Moger, Drew Fustini, Dave Martin
Cc: linux-kernel, patches
Hi Tony,
(nit: "Add code to" can be dropped from shortlog)
On 3/21/25 4:16 PM, Tony Luck wrote:
> These can be read from any CPU. Rely on the smp_call*() functions
> picking the current CPU when given a free choice from cpu_online_mask.
>
> There may be multiple devices tracking each package, so scan all of them
> and add up counters.
>
> Output format depends on the data type. Either a 63 bit integer, or a
> fixed point decimal.
>
At this point the architecture and fs code is very intertwined. I hope that
some of the items I mentioned in earlier patches will help to support a clear
separation that will make the code that follows from here on easier to split
between arch and fs.
For example, I think this may end up with the new event enums defined in
include/linux/resctrl_types.h to support new architectural helpers
that take the enum as argument that the fs code can use to request the
event value from the architecture.
Reinette
^ permalink raw reply [flat|nested] 37+ messages in thread
* Re: [PATCH v2 13/16] x86/resctrl: Add code to display core telemetry events
2025-03-31 16:23 ` Reinette Chatre
@ 2025-03-31 22:42 ` Luck, Tony
0 siblings, 0 replies; 37+ messages in thread
From: Luck, Tony @ 2025-03-31 22:42 UTC (permalink / raw)
To: Reinette Chatre
Cc: Fenghua Yu, Maciej Wieczor-Retman, Peter Newman, James Morse,
Babu Moger, Drew Fustini, Dave Martin, linux-kernel, patches
On Mon, Mar 31, 2025 at 09:23:48AM -0700, Reinette Chatre wrote:
> Hi Tony,
>
> (nit: "Add code to" can be dropped from shortlog)
>
> On 3/21/25 4:16 PM, Tony Luck wrote:
> > These can be read from any CPU. Rely on the smp_call*() functions
> > picking the current CPU when given a free choice from cpu_online_mask.
> >
> > There may be multiple devices tracking each package, so scan all of them
> > and add up counters.
> >
> > Output format depends on the data type. Either a 63 bit integer, or a
> > fixed point decimal.
> >
>
> At this point the architecture and fs code is very intertwined. I hope that
> some of the items I mentioned in earlier patches will help to support a clear
> separation that will make the code that follows from here on easier to split
> between arch and fs.
> For example, I think this may end up with the new event enums defined in
> include/linux/resctrl_types.h to support new architectural helpers
> that take the enum as argument that the fs code can use to request the
> event value from the architecture.
I have a solution for the separtion for this. Each mon_evt structure
gets two new fields.
The first is "bool any_cpu;" if this is set to true the event can
be read from any CPU (and we can use James suggestion to pick from
"online_cpu_mask" instead of "d->hdr.cpu_mask" and let the optimizations
in smp_call*() avoid the IPI.
The second is a "enum format" field that specifies how to display the
value returned to FS code from architecture. Existing events all
print as a decimal number. I need to add the binary fixed point with
18 binary places to be printed as a floating point number.
Note that this solution needs to copy these fields from the
mon_evt structure to the mon_data_bits union (this is easier
if James' pending patch to convert to a mon_data structure removes the
restriction that all fields fit into 32-bits).
>
> Reinette
-Tony
^ permalink raw reply [flat|nested] 37+ messages in thread
* [PATCH v2 14/16] x86/resctrl: Add status files to info/PKG_MON
2025-03-21 23:15 [PATCH v2 00/16] x86/resctrl telemetry monitoring Tony Luck
` (12 preceding siblings ...)
2025-03-21 23:16 ` [PATCH v2 13/16] x86/resctrl: Add code to display core telemetry events Tony Luck
@ 2025-03-21 23:16 ` Tony Luck
2025-03-21 23:16 ` [PATCH v2 15/16] x86/resctrl: Enable package event monitoring Tony Luck
2025-03-21 23:16 ` [PATCH v2 16/16] x86/resctrl: Update Documentation for package events Tony Luck
15 siblings, 0 replies; 37+ messages in thread
From: Tony Luck @ 2025-03-21 23:16 UTC (permalink / raw)
To: Fenghua Yu, Reinette Chatre, Maciej Wieczor-Retman, Peter Newman,
James Morse, Babu Moger, Drew Fustini, Dave Martin
Cc: linux-kernel, patches, Tony Luck
Each of the package event groups includes status about the number
of missed events and timestamps (from a 25 MHz clock) on last time
an update was missed and last time an update was processed.
Add a three info files to report per-aggregator values of these
status values.
Signed-off-by: Tony Luck <tony.luck@intel.com>
---
include/linux/resctrl.h | 12 ++++
fs/resctrl/internal.h | 2 +
arch/x86/kernel/cpu/resctrl/intel_aet.c | 90 ++++++++++++++++++++++---
fs/resctrl/rdtgroup.c | 21 ++++++
4 files changed, 117 insertions(+), 8 deletions(-)
diff --git a/include/linux/resctrl.h b/include/linux/resctrl.h
index e900764393f4..9f6a0b26eec3 100644
--- a/include/linux/resctrl.h
+++ b/include/linux/resctrl.h
@@ -533,10 +533,22 @@ void resctrl_exit(void);
#ifdef CONFIG_INTEL_AET_RESCTRL
void rdt_get_intel_aet_mount(void);
bool intel_aet_read_event(int domid, int rmid, int evtid, u64 *val, bool *fptype);
+int rdtgroup_last_update_show(struct kernfs_open_file *of,
+ struct seq_file *seq, void *v);
+int rdtgroup_overflows_show(struct kernfs_open_file *of,
+ struct seq_file *seq, void *v);
+int rdtgroup_overflow_timestamp_show(struct kernfs_open_file *of,
+ struct seq_file *seq, void *v);
#else
static inline void rdt_get_intel_aet_mount(void) { }
static inline bool intel_aet_read_event(int domid, int rmid, int evtid, u64 *val,
bool *fptype) { return false; }
+static inline int rdtgroup_last_update_show(struct kernfs_open_file *of,
+ struct seq_file *seq, void *v) { return 0; };
+static inline int rdtgroup_overflows_show(struct kernfs_open_file *of,
+ struct seq_file *seq, void *v) { return 0; };
+static inline int rdtgroup_overflow_timestamp_show(struct kernfs_open_file *of,
+ struct seq_file *seq, void *v) { return 0; };
#endif
#endif /* _RESCTRL_H */
diff --git a/fs/resctrl/internal.h b/fs/resctrl/internal.h
index 4d65a781034e..53590b1fa8c3 100644
--- a/fs/resctrl/internal.h
+++ b/fs/resctrl/internal.h
@@ -236,6 +236,8 @@ struct rdtgroup {
#define RFTYPE_TOP_INFO (RFTYPE_INFO | RFTYPE_TOP)
+#define RFTYPE_PKG_INFO (RFTYPE_INFO | RFTYPE_RES_PKG)
+
#define RFTYPE_CTRL_BASE (RFTYPE_BASE | RFTYPE_CTRL)
#define RFTYPE_MON_BASE (RFTYPE_BASE | RFTYPE_MON)
diff --git a/arch/x86/kernel/cpu/resctrl/intel_aet.c b/arch/x86/kernel/cpu/resctrl/intel_aet.c
index 41ebb2ee9b41..28692c6ec425 100644
--- a/arch/x86/kernel/cpu/resctrl/intel_aet.c
+++ b/arch/x86/kernel/cpu/resctrl/intel_aet.c
@@ -358,6 +358,11 @@ void rdt_get_intel_aet_mount(void)
}
}
+enum ops {
+ DO_SUM_EVENT,
+ DO_PRINTVALS
+};
+
#define VALID_BIT BIT_ULL(63)
#define DATA_BITS GENMASK_ULL(62, 0)
@@ -367,22 +372,29 @@ void rdt_get_intel_aet_mount(void)
* guid and offset).
* Return failure (~0x0ull) if any counter isn't valid.
*/
-static u64 scan_pmt_devs(int package, int guid, int offset)
+static u64 scan_pmt_devs(struct seq_file *m, int package, int guid, int offset, enum ops op)
{
- u64 rval, val;
+ u64 rval = 0, val;
+ char *sep = "";
int ndev = 0;
- rval = 0;
-
for (int i = 0; i < pkg_info[package].count; i++) {
if (pkg_info[package].regions[i].guid != guid)
continue;
ndev++;
val = readq(pkg_info[package].regions[i].addr + offset);
- if (!(val & VALID_BIT))
- return ~0ull;
- rval += val & DATA_BITS;
+ switch (op) {
+ case DO_SUM_EVENT:
+ if (!(val & VALID_BIT))
+ return ~0ull;
+ rval += val & DATA_BITS;
+ break;
+ case DO_PRINTVALS:
+ seq_printf(m, "%s0x%llx", sep, val);
+ sep = ",";
+ break;
+ }
}
return ndev ? rval : ~0ull;
@@ -402,7 +414,7 @@ bool intel_aet_read_event(int domid, int rmid, int evtid, u64 *val, bool *fptype
offset = rmid * EVT_STRIDE(evtid);
offset += EVT_OFFSET(evtid);
- evtcount = scan_pmt_devs(domid, EVT_GUID(evtid), offset);
+ evtcount = scan_pmt_devs(NULL, domid, EVT_GUID(evtid), offset, DO_SUM_EVENT);
*fptype = evtid == PMT_EVENT_ENERGY || evtid == PMT_EVENT_ACTIVITY;
if (evtcount != ~0ull || *val == 0)
@@ -410,3 +422,65 @@ bool intel_aet_read_event(int domid, int rmid, int evtid, u64 *val, bool *fptype
return evtcount != ~0ull;
}
+
+static void status_show(struct seq_file *seq, char *name, int guid, int offset)
+{
+ struct rdt_resource *r = &rdt_resources_all[RDT_RESOURCE_INTEL_AET].r_resctrl;
+ struct rdt_mon_domain *d;
+ char *sep = "";
+
+ seq_printf(seq, "%s: ", name);
+
+ cpus_read_lock();
+ list_for_each_entry(d, &r->mon_domains, hdr.list) {
+ seq_printf(seq, "%s%d=[", sep, d->hdr.id);
+ scan_pmt_devs(seq, d->hdr.id, guid, offset, DO_PRINTVALS);
+ seq_puts(seq, "]");
+ sep = ";";
+ }
+ cpus_read_unlock();
+
+ seq_puts(seq, "\n");
+}
+
+int rdtgroup_last_update_show(struct kernfs_open_file *of,
+ struct seq_file *seq, void *v)
+{
+ struct telem_entry **t;
+
+ for (t = telem_entry; *t; t++) {
+ if (!(*t)->active)
+ continue;
+ status_show(seq, (*t)->name, (*t)->guid, (*t)->last_update_tstamp_off);
+ }
+
+ return 0;
+}
+
+int rdtgroup_overflows_show(struct kernfs_open_file *of,
+ struct seq_file *seq, void *v)
+{
+ struct telem_entry **t;
+
+ for (t = telem_entry; *t; t++) {
+ if (!(*t)->active)
+ continue;
+ status_show(seq, (*t)->name, (*t)->guid, (*t)->overflow_counter_off);
+ }
+
+ return 0;
+}
+
+int rdtgroup_overflow_timestamp_show(struct kernfs_open_file *of,
+ struct seq_file *seq, void *v)
+{
+ struct telem_entry **t;
+
+ for (t = telem_entry; *t; t++) {
+ if (!(*t)->active)
+ continue;
+ status_show(seq, (*t)->name, (*t)->guid, (*t)->last_overflow_tstamp_off);
+ }
+
+ return 0;
+}
diff --git a/fs/resctrl/rdtgroup.c b/fs/resctrl/rdtgroup.c
index d3919642aa9b..9021c60fd05b 100644
--- a/fs/resctrl/rdtgroup.c
+++ b/fs/resctrl/rdtgroup.c
@@ -1971,6 +1971,27 @@ static struct rftype res_common_files[] = {
.seq_show = rdtgroup_closid_show,
.fflags = RFTYPE_CTRL_BASE | RFTYPE_DEBUG,
},
+ {
+ .name = "last_update",
+ .mode = 0444,
+ .kf_ops = &rdtgroup_kf_single_ops,
+ .seq_show = rdtgroup_last_update_show,
+ .fflags = RFTYPE_PKG_INFO,
+ },
+ {
+ .name = "overflows",
+ .mode = 0444,
+ .kf_ops = &rdtgroup_kf_single_ops,
+ .seq_show = rdtgroup_overflows_show,
+ .fflags = RFTYPE_PKG_INFO,
+ },
+ {
+ .name = "overflow_timestamp",
+ .mode = 0444,
+ .kf_ops = &rdtgroup_kf_single_ops,
+ .seq_show = rdtgroup_overflow_timestamp_show,
+ .fflags = RFTYPE_PKG_INFO,
+ },
};
static int rdtgroup_add_files(struct kernfs_node *kn, unsigned long fflags)
--
2.48.1
^ permalink raw reply related [flat|nested] 37+ messages in thread* [PATCH v2 15/16] x86/resctrl: Enable package event monitoring
2025-03-21 23:15 [PATCH v2 00/16] x86/resctrl telemetry monitoring Tony Luck
` (13 preceding siblings ...)
2025-03-21 23:16 ` [PATCH v2 14/16] x86/resctrl: Add status files to info/PKG_MON Tony Luck
@ 2025-03-21 23:16 ` Tony Luck
2025-03-21 23:16 ` [PATCH v2 16/16] x86/resctrl: Update Documentation for package events Tony Luck
15 siblings, 0 replies; 37+ messages in thread
From: Tony Luck @ 2025-03-21 23:16 UTC (permalink / raw)
To: Fenghua Yu, Reinette Chatre, Maciej Wieczor-Retman, Peter Newman,
James Morse, Babu Moger, Drew Fustini, Dave Martin
Cc: linux-kernel, patches, Tony Luck
All the code to support this is in place. Unconditionally enable
the package resource during early initialization so that domain
lists are built.
Enumeration of the package events happens at first mount, if there
are no package scoped events, then this resource is disabled.
Signed-off-by: Tony Luck <tony.luck@intel.com>
---
arch/x86/kernel/cpu/resctrl/intel_aet.c | 2 ++
1 file changed, 2 insertions(+)
diff --git a/arch/x86/kernel/cpu/resctrl/intel_aet.c b/arch/x86/kernel/cpu/resctrl/intel_aet.c
index 28692c6ec425..593e838fe5b6 100644
--- a/arch/x86/kernel/cpu/resctrl/intel_aet.c
+++ b/arch/x86/kernel/cpu/resctrl/intel_aet.c
@@ -293,6 +293,8 @@ int rdt_get_intel_aet_mon_config(void)
INIT_LIST_HEAD(&r->evt_list);
+ r->mon_capable = true;
+
return 1;
}
--
2.48.1
^ permalink raw reply related [flat|nested] 37+ messages in thread* [PATCH v2 16/16] x86/resctrl: Update Documentation for package events
2025-03-21 23:15 [PATCH v2 00/16] x86/resctrl telemetry monitoring Tony Luck
` (14 preceding siblings ...)
2025-03-21 23:16 ` [PATCH v2 15/16] x86/resctrl: Enable package event monitoring Tony Luck
@ 2025-03-21 23:16 ` Tony Luck
15 siblings, 0 replies; 37+ messages in thread
From: Tony Luck @ 2025-03-21 23:16 UTC (permalink / raw)
To: Fenghua Yu, Reinette Chatre, Maciej Wieczor-Retman, Peter Newman,
James Morse, Babu Moger, Drew Fustini, Dave Martin
Cc: linux-kernel, patches, Tony Luck
Each "mon_data" directory is now divided between L3 events and package
events.
Signed-off-by: Tony Luck <tony.luck@intel.com>
---
Documentation/filesystems/resctrl.rst | 25 ++++++++++++++++---------
1 file changed, 16 insertions(+), 9 deletions(-)
diff --git a/Documentation/filesystems/resctrl.rst b/Documentation/filesystems/resctrl.rst
index 6768fc1fad16..0e915b7c55aa 100644
--- a/Documentation/filesystems/resctrl.rst
+++ b/Documentation/filesystems/resctrl.rst
@@ -366,15 +366,22 @@ When control is enabled all CTRL_MON groups will also contain:
When monitoring is enabled all MON groups will also contain:
"mon_data":
- This contains a set of files organized by L3 domain and by
- RDT event. E.g. on a system with two L3 domains there will
- be subdirectories "mon_L3_00" and "mon_L3_01". Each of these
- directories have one file per event (e.g. "llc_occupancy",
- "mbm_total_bytes", and "mbm_local_bytes"). In a MON group these
- files provide a read out of the current value of the event for
- all tasks in the group. In CTRL_MON groups these files provide
- the sum for all tasks in the CTRL_MON group and all tasks in
- MON groups. Please see example section for more details on usage.
+ This contains a set of directories, one for each instance
+ of an L3 cache, or of a processor package. The L3 cache
+ directories are named "mon_L3_00", "mon_L3_01" etc. The
+ package directories "mon_PKG_00", "mon_PKG_01" etc.
+
+ Within each directory there is one file per event. In
+ the L3 directories: "llc_occupancy", "mbm_total_bytes",
+ and "mbm_local_bytes". In the PKG directories: "core_energy",
+ "activity", etc.
+
+ In a MON group these files provide a read out of the current
+ value of the event for all tasks in the group. In CTRL_MON groups
+ these files provide the sum for all tasks in the CTRL_MON group
+ and all tasks in MON groups. Please see example section for more
+ details on usage.
+
On systems with Sub-NUMA Cluster (SNC) enabled there are extra
directories for each node (located within the "mon_L3_XX" directory
for the L3 cache they occupy). These are named "mon_sub_L3_YY"
--
2.48.1
^ permalink raw reply related [flat|nested] 37+ messages in thread