From: Peter Zijlstra <peterz@infradead.org>
To: LKML <linux-kernel@vger.kernel.org>
Cc: Ingo Molnar <mingo@elte.hu>, Lin Ming <ming.m.lin@intel.com>,
Stephane Eranian <eranian@google.com>,
"robert.richter" <robert.richter@amd.com>,
Corey Ashford <cjashfor@linux.vnet.ibm.com>,
fweisbec <fweisbec@gmail.com>, paulus <paulus@samba.org>,
Greg Kroah-Hartman <gregkh@suse.de>,
Kay Sievers <kay.sievers@vrfy.org>,
"H. Peter Anvin" <hpa@zytor.com>
Subject: [RFC][PATCH] perf: sysfs type id
Date: Tue, 09 Nov 2010 22:45:19 +0100 [thread overview]
Message-ID: <1289339119.2191.92.camel@laptop> (raw)
Below is an RFC patch adding dynamic type ids to perf.
We need to represent PMUs in sysfs because we want to allow multiple
(loadable) PMUs and need a way to identify them.
This patch creates a new device class "pmu" and adds a single attribute
"type" to it. This device attribute will expose the dynamic type id as
required by perf_event_attr::type.
The sysfs layout looks like:
[root@westmere ~]# cd /sys/class/pmu/
[root@westmere pmu]# ls -la
total 0
drwxr-xr-x 2 root root 0 2010-11-09 22:22 .
drwxr-xr-x 47 root root 0 2010-11-09 22:22 ..
lrwxrwxrwx 1 root root 0 2010-11-09 22:22 breakpoint -> ../../devices/virtual/pmu/breakpoint
lrwxrwxrwx 1 root root 0 2010-11-09 22:22 cpu -> ../../devices/virtual/pmu/cpu
lrwxrwxrwx 1 root root 0 2010-11-09 22:22 frob -> ../../devices/virtual/pmu/frob
lrwxrwxrwx 1 root root 0 2010-11-09 22:22 software -> ../../devices/virtual/pmu/software
lrwxrwxrwx 1 root root 0 2010-11-09 22:22 tracepoint -> ../../devices/virtual/pmu/tracepoint
[root@westmere pmu]# cd frob/
[root@westmere frob]# ls -la
total 0
drwxr-xr-x 3 root root 0 2010-11-09 22:22 .
drwxr-xr-x 7 root root 0 2010-11-09 22:22 ..
drwxr-xr-x 2 root root 0 2010-11-09 22:23 power
lrwxrwxrwx 1 root root 0 2010-11-09 22:23 subsystem -> ../../../../class/pmu
-r--r--r-- 1 root root 4096 2010-11-09 22:23 type
-rw-r--r-- 1 root root 4096 2010-11-09 22:22 uevent
[root@westmere frob]# cat type
6
Not at all sure what all those power bits mean, Greg?
The idea is to populate the sysfs topology with symlinks to these
devices (have /sys/devices/system/cpu/pmu link to the "cpu" pmu device,
have /sys/devices/system/node/ link to a possible "node" pmu device --
intel uncore, etc.). I still have to look at how to create these
symlinks; if anybody has a clue, please holler ;-)
Furthermore, we can later add an event directory to these devices which
lists the available events and contains the values required by
perf_event_attr::config.
Comments?
---
arch/x86/include/asm/perf_event.h | 2 -
arch/x86/kernel/cpu/common.c | 2 -
arch/x86/kernel/cpu/perf_event.c | 11 ++-
include/linux/perf_event.h | 7 ++-
init/main.c | 2 +-
kernel/hw_breakpoint.c | 2 +-
kernel/perf_event.c | 121 ++++++++++++++++++++++++++++++++----
7 files changed, 122 insertions(+), 25 deletions(-)
diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h
index 550e26b..d9d4dae 100644
--- a/arch/x86/include/asm/perf_event.h
+++ b/arch/x86/include/asm/perf_event.h
@@ -125,7 +125,6 @@ union cpuid10_edx {
#define IBS_OP_MAX_CNT_EXT 0x007FFFFFULL /* not a register bit mask */
#ifdef CONFIG_PERF_EVENTS
-extern void init_hw_perf_events(void);
extern void perf_events_lapic_init(void);
#define PERF_EVENT_INDEX_OFFSET 0
@@ -156,7 +155,6 @@ extern unsigned long perf_misc_flags(struct pt_regs *regs);
}
#else
-static inline void init_hw_perf_events(void) { }
static inline void perf_events_lapic_init(void) { }
#endif
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 4b68bda..9eb2248 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -13,7 +13,6 @@
#include <linux/io.h>
#include <asm/stackprotector.h>
-#include <asm/perf_event.h>
#include <asm/mmu_context.h>
#include <asm/hypervisor.h>
#include <asm/processor.h>
@@ -894,7 +893,6 @@ void __init identify_boot_cpu(void)
#else
vgetcpu_set_mode();
#endif
- init_hw_perf_events();
}
void __cpuinit identify_secondary_cpu(struct cpuinfo_x86 *c)
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index ed63101..04d0f3c 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -1348,7 +1348,7 @@ static void __init pmu_check_apic(void)
pr_info("no hardware sampling interrupt available.\n");
}
-void __init init_hw_perf_events(void)
+static int __init init_hw_perf_events(void)
{
struct event_constraint *c;
int err;
@@ -1363,11 +1363,11 @@ void __init init_hw_perf_events(void)
err = amd_pmu_init();
break;
default:
- return;
+ return 0;
}
if (err != 0) {
pr_cont("no PMU driver, software events only.\n");
- return;
+ return 0;
}
pmu_check_apic();
@@ -1418,9 +1418,12 @@ void __init init_hw_perf_events(void)
pr_info("... fixed-purpose events: %d\n", x86_pmu.num_counters_fixed);
pr_info("... event mask: %016Lx\n", x86_pmu.intel_ctrl);
- perf_pmu_register(&pmu);
+ perf_pmu_register(&pmu, "cpu", PERF_TYPE_RAW);
perf_cpu_notifier(x86_pmu_notifier);
+
+ return 0;
}
+early_initcall(init_hw_perf_events);
static inline void x86_pmu_read(struct perf_event *event)
{
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 057bf22..aa1117f 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -578,6 +578,10 @@ struct perf_event;
struct pmu {
struct list_head entry;
+ struct device *dev;
+ char *name;
+ int type;
+
int * __percpu pmu_disable_count;
struct perf_cpu_context * __percpu pmu_cpu_context;
int task_ctx_nr;
@@ -876,6 +880,7 @@ struct perf_cpu_context {
int exclusive;
struct list_head rotation_list;
int jiffies_interval;
+ int disable_count;
};
struct perf_output_handle {
@@ -891,7 +896,7 @@ struct perf_output_handle {
#ifdef CONFIG_PERF_EVENTS
-extern int perf_pmu_register(struct pmu *pmu);
+extern int perf_pmu_register(struct pmu *pmu, char *name, int type);
extern void perf_pmu_unregister(struct pmu *pmu);
extern int perf_num_counters(void);
diff --git a/init/main.c b/init/main.c
index e59af24..41a0c2f 100644
--- a/init/main.c
+++ b/init/main.c
@@ -588,6 +588,7 @@ asmlinkage void __init start_kernel(void)
sort_main_extable();
trap_init();
mm_init();
+ idr_init_cache();
/*
* Set up the scheduler prior starting any interrupts (such as the
* timer interrupt). Full topology setup happens at smp_init()
@@ -659,7 +660,6 @@ asmlinkage void __init start_kernel(void)
enable_debug_pagealloc();
kmemleak_init();
debug_objects_mem_init();
- idr_init_cache();
setup_per_cpu_pageset();
numa_policy_init();
if (late_time_init)
diff --git a/kernel/hw_breakpoint.c b/kernel/hw_breakpoint.c
index 2c9120f..a14ca35 100644
--- a/kernel/hw_breakpoint.c
+++ b/kernel/hw_breakpoint.c
@@ -641,7 +641,7 @@ static int __init init_hw_breakpoint(void)
constraints_initialized = 1;
- perf_pmu_register(&perf_breakpoint);
+ perf_pmu_register(&perf_breakpoint, "breakpoint", PERF_TYPE_BREAKPOINT);
return register_die_notifier(&hw_breakpoint_exceptions_nb);
diff --git a/kernel/perf_event.c b/kernel/perf_event.c
index 517d827..7f0d3ac 100644
--- a/kernel/perf_event.c
+++ b/kernel/perf_event.c
@@ -13,6 +13,7 @@
#include <linux/mm.h>
#include <linux/cpu.h>
#include <linux/smp.h>
+#include <linux/idr.h>
#include <linux/file.h>
#include <linux/poll.h>
#include <linux/slab.h>
@@ -22,6 +23,7 @@
#include <linux/percpu.h>
#include <linux/ptrace.h>
#include <linux/vmstat.h>
+#include <linux/device.h>
#include <linux/vmalloc.h>
#include <linux/hardirq.h>
#include <linux/rculist.h>
@@ -70,14 +72,16 @@ extern __weak const char *perf_pmu_name(void)
void perf_pmu_disable(struct pmu *pmu)
{
- int *count = this_cpu_ptr(pmu->pmu_disable_count);
+ struct perf_cpu_context *cpuctx = this_cpu_ptr(pmu->pmu_cpu_context);
+ int *count = &cpuctx->disable_count;
if (!(*count)++)
pmu->pmu_disable(pmu);
}
void perf_pmu_enable(struct pmu *pmu)
{
- int *count = this_cpu_ptr(pmu->pmu_disable_count);
+ struct perf_cpu_context *cpuctx = this_cpu_ptr(pmu->pmu_cpu_context);
+ int *count = &cpuctx->disable_count;
if (!--(*count))
pmu->pmu_enable(pmu);
}
@@ -4778,7 +4782,7 @@ static struct pmu perf_tracepoint = {
static inline void perf_tp_register(void)
{
- perf_pmu_register(&perf_tracepoint);
+ perf_pmu_register(&perf_tracepoint, "tracepoint", PERF_TYPE_TRACEPOINT);
}
static int perf_event_set_filter(struct perf_event *event, void __user *arg)
@@ -5087,6 +5091,9 @@ static void *find_pmu_context(int ctxn)
return NULL;
}
+static struct class *pmu_class;
+static struct idr pmu_idr;
+
static void free_pmu_context(void * __percpu cpu_context)
{
struct pmu *pmu;
@@ -5102,26 +5109,59 @@ static void free_pmu_context(void * __percpu cpu_context)
free_percpu(cpu_context);
out:
+ if (pmu->type >= 0)
+ idr_remove(&pmu_idr, pmu->type);
+
mutex_unlock(&pmus_lock);
+
+ if (pmu->dev)
+ device_unregister(pmu->dev);
}
-int perf_pmu_register(struct pmu *pmu)
+int perf_pmu_register(struct pmu *pmu, char *name, int type)
{
int cpu, ret;
mutex_lock(&pmus_lock);
ret = -ENOMEM;
- pmu->pmu_disable_count = alloc_percpu(int);
- if (!pmu->pmu_disable_count)
- goto unlock;
+ pmu->type = -1;
+ if (!name)
+ goto nodev;
+
+ pmu->name = name;
+ if (type < 0) {
+ int err = idr_pre_get(&pmu_idr, GFP_KERNEL);
+ if (!err) {
+ printk(KERN_ERR "FOO! %d\n", err);
+ goto unlock;
+ }
+ err = idr_get_new_above(&pmu_idr, pmu, PERF_TYPE_MAX, &type);
+ if (err) {
+ printk(KERN_ERR "BAR! %d\n", err);
+ ret = err;
+ goto unlock;
+ }
+ }
+ pmu->type = type;
+
+ if (pmu_class) {
+ pmu->dev = device_create(pmu_class, NULL, MKDEV(0, 0),
+ pmu, "%s", pmu->name);
+ if (IS_ERR(pmu->dev)) {
+ ret = PTR_ERR(pmu->dev);
+ goto free_idr;
+ }
+ }
+
+nodev:
pmu->pmu_cpu_context = find_pmu_context(pmu->task_ctx_nr);
if (pmu->pmu_cpu_context)
goto got_cpu_context;
pmu->pmu_cpu_context = alloc_percpu(struct perf_cpu_context);
if (!pmu->pmu_cpu_context)
- goto free_pdc;
+ goto free_dev;
for_each_possible_cpu(cpu) {
struct perf_cpu_context *cpuctx;
@@ -5132,6 +5172,7 @@ int perf_pmu_register(struct pmu *pmu)
cpuctx->ctx.pmu = pmu;
cpuctx->jiffies_interval = 1;
INIT_LIST_HEAD(&cpuctx->rotation_list);
+ cpuctx->disable_count = 0;
}
got_cpu_context:
@@ -5164,8 +5205,13 @@ unlock:
return ret;
-free_pdc:
- free_percpu(pmu->pmu_disable_count);
+free_dev:
+ if (pmu->dev)
+ device_unregister(pmu->dev);
+
+free_idr:
+ if (pmu->type >= 0)
+ idr_remove(&pmu_idr, pmu->type);
goto unlock;
}
@@ -5182,7 +5228,6 @@ void perf_pmu_unregister(struct pmu *pmu)
synchronize_srcu(&pmus_srcu);
synchronize_rcu();
- free_percpu(pmu->pmu_disable_count);
free_pmu_context(pmu->pmu_cpu_context);
}
@@ -5192,6 +5237,13 @@ struct pmu *perf_init_event(struct perf_event *event)
int idx;
idx = srcu_read_lock(&pmus_srcu);
+
+ rcu_read_lock();
+ pmu = idr_find(&pmu_idr, event->attr.type);
+ rcu_read_unlock();
+ if (pmu)
+ goto unlock;
+
list_for_each_entry_rcu(pmu, &pmus, entry) {
int ret = pmu->event_init(event);
if (!ret)
@@ -6293,13 +6345,54 @@ perf_cpu_notify(struct notifier_block *self, unsigned long action, void *hcpu)
return NOTIFY_OK;
}
+static ssize_t type_show(struct device *dev,
+ struct device_attribute *attr, char *page)
+{
+ struct pmu *pmu = dev_get_drvdata(dev);
+
+ return snprintf(page, PAGE_SIZE-1, "%d\n", pmu->type);
+}
+
+static struct device_attribute pmu_dev_attrs[] = {
+ __ATTR_RO(type),
+ __ATTR_NULL,
+};
+
void __init perf_event_init(void)
{
+ idr_init(&pmu_idr);
+
perf_event_init_all_cpus();
init_srcu_struct(&pmus_srcu);
- perf_pmu_register(&perf_swevent);
- perf_pmu_register(&perf_cpu_clock);
- perf_pmu_register(&perf_task_clock);
+ perf_pmu_register(&perf_swevent, "software", PERF_TYPE_SOFTWARE);
+ perf_pmu_register(&perf_cpu_clock, "frob", -1); /* test the dynamic code */
+ perf_pmu_register(&perf_task_clock, NULL, -1);
perf_tp_register();
perf_cpu_notifier(perf_cpu_notify);
}
+
+int __init perf_event_sysfs_init(void)
+{
+ struct pmu *pmu;
+
+ mutex_lock(&pmus_lock);
+
+ pmu_class = class_create(THIS_MODULE, "pmu");
+ BUG_ON(IS_ERR(pmu_class));
+ pmu_class->dev_attrs = pmu_dev_attrs;
+
+ list_for_each_entry(pmu, &pmus, entry) {
+ if (!pmu->name || pmu->type < 0)
+ continue;
+
+ pmu->dev = device_create(pmu_class, NULL, MKDEV(0, 0),
+ pmu, "%s", pmu->name);
+ if (IS_ERR(pmu->dev))
+ pmu->dev = NULL; /* do we care about the failure? */
+ }
+
+ mutex_unlock(&pmus_lock);
+
+ return 0;
+}
+__initcall(perf_event_sysfs_init);
next reply other threads:[~2010-11-09 21:45 UTC|newest]
Thread overview: 34+ messages / expand[flat|nested] mbox.gz Atom feed top
2010-11-09 21:45 Peter Zijlstra [this message]
2010-11-09 22:11 ` [RFC][PATCH] perf: sysfs type id Kay Sievers
2010-11-09 22:22 ` Peter Zijlstra
2010-11-09 22:40 ` Kay Sievers
2010-11-09 22:13 ` Greg KH
2010-11-09 23:36 ` Michael Ellerman
[not found] ` <AANLkTi=UftgQn0ydRd2wszqFtpRrkEcW7dzfapKKix_V@mail.gmail.com>
[not found] ` <1289350360.22787.9.camel@concordia>
[not found] ` <AANLkTikGHNkUN6t9rPhdE6XOQiqb5xAzH_9eY6L9h2H2@mail.gmail.com>
2010-11-10 1:10 ` Michael Ellerman
2010-11-10 1:19 ` Kay Sievers
2010-11-10 1:45 ` Michael Ellerman
2010-11-10 1:59 ` Kay Sievers
2010-11-10 3:37 ` Michael Ellerman
2010-11-10 2:11 ` Kay Sievers
2010-11-10 17:31 ` Greg KH
2010-11-10 12:27 ` Peter Zijlstra
2010-11-10 13:36 ` sysfs: Add an 'events' class. (was: Re: [RFC][PATCH] perf: sysfs type id) Ingo Molnar
2010-11-10 14:14 ` Kay Sievers
2010-11-10 15:00 ` Ingo Molnar
2010-11-11 6:39 ` Kay Sievers
2010-11-10 13:01 ` [RFC][PATCH] perf: sysfs type id Stephane Eranian
2010-11-10 14:10 ` Peter Zijlstra
2010-11-10 14:19 ` Peter Zijlstra
2010-11-10 20:08 ` Stephane Eranian
2010-11-10 20:32 ` Peter Zijlstra
2010-11-10 20:53 ` Stephane Eranian
2010-11-10 21:05 ` Peter Zijlstra
2010-11-17 2:35 ` Corey Ashford
2010-11-17 7:02 ` Kyle Moffett
2010-11-17 11:30 ` Peter Zijlstra
2010-11-17 11:25 ` Peter Zijlstra
2010-11-17 19:47 ` Corey Ashford
2010-11-17 19:57 ` Peter Zijlstra
2010-11-17 20:01 ` Peter Zijlstra
2010-11-17 21:39 ` Corey Ashford
2010-11-10 14:24 ` Stephane Eranian
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1289339119.2191.92.camel@laptop \
--to=peterz@infradead.org \
--cc=cjashfor@linux.vnet.ibm.com \
--cc=eranian@google.com \
--cc=fweisbec@gmail.com \
--cc=gregkh@suse.de \
--cc=hpa@zytor.com \
--cc=kay.sievers@vrfy.org \
--cc=linux-kernel@vger.kernel.org \
--cc=ming.m.lin@intel.com \
--cc=mingo@elte.hu \
--cc=paulus@samba.org \
--cc=robert.richter@amd.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox