[PATCH v2 0/4] perf python: Add missing infra pieces for counting perf events

linux-perf-users.vger.kernel.org archive mirror
 help / color / mirror / Atom feed

* [PATCH v2 0/4] perf python: Add missing infra pieces for counting perf events
@ 2025-05-12  5:57 Gautam Menghani
  2025-05-12  5:57 ` [PATCH v2 1/4] perf python: Add support for perf_counts_values to return counter data Gautam Menghani
                   ` (3 more replies)
  0 siblings, 4 replies; 12+ messages in thread
From: Gautam Menghani @ 2025-05-12  5:57 UTC (permalink / raw)
  To: peterz, mingo, acme, namhyung, mark.rutland, alexander.shishkin,
	jolsa, irogers, adrian.hunter, kan.liang
  Cc: Gautam Menghani, linux-perf-users, linux-kernel, maddy

Add the missing infra code in the perf python bindings for measuring and
reading the counter values for the given perf event. Demonstrate the
usage of this with counting.py - a python version of counting.c

v1 -> v2:
1. Use the existing iteration support for evlist
2. Drop the use of next method
3. Use existing helper functions for python example

Gautam Menghani (4):
  perf python: Add support for perf_counts_values to return counter data
  perf python: Add evsel read method
  perf python: Add evlist close support
  perf python: Add counting.py as example for counting perf events

 tools/perf/python/counting.py |  33 +++++++++
 tools/perf/util/python.c      | 131 +++++++++++++++++++++++++++++++++-
 2 files changed, 163 insertions(+), 1 deletion(-)
 create mode 100755 tools/perf/python/counting.py

-- 
2.49.0


^ permalink raw reply	[flat|nested] 12+ messages in thread

* [PATCH v2 1/4] perf python: Add support for perf_counts_values to return counter data
  2025-05-12  5:57 [PATCH v2 0/4] perf python: Add missing infra pieces for counting perf events Gautam Menghani
@ 2025-05-12  5:57 ` Gautam Menghani
  2025-05-12  5:57 ` [PATCH v2 2/4] perf python: Add evsel read method Gautam Menghani
                   ` (2 subsequent siblings)
  3 siblings, 0 replies; 12+ messages in thread
From: Gautam Menghani @ 2025-05-12  5:57 UTC (permalink / raw)
  To: peterz, mingo, acme, namhyung, mark.rutland, alexander.shishkin,
	jolsa, irogers, adrian.hunter, kan.liang
  Cc: Gautam Menghani, linux-perf-users, linux-kernel, maddy

Add support for perf_counts_values struct to enable the python
bindings to read and return the counter data.

Signed-off-by: Gautam Menghani <gautam@linux.ibm.com>
---
 tools/perf/util/python.c | 92 +++++++++++++++++++++++++++++++++++++++-
 1 file changed, 91 insertions(+), 1 deletion(-)

diff --git a/tools/perf/util/python.c b/tools/perf/util/python.c
index f3c05da25b4a..011ee2e27797 100644
--- a/tools/perf/util/python.c
+++ b/tools/perf/util/python.c
@@ -626,6 +626,92 @@ static int pyrf_thread_map__setup_types(void)
 	return PyType_Ready(&pyrf_thread_map__type);
 }
 
+struct pyrf_counts_values {
+	PyObject_HEAD
+
+	struct perf_counts_values values;
+};
+
+static const char pyrf_counts_values__doc[] = PyDoc_STR("perf counts values object.");
+
+static void pyrf_counts_values__delete(struct pyrf_counts_values *pcounts_values)
+{
+	Py_TYPE(pcounts_values)->tp_free((PyObject *)pcounts_values);
+}
+
+#define counts_values_member_def(member, ptype, help) \
+	{ #member, ptype, \
+	  offsetof(struct pyrf_counts_values, values.member), \
+	  0, help }
+
+static PyMemberDef pyrf_counts_values_members[] = {
+	counts_values_member_def(val, Py_T_ULONG, "Value of event"),
+	counts_values_member_def(ena, Py_T_ULONG, "Time for which enabled"),
+	counts_values_member_def(run, Py_T_ULONG, "Time for which running"),
+	counts_values_member_def(id, Py_T_ULONG, "Unique ID for an event"),
+	counts_values_member_def(lost, Py_T_ULONG, "Num of lost samples"),
+	{NULL}
+};
+
+static PyObject *pyrf_counts_values_get_values(struct pyrf_counts_values *self, void *closure)
+{
+	PyObject *vals = PyList_New(5);
+
+	if (!vals)
+		return NULL;
+	for (int i = 0; i < 5; i++)
+		PyList_SetItem(vals, i, PyLong_FromLong(self->values.values[i]));
+
+	return vals;
+}
+
+static int pyrf_counts_values_set_values(struct pyrf_counts_values *self, PyObject *list,
+					 void *closure)
+{
+	Py_ssize_t size;
+	PyObject *item = NULL;
+
+	if (!PyList_Check(list)) {
+		PyErr_SetString(PyExc_TypeError, "Value assigned must be a list");
+		return -1;
+	}
+
+	size = PyList_Size(list);
+	for (Py_ssize_t i = 0; i < size; i++) {
+		item = PyList_GetItem(list, i);
+		if (!PyLong_Check(item)) {
+			PyErr_SetString(PyExc_TypeError, "List members should be numbers");
+			return -1;
+		}
+		self->values.values[i] = PyLong_AsLong(item);
+	}
+
+	return 0;
+}
+
+static PyGetSetDef pyrf_counts_values_getset[] = {
+	{"values", (getter)pyrf_counts_values_get_values, (setter)pyrf_counts_values_set_values,
+		"Name field", NULL},
+	{NULL}
+};
+
+static PyTypeObject pyrf_counts_values__type = {
+	PyVarObject_HEAD_INIT(NULL, 0)
+	.tp_name	= "perf.counts_values",
+	.tp_basicsize	= sizeof(struct pyrf_counts_values),
+	.tp_dealloc	= (destructor)pyrf_counts_values__delete,
+	.tp_flags	= Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE,
+	.tp_doc		= pyrf_counts_values__doc,
+	.tp_members	= pyrf_counts_values_members,
+	.tp_getset	= pyrf_counts_values_getset,
+};
+
+static int pyrf_counts_values__setup_types(void)
+{
+	pyrf_counts_values__type.tp_new = PyType_GenericNew;
+	return PyType_Ready(&pyrf_counts_values__type);
+}
+
 struct pyrf_evsel {
 	PyObject_HEAD
 
@@ -1442,7 +1528,8 @@ PyMODINIT_FUNC PyInit_perf(void)
 	    pyrf_evlist__setup_types() < 0 ||
 	    pyrf_evsel__setup_types() < 0 ||
 	    pyrf_thread_map__setup_types() < 0 ||
-	    pyrf_cpu_map__setup_types() < 0)
+	    pyrf_cpu_map__setup_types() < 0 ||
+	    pyrf_counts_values__setup_types() < 0)
 		return module;
 
 	/* The page_size is placed in util object. */
@@ -1487,6 +1574,9 @@ PyMODINIT_FUNC PyInit_perf(void)
 	Py_INCREF(&pyrf_cpu_map__type);
 	PyModule_AddObject(module, "cpu_map", (PyObject*)&pyrf_cpu_map__type);
 
+	Py_INCREF(&pyrf_counts_values__type);
+	PyModule_AddObject(module, "counts_values", (PyObject *)&pyrf_counts_values__type);
+
 	dict = PyModule_GetDict(module);
 	if (dict == NULL)
 		goto error;
-- 
2.49.0


^ permalink raw reply related	[flat|nested] 12+ messages in thread

* [PATCH v2 2/4] perf python: Add evsel read method
  2025-05-12  5:57 [PATCH v2 0/4] perf python: Add missing infra pieces for counting perf events Gautam Menghani
  2025-05-12  5:57 ` [PATCH v2 1/4] perf python: Add support for perf_counts_values to return counter data Gautam Menghani
@ 2025-05-12  5:57 ` Gautam Menghani
  2025-05-12  5:57 ` [PATCH v2 3/4] perf python: Add evlist close support Gautam Menghani
  2025-05-12  5:57 ` [PATCH v2 4/4] perf python: Add counting.py as example for counting perf events Gautam Menghani
  3 siblings, 0 replies; 12+ messages in thread
From: Gautam Menghani @ 2025-05-12  5:57 UTC (permalink / raw)
  To: peterz, mingo, acme, namhyung, mark.rutland, alexander.shishkin,
	jolsa, irogers, adrian.hunter, kan.liang
  Cc: Gautam Menghani, linux-perf-users, linux-kernel, maddy

Add the evsel read method to enable python to read counter data for the
given evsel.

Signed-off-by: Gautam Menghani <gautam@linux.ibm.com>
---
 tools/perf/util/python.c | 23 +++++++++++++++++++++++
 1 file changed, 23 insertions(+)

diff --git a/tools/perf/util/python.c b/tools/perf/util/python.c
index 011ee2e27797..5a4d2c9aaabd 100644
--- a/tools/perf/util/python.c
+++ b/tools/perf/util/python.c
@@ -867,6 +867,23 @@ static PyObject *pyrf_evsel__open(struct pyrf_evsel *pevsel,
 	return Py_None;
 }
 
+static PyObject *pyrf_evsel__read(struct pyrf_evsel *pevsel,
+				  PyObject *args, PyObject *kwargs)
+{
+	struct evsel *evsel = &pevsel->evsel;
+	int cpu_map_idx = 0, thread = 0;
+	struct perf_counts_values counts;
+	struct pyrf_counts_values *count_values = PyObject_New(struct pyrf_counts_values,
+							       &pyrf_counts_values__type);
+
+	if (!PyArg_ParseTuple(args, "ii", &cpu_map_idx, &thread))
+		return NULL;
+
+	perf_evsel__read(&(evsel->core), cpu_map_idx, thread, &counts);
+	count_values->values = counts;
+	return (PyObject *)count_values;
+}
+
 static PyObject *pyrf_evsel__str(PyObject *self)
 {
 	struct pyrf_evsel *pevsel = (void *)self;
@@ -885,6 +902,12 @@ static PyMethodDef pyrf_evsel__methods[] = {
 		.ml_flags = METH_VARARGS | METH_KEYWORDS,
 		.ml_doc	  = PyDoc_STR("open the event selector file descriptor table.")
 	},
+	{
+		.ml_name  = "read",
+		.ml_meth  = (PyCFunction)pyrf_evsel__read,
+		.ml_flags = METH_VARARGS | METH_KEYWORDS,
+		.ml_doc	  = PyDoc_STR("read counters")
+	},
 	{ .ml_name = NULL, }
 };
 
-- 
2.49.0


^ permalink raw reply related	[flat|nested] 12+ messages in thread

* [PATCH v2 3/4] perf python: Add evlist close support
  2025-05-12  5:57 [PATCH v2 0/4] perf python: Add missing infra pieces for counting perf events Gautam Menghani
  2025-05-12  5:57 ` [PATCH v2 1/4] perf python: Add support for perf_counts_values to return counter data Gautam Menghani
  2025-05-12  5:57 ` [PATCH v2 2/4] perf python: Add evsel read method Gautam Menghani
@ 2025-05-12  5:57 ` Gautam Menghani
  2025-05-12  5:57 ` [PATCH v2 4/4] perf python: Add counting.py as example for counting perf events Gautam Menghani
  3 siblings, 0 replies; 12+ messages in thread
From: Gautam Menghani @ 2025-05-12  5:57 UTC (permalink / raw)
  To: peterz, mingo, acme, namhyung, mark.rutland, alexander.shishkin,
	jolsa, irogers, adrian.hunter, kan.liang
  Cc: Gautam Menghani, linux-perf-users, linux-kernel, maddy

Add support for the evlist close function.

Signed-off-by: Gautam Menghani <gautam@linux.ibm.com>
---
v1 -> v2:
1. Drop the support for next function

 tools/perf/util/python.c | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

diff --git a/tools/perf/util/python.c b/tools/perf/util/python.c
index 5a4d2c9aaabd..245ad4a4257a 100644
--- a/tools/perf/util/python.c
+++ b/tools/perf/util/python.c
@@ -1163,6 +1163,16 @@ static PyObject *pyrf_evlist__open(struct pyrf_evlist *pevlist,
 	return Py_None;
 }
 
+static PyObject *pyrf_evlist__close(struct pyrf_evlist *pevlist)
+{
+	struct evlist *evlist = &pevlist->evlist;
+
+	evlist__close(evlist);
+
+	Py_INCREF(Py_None);
+	return Py_None;
+}
+
 static PyObject *pyrf_evlist__config(struct pyrf_evlist *pevlist)
 {
 	struct record_opts opts = {
@@ -1221,6 +1231,12 @@ static PyMethodDef pyrf_evlist__methods[] = {
 		.ml_flags = METH_VARARGS | METH_KEYWORDS,
 		.ml_doc	  = PyDoc_STR("open the file descriptors.")
 	},
+	{
+		.ml_name  = "close",
+		.ml_meth  = (PyCFunction)pyrf_evlist__close,
+		.ml_flags = METH_NOARGS,
+		.ml_doc	  = PyDoc_STR("close the file descriptors.")
+	},
 	{
 		.ml_name  = "poll",
 		.ml_meth  = (PyCFunction)pyrf_evlist__poll,
-- 
2.49.0


^ permalink raw reply related	[flat|nested] 12+ messages in thread

* [PATCH v2 4/4] perf python: Add counting.py as example for counting perf events
  2025-05-12  5:57 [PATCH v2 0/4] perf python: Add missing infra pieces for counting perf events Gautam Menghani
                   ` (2 preceding siblings ...)
  2025-05-12  5:57 ` [PATCH v2 3/4] perf python: Add evlist close support Gautam Menghani
@ 2025-05-12  5:57 ` Gautam Menghani
  2025-05-12 17:23   ` Ian Rogers
  3 siblings, 1 reply; 12+ messages in thread
From: Gautam Menghani @ 2025-05-12  5:57 UTC (permalink / raw)
  To: peterz, mingo, acme, namhyung, mark.rutland, alexander.shishkin,
	jolsa, irogers, adrian.hunter, kan.liang
  Cc: Gautam Menghani, linux-perf-users, linux-kernel, maddy

Add counting.py - a python version of counting.c to demonstrate
measuring and reading of counts for given perf events.

Signed-off-by: Gautam Menghani <gautam@linux.ibm.com>
---
v1 -> v2:
1. Use existing iteration support instead of next
2. Read the counters on all cpus
3. Use existing helper functions

 tools/perf/python/counting.py | 34 ++++++++++++++++++++++++++++++++++
 1 file changed, 34 insertions(+)
 create mode 100755 tools/perf/python/counting.py

diff --git a/tools/perf/python/counting.py b/tools/perf/python/counting.py
new file mode 100755
index 000000000000..e535e3ae8bdf
--- /dev/null
+++ b/tools/perf/python/counting.py
@@ -0,0 +1,34 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+# -*- python -*-
+# -*- coding: utf-8 -*-
+
+import perf
+
+def main():
+        cpus = perf.cpu_map()
+        thread_map = perf.thread_map(-1)
+        evlist = perf.parse_events("cpu-clock,task-clock", cpus, thread_map)
+
+        for ev in evlist:
+            ev.read_format = perf.FORMAT_TOTAL_TIME_ENABLED | perf.FORMAT_TOTAL_TIME_RUNNING
+
+        evlist.open()
+        evlist.enable()
+
+        count = 100000
+        while count > 0:
+            count -= 1
+
+        evlist.disable()
+
+        for evsel in evlist:
+            for cpu in cpus:
+                for thread in range(len(thread_map)):
+                    counts = evsel.read(cpu, thread)
+                    print(f"For {evsel} val: {counts.val} enable: {counts.ena} run: {counts.run}")
+
+        evlist.close()
+
+if __name__ == '__main__':
+    main()
-- 
2.49.0


^ permalink raw reply related	[flat|nested] 12+ messages in thread

* Re: [PATCH v2 4/4] perf python: Add counting.py as example for counting perf events
  2025-05-12  5:57 ` [PATCH v2 4/4] perf python: Add counting.py as example for counting perf events Gautam Menghani
@ 2025-05-12 17:23   ` Ian Rogers
  2025-05-12 17:49     ` Arnaldo Carvalho de Melo
  0 siblings, 1 reply; 12+ messages in thread
From: Ian Rogers @ 2025-05-12 17:23 UTC (permalink / raw)
  To: Gautam Menghani, namhyung, acme
  Cc: peterz, mingo, mark.rutland, alexander.shishkin, jolsa,
	adrian.hunter, kan.liang, linux-perf-users, linux-kernel, maddy

On Sun, May 11, 2025 at 10:58 PM Gautam Menghani <gautam@linux.ibm.com> wrote:
>
> Add counting.py - a python version of counting.c to demonstrate
> measuring and reading of counts for given perf events.
>
> Signed-off-by: Gautam Menghani <gautam@linux.ibm.com>
> ---
> v1 -> v2:
> 1. Use existing iteration support instead of next
> 2. Read the counters on all cpus
> 3. Use existing helper functions
>
>  tools/perf/python/counting.py | 34 ++++++++++++++++++++++++++++++++++
>  1 file changed, 34 insertions(+)
>  create mode 100755 tools/perf/python/counting.py
>
> diff --git a/tools/perf/python/counting.py b/tools/perf/python/counting.py
> new file mode 100755
> index 000000000000..e535e3ae8bdf
> --- /dev/null
> +++ b/tools/perf/python/counting.py
> @@ -0,0 +1,34 @@
> +#!/usr/bin/env python3
> +# SPDX-License-Identifier: GPL-2.0
> +# -*- python -*-
> +# -*- coding: utf-8 -*-
> +
> +import perf
> +
> +def main():
> +        cpus = perf.cpu_map()
> +        thread_map = perf.thread_map(-1)
> +        evlist = perf.parse_events("cpu-clock,task-clock", cpus, thread_map)

Thanks Gautam! I think this is really good. Perhaps the events could
be a command line option, but I can see why you want to keep this
similar to counting.c.

> +
> +        for ev in evlist:
> +            ev.read_format = perf.FORMAT_TOTAL_TIME_ENABLED | perf.FORMAT_TOTAL_TIME_RUNNING
> +
> +        evlist.open()
> +        evlist.enable()
> +
> +        count = 100000
> +        while count > 0:
> +            count -= 1
> +
> +        evlist.disable()
> +
> +        for evsel in evlist:
> +            for cpu in cpus:
> +                for thread in range(len(thread_map)):

I kind of wish, for the reason of being intention revealing, this could just be:

for thread in thread_map:

I can see the problem though, the counts lack the thread_map and the
thread_map is needed to turn a thread back into an index. Perhaps when
the python counts is created we hold onto the evsel so that this is
possible. I also suspect that in the code:

for cpu in cpus:

The CPU number is being used rather than its index, which is a similar
story/problem.

Arnaldo, could you give some input on what to do wrt indices, threads
and CPUs at the API level? Perhaps we need a refactor and objects for
perf CPU and perf thread, similar to the use of struct perf_cpu in the
C code. The original API all pre-dates that change. The issue is that
changing the API could break existing scripts and we can only fix
those that ship with perf.

Thanks,
Ian

> +                    counts = evsel.read(cpu, thread)
> +                    print(f"For {evsel} val: {counts.val} enable: {counts.ena} run: {counts.run}")
> +
> +        evlist.close()
> +
> +if __name__ == '__main__':
> +    main()
> --
> 2.49.0
>

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH v2 4/4] perf python: Add counting.py as example for counting perf events
  2025-05-12 17:23   ` Ian Rogers
@ 2025-05-12 17:49     ` Arnaldo Carvalho de Melo
  2025-05-12 19:38       ` Ian Rogers
  0 siblings, 1 reply; 12+ messages in thread
From: Arnaldo Carvalho de Melo @ 2025-05-12 17:49 UTC (permalink / raw)
  To: Ian Rogers
  Cc: Gautam Menghani, namhyung, peterz, mingo, mark.rutland,
	alexander.shishkin, jolsa, adrian.hunter, kan.liang,
	linux-perf-users, linux-kernel, maddy

On Mon, May 12, 2025 at 10:23:39AM -0700, Ian Rogers wrote:
> On Sun, May 11, 2025 at 10:58 PM Gautam Menghani <gautam@linux.ibm.com> wrote:
> > Add counting.py - a python version of counting.c to demonstrate
> > measuring and reading of counts for given perf events.

> > Signed-off-by: Gautam Menghani <gautam@linux.ibm.com>
> > ---
> > v1 -> v2:
> > 1. Use existing iteration support instead of next
> > 2. Read the counters on all cpus
> > 3. Use existing helper functions
> >
> >  tools/perf/python/counting.py | 34 ++++++++++++++++++++++++++++++++++
> >  1 file changed, 34 insertions(+)
> >  create mode 100755 tools/perf/python/counting.py

> > diff --git a/tools/perf/python/counting.py b/tools/perf/python/counting.py
> > new file mode 100755
> > index 000000000000..e535e3ae8bdf
> > --- /dev/null
> > +++ b/tools/perf/python/counting.py
> > @@ -0,0 +1,34 @@
> > +#!/usr/bin/env python3
> > +# SPDX-License-Identifier: GPL-2.0
> > +# -*- python -*-
> > +# -*- coding: utf-8 -*-
> > +
> > +import perf
> > +
> > +def main():
> > +        cpus = perf.cpu_map()
> > +        thread_map = perf.thread_map(-1)
> > +        evlist = perf.parse_events("cpu-clock,task-clock", cpus, thread_map)
 
> Thanks Gautam! I think this is really good. Perhaps the events could
> be a command line option, but I can see why you want to keep this
> similar to counting.c.
 
> > +
> > +        for ev in evlist:
> > +            ev.read_format = perf.FORMAT_TOTAL_TIME_ENABLED | perf.FORMAT_TOTAL_TIME_RUNNING
> > +
> > +        evlist.open()
> > +        evlist.enable()
> > +
> > +        count = 100000
> > +        while count > 0:
> > +            count -= 1
> > +
> > +        evlist.disable()
> > +
> > +        for evsel in evlist:
> > +            for cpu in cpus:
> > +                for thread in range(len(thread_map)):

> I kind of wish, for the reason of being intention revealing, this could just be:
 
> for thread in thread_map:

> I can see the problem though, the counts lack the thread_map and the
> thread_map is needed to turn a thread back into an index. Perhaps when
> the python counts is created we hold onto the evsel so that this is
> possible. I also suspect that in the code:
 
> for cpu in cpus:
 
> The CPU number is being used rather than its index, which is a similar
> story/problem.

Lemme see the rest of this code...

+static PyObject *pyrf_evsel__read(struct pyrf_evsel *pevsel,
+                                 PyObject *args, PyObject *kwargs)
+{
+       struct evsel *evsel = &pevsel->evsel;
+       int cpu_map_idx = 0, thread = 0;
+       struct perf_counts_values counts;
+       struct pyrf_counts_values *count_values = PyObject_New(struct pyrf_counts_values,
+                                                              &pyrf_counts_values__type);
+
+       if (!PyArg_ParseTuple(args, "ii", &cpu_map_idx, &thread))
+               return NULL;
+
+       perf_evsel__read(&(evsel->core), cpu_map_idx, thread, &counts);
+       count_values->values = counts;
+       return (PyObject *)count_values;
+}

Yeah, it is expecting the cpu_map_idx but the cpu number is being used,
that is a bug.

The way perf_evsel__read() is implemented:

int perf_evsel__read(struct perf_evsel *evsel, int cpu_map_idx, int thread,
                     struct perf_counts_values *count)

It expects a cpu_map index, not a cpu, and then a 'thread' whose name in
the prototype seems to imply it's not an index? But it is an index, as it
ends up being the 'y' for:

  xyarray__entry(_evsel->mmap, _cpu_map_idx, _thread))

:-/

So probably it's best to do it using indexes and, when needing to know the
pid or cpu number, use some helper to get the entry at the given
index? At least for the perf_evsel__read() API that seems to be the
case, right?
 
> Arnaldo, could you give some input on what to do wrt indices, threads
> and CPUs at the API level? Perhaps we need a refactor and objects for
> perf CPU and perf thread, similar to the use of struct perf_cpu in the
> C code. The original API all pre-dates that change. The issue is that
> changing the API could break existing scripts and we can only fix
> those that ship with perf.

So the original user of the perf python binding was:

https://git.kernel.org/pub/scm/utils/tuna/tuna.git/tree/tuna/gui/procview.py

That does basically what the above example does:

    def perf_init(self):
        self.cpu_map = perf.cpu_map()
        self.thread_map = perf.thread_map()
        self.evsel_cycles = perf.evsel(task=1, comm=1, wakeup_events=1, \
            watermark=1, sample_type=perf.SAMPLE_CPU | perf.SAMPLE_TID)
        self.evsel_cycles.open(cpus=self.cpu_map, threads=self.thread_map)
        self.evlist = perf.evlist(self.cpu_map, self.thread_map)
        self.evlist.add(self.evsel_cycles)
        self.evlist.mmap()
        self.pollfd = self.evlist.get_pollfd()
        for f in self.pollfd:
            GObject.io_add_watch(f, GObject.IO_IN, self.perf_process_events)
        self.perf_counter = {}

Then:

    def perf_process_events(self, source, condition):
        had_events = True
        while had_events: 
            had_events = False
            for cpu in self.cpu_map:
                event = self.evlist.read_on_cpu(cpu)
                if event:
                    had_events = True
                    if event.type == perf.RECORD_FORK:
                        if event.pid == event.tid:
                            try:
                                self.ps.processes[event.pid] = procfs.process(event.pid)
                            except: # short lived thread
                                pass
                        else: 
                            if event.pid in self.ps.processes:
                                try:
                                    self.ps.processes[event.pid].threads.processes[event.tid] = procfs.process(event.tid)
                                except (AttributeError, KeyError):
                                    try:
                                        self.ps.processes[event.pid].threads = procfs.pidstats("/proc/%d/task/" % event.pid)
                                    except:
                                        pass
                    elif event.type == perf.RECORD_EXIT:
                        del self.ps[int(event.tid)]
                    elif event.type == perf.RECORD_SAMPLE:
                        tid = event.sample_tid
                        if tid in self.perf_counter:
                            self.perf_counter[tid] += event.sample_period
                        else:
                            self.perf_counter[tid] = event.sample_period

        self.evlist_added = True  # Mark that event arrived, so next periodic show() will refresh GUI
        return True


So it was more for catching new/dead threads without having to process /proc.

- Arnaldo

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH v2 4/4] perf python: Add counting.py as example for counting perf events
  2025-05-12 17:49     ` Arnaldo Carvalho de Melo
@ 2025-05-12 19:38       ` Ian Rogers
  2025-05-13 20:50         ` Arnaldo Carvalho de Melo
  0 siblings, 1 reply; 12+ messages in thread
From: Ian Rogers @ 2025-05-12 19:38 UTC (permalink / raw)
  To: Arnaldo Carvalho de Melo
  Cc: Gautam Menghani, namhyung, peterz, mingo, mark.rutland,
	alexander.shishkin, jolsa, adrian.hunter, kan.liang,
	linux-perf-users, linux-kernel, maddy

On Mon, May 12, 2025 at 10:49 AM Arnaldo Carvalho de Melo
<acme@kernel.org> wrote:
>
> On Mon, May 12, 2025 at 10:23:39AM -0700, Ian Rogers wrote:
> > On Sun, May 11, 2025 at 10:58 PM Gautam Menghani <gautam@linux.ibm.com> wrote:
> > > Add counting.py - a python version of counting.c to demonstrate
> > > measuring and reading of counts for given perf events.
>
> > > Signed-off-by: Gautam Menghani <gautam@linux.ibm.com>
> > > ---
> > > v1 -> v2:
> > > 1. Use existing iteration support instead of next
> > > 2. Read the counters on all cpus
> > > 3. Use existing helper functions
> > >
> > >  tools/perf/python/counting.py | 34 ++++++++++++++++++++++++++++++++++
> > >  1 file changed, 34 insertions(+)
> > >  create mode 100755 tools/perf/python/counting.py
>
> > > diff --git a/tools/perf/python/counting.py b/tools/perf/python/counting.py
> > > new file mode 100755
> > > index 000000000000..e535e3ae8bdf
> > > --- /dev/null
> > > +++ b/tools/perf/python/counting.py
> > > @@ -0,0 +1,34 @@
> > > +#!/usr/bin/env python3
> > > +# SPDX-License-Identifier: GPL-2.0
> > > +# -*- python -*-
> > > +# -*- coding: utf-8 -*-
> > > +
> > > +import perf
> > > +
> > > +def main():
> > > +        cpus = perf.cpu_map()
> > > +        thread_map = perf.thread_map(-1)
> > > +        evlist = perf.parse_events("cpu-clock,task-clock", cpus, thread_map)
>
> > Thanks Gautam! I think this is really good. Perhaps the events could
> > be a command line option, but I can see why you want to keep this
> > similar to counting.c.
>
> > > +
> > > +        for ev in evlist:
> > > +            ev.read_format = perf.FORMAT_TOTAL_TIME_ENABLED | perf.FORMAT_TOTAL_TIME_RUNNING
> > > +
> > > +        evlist.open()
> > > +        evlist.enable()
> > > +
> > > +        count = 100000
> > > +        while count > 0:
> > > +            count -= 1
> > > +
> > > +        evlist.disable()
> > > +
> > > +        for evsel in evlist:
> > > +            for cpu in cpus:
> > > +                for thread in range(len(thread_map)):
>
> > I kind of wish, for the reason of being intention revealing, this could just be:
>
> > for thread in thread_map:
>
> > I can see the problem though, the counts lack the thread_map and the
> > thread_map is needed to turn a thread back into an index. Perhaps when
> > the python counts is created we hold onto the evsel so that this is
> > possible. I also suspect that in the code:
>
> > for cpu in cpus:
>
> > The CPU number is being used rather than its index, which is a similar
> > story/problem.
>
> Lemme see the rest of this code...
>
> +static PyObject *pyrf_evsel__read(struct pyrf_evsel *pevsel,
> +                                 PyObject *args, PyObject *kwargs)
> +{
> +       struct evsel *evsel = &pevsel->evsel;
> +       int cpu_map_idx = 0, thread = 0;
> +       struct perf_counts_values counts;
> +       struct pyrf_counts_values *count_values = PyObject_New(struct pyrf_counts_values,
> +                                                              &pyrf_counts_values__type);
> +
> +       if (!PyArg_ParseTuple(args, "ii", &cpu_map_idx, &thread))
> +               return NULL;
> +
> +       perf_evsel__read(&(evsel->core), cpu_map_idx, thread, &counts);
> +       count_values->values = counts;
> +       return (PyObject *)count_values;
> +}
>
> Yeah, it is expecting the cpu_map_idx but the cpu number is being used,
> that is a bug.
>
> The way perf_evsel__read() is implemented:
>
> int perf_evsel__read(struct perf_evsel *evsel, int cpu_map_idx, int thread,
>                      struct perf_counts_values *count)
>
> It expects a cpu_map index, not a cpu, and then a 'thread' whose name in
> the prototype seems to imply it's not an index? But it is an index, as it
> ends up being the 'y' for:
>
>   xyarray__entry(_evsel->mmap, _cpu_map_idx, _thread))
>
> :-/

Yeah. In the C code we've pretty much committed to notions of cpu map
index and CPU. We're more ambiguous with threads, but generally thread
is actually thread index into the thread map. As you say it is for the
xyarray so that we can densely pack things by index rather than having
huge gaps, say between PIDs. For the python we don't have to have a
1:1 mapping with the C code, so I was wondering if we could just
remove the notions of index and have them be implementation details?
This would lead to an unfortunate O(log n) translation between
thread/CPU and index (perf_cpu_map__idx) at the API boundary in
python.c.

> So probably it's best to do it using indexes and, when needing to know the
> pid or cpu number, use some helper to get the entry at the given
> index? At least for the perf_evsel__read() API that seems to be the
> case, right?
>
> > Arnaldo, could you give some input on what to do wrt indices, threads
> > and CPUs at the API level? Perhaps we need a refactor and objects for
> > perf CPU and perf thread, similar to the use of struct perf_cpu in the
> > C code. The original API all pre-dates that change. The issue is that
> > changing the API could break existing scripts and we can only fix
> > those that ship with perf.
>
> So the original user of the perf python binding was:
>
> https://git.kernel.org/pub/scm/utils/tuna/tuna.git/tree/tuna/gui/procview.py
>
> That does basically what the above example does:
>
>     def perf_init(self):
>         self.cpu_map = perf.cpu_map()
>         self.thread_map = perf.thread_map()
>         self.evsel_cycles = perf.evsel(task=1, comm=1, wakeup_events=1, \
>             watermark=1, sample_type=perf.SAMPLE_CPU | perf.SAMPLE_TID)
>         self.evsel_cycles.open(cpus=self.cpu_map, threads=self.thread_map)
>         self.evlist = perf.evlist(self.cpu_map, self.thread_map)
>         self.evlist.add(self.evsel_cycles)
>         self.evlist.mmap()
>         self.pollfd = self.evlist.get_pollfd()
>         for f in self.pollfd:
>             GObject.io_add_watch(f, GObject.IO_IN, self.perf_process_events)
>         self.perf_counter = {}
>
> Then:
>
>     def perf_process_events(self, source, condition):
>         had_events = True
>         while had_events:
>             had_events = False
>             for cpu in self.cpu_map:
>                 event = self.evlist.read_on_cpu(cpu)
>                 if event:
>                     had_events = True
>                     if event.type == perf.RECORD_FORK:
>                         if event.pid == event.tid:
>                             try:
>                                 self.ps.processes[event.pid] = procfs.process(event.pid)
>                             except: # short lived thread
>                                 pass
>                         else:
>                             if event.pid in self.ps.processes:
>                                 try:
>                                     self.ps.processes[event.pid].threads.processes[event.tid] = procfs.process(event.tid)
>                                 except (AttributeError, KeyError):
>                                     try:
>                                         self.ps.processes[event.pid].threads = procfs.pidstats("/proc/%d/task/" % event.pid)
>                                     except:
>                                         pass
>                     elif event.type == perf.RECORD_EXIT:
>                         del self.ps[int(event.tid)]
>                     elif event.type == perf.RECORD_SAMPLE:
>                         tid = event.sample_tid
>                         if tid in self.perf_counter:
>                             self.perf_counter[tid] += event.sample_period
>                         else:
>                             self.perf_counter[tid] = event.sample_period
>
>         self.evlist_added = True  # Mark that event arrived, so next periodic show() will refresh GUI
>         return True
>
>
> So it was more for catching new/dead threads without having to process /proc.

Yeah. I think the sampling API is okay. The nice thing with Gautam's
patches is that they add support for counters, for use-cases like perf stat.

Thanks,
Ian

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH v2 4/4] perf python: Add counting.py as example for counting perf events
  2025-05-12 19:38       ` Ian Rogers
@ 2025-05-13 20:50         ` Arnaldo Carvalho de Melo
  2025-05-13 20:59           ` Ian Rogers
  0 siblings, 1 reply; 12+ messages in thread
From: Arnaldo Carvalho de Melo @ 2025-05-13 20:50 UTC (permalink / raw)
  To: Ian Rogers
  Cc: Gautam Menghani, namhyung, peterz, mingo, mark.rutland,
	alexander.shishkin, jolsa, adrian.hunter, kan.liang,
	linux-perf-users, linux-kernel, maddy

On Mon, May 12, 2025 at 12:38:23PM -0700, Ian Rogers wrote:
> On Mon, May 12, 2025 at 10:49 AM Arnaldo Carvalho de Melo
> <acme@kernel.org> wrote:
> >
> > On Mon, May 12, 2025 at 10:23:39AM -0700, Ian Rogers wrote:
> > > On Sun, May 11, 2025 at 10:58 PM Gautam Menghani <gautam@linux.ibm.com> wrote:
> > > > Add counting.py - a python version of counting.c to demonstrate
> > > > measuring and reading of counts for given perf events.
> >
> > > > Signed-off-by: Gautam Menghani <gautam@linux.ibm.com>
> > > > ---
> > > > v1 -> v2:
> > > > 1. Use existing iteration support instead of next
> > > > 2. Read the counters on all cpus
> > > > 3. Use existing helper functions
> > > >
> > > >  tools/perf/python/counting.py | 34 ++++++++++++++++++++++++++++++++++
> > > >  1 file changed, 34 insertions(+)
> > > >  create mode 100755 tools/perf/python/counting.py
> >
> > > > diff --git a/tools/perf/python/counting.py b/tools/perf/python/counting.py
> > > > new file mode 100755
> > > > index 000000000000..e535e3ae8bdf
> > > > --- /dev/null
> > > > +++ b/tools/perf/python/counting.py
> > > > @@ -0,0 +1,34 @@
> > > > +#!/usr/bin/env python3
> > > > +# SPDX-License-Identifier: GPL-2.0
> > > > +# -*- python -*-
> > > > +# -*- coding: utf-8 -*-
> > > > +
> > > > +import perf
> > > > +
> > > > +def main():
> > > > +        cpus = perf.cpu_map()
> > > > +        thread_map = perf.thread_map(-1)
> > > > +        evlist = perf.parse_events("cpu-clock,task-clock", cpus, thread_map)
> >
> > > Thanks Gautam! I think this is really good. Perhaps the events could
> > > be a command line option, but I can see why you want to keep this
> > > similar to counting.c.
> >
> > > > +
> > > > +        for ev in evlist:
> > > > +            ev.read_format = perf.FORMAT_TOTAL_TIME_ENABLED | perf.FORMAT_TOTAL_TIME_RUNNING
> > > > +
> > > > +        evlist.open()
> > > > +        evlist.enable()
> > > > +
> > > > +        count = 100000
> > > > +        while count > 0:
> > > > +            count -= 1
> > > > +
> > > > +        evlist.disable()
> > > > +
> > > > +        for evsel in evlist:
> > > > +            for cpu in cpus:
> > > > +                for thread in range(len(thread_map)):
> >
> > > I kind of wish, for the reason of being intention revealing, this could just be:
> >
> > > for thread in thread_map:
> >
> > > I can see the problem though, the counts lack the thread_map and the
> > > thread_map is needed to turn a thread back into an index. Perhaps when
> > > the python counts is created we hold onto the evsel so that this is
> > > possible. I also suspect that in the code:
> >
> > > for cpu in cpus:
> >
> > > The CPU number is being used rather than its index, which is a similar
> > > story/problem.
> >
> > Lemme see the rest of this code...
> >
> > +static PyObject *pyrf_evsel__read(struct pyrf_evsel *pevsel,
> > +                                 PyObject *args, PyObject *kwargs)
> > +{
> > +       struct evsel *evsel = &pevsel->evsel;
> > +       int cpu_map_idx = 0, thread = 0;
> > +       struct perf_counts_values counts;
> > +       struct pyrf_counts_values *count_values = PyObject_New(struct pyrf_counts_values,
> > +                                                              &pyrf_counts_values__type);
> > +
> > +       if (!PyArg_ParseTuple(args, "ii", &cpu_map_idx, &thread))
> > +               return NULL;
> > +
> > +       perf_evsel__read(&(evsel->core), cpu_map_idx, thread, &counts);
> > +       count_values->values = counts;
> > +       return (PyObject *)count_values;
> > +}
> >
> > Yeah, it is expecting the cpu_map_idx but the cpu number is being used,
> > that is a bug.
> >
> > The way perf_evsel__read() is implemented:
> >
> > int perf_evsel__read(struct perf_evsel *evsel, int cpu_map_idx, int thread,
> >                      struct perf_counts_values *count)
> >
> > It expects a cpu_map index, not a cpu and then a thread that in its
> > prototype seems to imply it's not an index? But it is an index as it ends
> > up being the 'y' for:
> >
> >   xyarray__entry(_evsel->mmap, _cpu_map_idx, _thread))
> >
> > :-/
> 
> Yeah. In the C code we've pretty much committed to notions of cpu map
> index and CPU. We're more ambiguous with threads, but generally thread
> is actually thread index into the thread map. As you say it is for the
> xyarray so that we can densely pack things by index rather than having
> huge gaps, say between PIDs. For the python we don't have to have a
> 1:1 mapping with the C code, so I was wondering if we could just
> remove the notions of index and have them be implementation details?

Agreed, even in the C case I find it confusing to sometimes deal with
indexes and sometimes with real thread/cpu numbers, but if we try and
at least keep the variables/parameter naming reflecting that, then it
should be bearable.

> This would lead to an unfortunate O(log n) translation between
> thread/CPU and index (perf_cpu_map__idx) at the API boundary in
> python.c.

Maybe with some more thinking we can get something better? But I don't
have the bw now to think about it.
 
> > So probably it's best to do it using indexes and when needing to know the
> > pid or cpu number then use some helper to get the entry at the given
> > index? At least for the perf_evsel__read() API that seems to be the
> > case, right?

> > > Arnaldo, could you give some input on what to do wrt indices, threads
> > > and CPUs at the API level? Perhaps we need a refactor and objects for
> > > perf CPU and perf thread, similar to the use of struct perf_cpu in the
> > > C code. The original API all pre-dates that change. The issue is that
> > > changing the API could break existing scripts and we can only fix
> > > those that ship with perf.
> > So it was more for catching new/dead threads without having to process /proc.

<SNIP>
 
> Yeah. I think the sampling API is okay. The nice thing with Gautam's
> patches is adding support for counters for use-cases like perf stat.

Right, I like the effort he is making into having perf more usable in
python, and I encourage him to think about the issues you raised so that
we can come to some good abstractions.

- Arnaldo

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH v2 4/4] perf python: Add counting.py as example for counting perf events
  2025-05-13 20:50         ` Arnaldo Carvalho de Melo
@ 2025-05-13 20:59           ` Ian Rogers
  2025-05-13 21:16             ` Arnaldo Carvalho de Melo
  0 siblings, 1 reply; 12+ messages in thread
From: Ian Rogers @ 2025-05-13 20:59 UTC (permalink / raw)
  To: Arnaldo Carvalho de Melo
  Cc: Gautam Menghani, namhyung, peterz, mingo, mark.rutland,
	alexander.shishkin, jolsa, adrian.hunter, kan.liang,
	linux-perf-users, linux-kernel, maddy

On Tue, May 13, 2025 at 1:50 PM Arnaldo Carvalho de Melo
<acme@kernel.org> wrote:
[snip]
> Right, I like the effort he is making into having perf more usable in
> python, and I encourage him to think about the issues you raised so that
> we can come to some good abstractions.

Thanks Arnaldo, can we be tolerant to API changes in the python from a
"regression" point-of-view? Like avoiding the notion of indices?
Presumably such a fix would also need fixing in all the perf python
scripts, but it is the external users I worry about. My sense is the number
of external users is minimal, for example, toplev I don't believe is a
user [1].

Ian

[1] https://github.com/andikleen/pmu-tools

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH v2 4/4] perf python: Add counting.py as example for counting perf events
  2025-05-13 20:59           ` Ian Rogers
@ 2025-05-13 21:16             ` Arnaldo Carvalho de Melo
  2025-05-13 21:47               ` Ian Rogers
  0 siblings, 1 reply; 12+ messages in thread
From: Arnaldo Carvalho de Melo @ 2025-05-13 21:16 UTC (permalink / raw)
  To: Ian Rogers
  Cc: Gautam Menghani, namhyung, peterz, mingo, mark.rutland,
	alexander.shishkin, jolsa, adrian.hunter, kan.liang,
	linux-perf-users, linux-kernel, maddy

On Tue, May 13, 2025 at 01:59:28PM -0700, Ian Rogers wrote:
> On Tue, May 13, 2025 at 1:50 PM Arnaldo Carvalho de Melo
> <acme@kernel.org> wrote:
> [snip]
> > Right, I like the effort he is making into having perf more usable in
> > python, and I encourage him to think about the issues you raised so that
> > we can come to some good abstractions.
> 
> Thanks Arnaldo, can we be tolerant to API changes in the python from a
> "regression" point-of-view? Like avoiding the notion of indices?

But correct me if I am missing something, aren't indices only introduced
with this new patchset?

- Arnaldo

> Presumably such a fix would also need fixing in all the perf python
> scripts, but the external users I worry about. My sense is the number
> of external users is minimal, for example, toplev I don't believe is a
> user [1].
> 
> Ian
> 
> [1] https://github.com/andikleen/pmu-tools

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH v2 4/4] perf python: Add counting.py as example for counting perf events
  2025-05-13 21:16             ` Arnaldo Carvalho de Melo
@ 2025-05-13 21:47               ` Ian Rogers
  0 siblings, 0 replies; 12+ messages in thread
From: Ian Rogers @ 2025-05-13 21:47 UTC (permalink / raw)
  To: Arnaldo Carvalho de Melo
  Cc: Gautam Menghani, namhyung, peterz, mingo, mark.rutland,
	alexander.shishkin, jolsa, adrian.hunter, kan.liang,
	linux-perf-users, linux-kernel, maddy

On Tue, May 13, 2025 at 2:16 PM Arnaldo Carvalho de Melo
<acme@kernel.org> wrote:
>
> On Tue, May 13, 2025 at 01:59:28PM -0700, Ian Rogers wrote:
> > On Tue, May 13, 2025 at 1:50 PM Arnaldo Carvalho de Melo
> > <acme@kernel.org> wrote:
> > [snip]
> > > Right, I like the effort he is making into having perf more usable in
> > > python, and I encourage him to think about the issues you raised so that
> > > we can come to some good abstractions.
> >
> > Thanks Arnaldo, can we be tolerant to API changes in the python from a
> > "regression" point-of-view? Like avoiding the notion of indices?
>
> But correct me if I am missing something, aren't indices only introduced
> with this new patchset?

Checking the code I think you're right. Unless you do something like
the range loop:
```
 for thread in range(len(thread_map)):
```
so I think we'd all prefer indices not to be a part of the python API.
On the C side we can get indices via helpers like perf_cpu_map__idx,
which will introduce O(log N) overhead - thread map is missing this
currently I believe. For compatibility a CPU and thread should remain
an int.

Thanks,
Ian

> - Arnaldo
>
> > Presumably such a fix would also need fixing in all the perf python
> > scripts, but the external users I worry about. My sense is the number
> > of external users is minimal, for example, toplev I don't believe is a
> > user [1].
> >
> > Ian
> >
> > [1] https://github.com/andikleen/pmu-tools

^ permalink raw reply	[flat|nested] 12+ messages in thread

end of thread, other threads:[~2025-05-13 21:48 UTC | newest]

Thread overview: 12+ messages (download: mbox.gz, follow: Atom feed)
-- links below jump to the message on this page --
2025-05-12  5:57 [PATCH v2 0/4] perf python: Add missing infra pieces for counting perf events Gautam Menghani
2025-05-12  5:57 ` [PATCH v2 1/4] perf python: Add support for perf_counts_values to return counter data Gautam Menghani
2025-05-12  5:57 ` [PATCH v2 2/4] perf python: Add evsel read method Gautam Menghani
2025-05-12  5:57 ` [PATCH v2 3/4] perf python: Add evlist close support Gautam Menghani
2025-05-12  5:57 ` [PATCH v2 4/4] perf python: Add counting.py as example for counting perf events Gautam Menghani
2025-05-12 17:23   ` Ian Rogers
2025-05-12 17:49     ` Arnaldo Carvalho de Melo
2025-05-12 19:38       ` Ian Rogers
2025-05-13 20:50         ` Arnaldo Carvalho de Melo
2025-05-13 20:59           ` Ian Rogers
2025-05-13 21:16             ` Arnaldo Carvalho de Melo
2025-05-13 21:47               ` Ian Rogers

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).