linux-perf-users.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [RFC PATCH 0/3] trace-cruncher: Initial support for perf
@ 2022-02-24 16:37 Tzvetomir Stoyanov (VMware)
  2022-02-24 16:37 ` [RFC PATCH 1/3] trace-cruncher: Logic for resolving address to function name Tzvetomir Stoyanov (VMware)
                   ` (3 more replies)
  0 siblings, 4 replies; 8+ messages in thread
From: Tzvetomir Stoyanov (VMware) @ 2022-02-24 16:37 UTC (permalink / raw)
  To: y.karadz, acme, olsajiri, irogers
  Cc: rostedt, linux-trace-devel, linux-perf-users

Two major functionalities are introduced by this patch set:
 - VMA <-> function name resolving, using bfd library.
 - Support for Linux kernel perf framework, using perf library.

This is still a work in progress. Depends on this patch, not yet merged:
https://lore.kernel.org/linux-perf-users/20220221102628.43904-1-tz.stoyanov@gmail.com/

Tzvetomir Stoyanov (VMware) (3):
  trace-cruncher: Logic for resolving address to function name
  trace-cruncher: Support for perf
  trace-cruncher: perf example

 examples/perf_sampling.py |  51 +++
 setup.py                  |   9 +-
 src/perfpy-utils.c        | 699 ++++++++++++++++++++++++++++++
 src/perfpy-utils.h        |  41 ++
 src/perfpy.c              | 141 ++++++
 src/trace-obj-debug.c     | 873 ++++++++++++++++++++++++++++++++++++++
 src/trace-obj-debug.h     |  52 +++
 7 files changed, 1865 insertions(+), 1 deletion(-)
 create mode 100755 examples/perf_sampling.py
 create mode 100644 src/perfpy-utils.c
 create mode 100644 src/perfpy-utils.h
 create mode 100644 src/perfpy.c
 create mode 100644 src/trace-obj-debug.c
 create mode 100644 src/trace-obj-debug.h

-- 
2.34.1


^ permalink raw reply	[flat|nested] 8+ messages in thread

* [RFC PATCH 1/3] trace-cruncher: Logic for resolving address to function name
  2022-02-24 16:37 [RFC PATCH 0/3] trace-cruncher: Initial support for perf Tzvetomir Stoyanov (VMware)
@ 2022-02-24 16:37 ` Tzvetomir Stoyanov (VMware)
  2022-02-24 16:37 ` [RFC PATCH 2/3] trace-cruncher: Support for perf Tzvetomir Stoyanov (VMware)
                   ` (2 subsequent siblings)
  3 siblings, 0 replies; 8+ messages in thread
From: Tzvetomir Stoyanov (VMware) @ 2022-02-24 16:37 UTC (permalink / raw)
  To: y.karadz, acme, olsajiri, irogers
  Cc: rostedt, linux-trace-devel, linux-perf-users

Resolving virtual address to function name and vice versa is useful
functionality for a trace application. Trace-cruncher can use it in two
use cases:
 - Resolving VMA to function name, when collecting user application
   performance traces with perf.
 - Resolving function name to VMA, when using ftrace uprobe dynamic
   events.

Proposed implementation uses the bfd library to parse the binary files
and read the symbol table. This information is available only if the
files are not stripped.

Signed-off-by: Tzvetomir Stoyanov (VMware) <tz.stoyanov@gmail.com>
---
 src/trace-obj-debug.c | 873 ++++++++++++++++++++++++++++++++++++++++++
 src/trace-obj-debug.h |  52 +++
 2 files changed, 925 insertions(+)
 create mode 100644 src/trace-obj-debug.c
 create mode 100644 src/trace-obj-debug.h

diff --git a/src/trace-obj-debug.c b/src/trace-obj-debug.c
new file mode 100644
index 0000000..0d1015a
--- /dev/null
+++ b/src/trace-obj-debug.c
@@ -0,0 +1,873 @@
+// SPDX-License-Identifier: LGPL-2.1
+/*
+ * Copyright (C) 2020, VMware, Tzvetomir Stoyanov <tz.stoyanov@gmail.com>
+ *
+ */
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <limits.h>
+#include <errno.h>
+#include <bfd.h>
+#include <demangle.h>
+#include <unistd.h>
+#include <dlfcn.h>
+#include <fnmatch.h>
+#include <ctype.h>
+
+#include "trace-obj-debug.h"
+
+//#define DEBUG_INTERNALS
+
+/*
+ * Per-file BFD state: the open BFD handle of a binary and the offset that
+ * is applied when converting between addresses stored in a DYNAMIC object
+ * and the VMAs requested by the caller.
+ */
+struct trace_debug_handle {
+	bfd *bfd;
+	unsigned long long addr_offset;
+};
+
+/*
+ * How the name of a requested symbol is compared with symbol table entries.
+ * NOTE(review): "MATH_WILDCARD" looks like a typo of "MATCH_WILDCARD".
+ */
+enum match_type {
+	MATCH_EXACT	= 0,
+	MATH_WILDCARD	= 1,
+};
+/* Node of a singly linked list of symbols requested for resolving */
+struct debug_symbols {
+	struct debug_symbols		*next;
+	struct tracecmd_debug_symbols	symbol;
+	enum match_type			match;
+};
+
+/* A binary file tracked by a debug object, with the VMA range it covers */
+struct trace_debug_file {
+	struct trace_debug_file		*next;
+	char				*file_name;
+	unsigned long long		vmem_start;
+	unsigned long long		vmem_end;
+	struct trace_debug_handle	*dbg;
+	int				sym_count;
+	struct debug_symbols		*sym; /* symbols to resolve,
+					       * look in this file only
+					       */
+};
+
+/* Top level context: a process or a binary, and all files that belong to it */
+struct trace_debug_object {
+	int				pid;
+	char				*fname;
+	struct pid_addr_maps		*fmaps;
+	int				sym_count;
+	struct debug_symbols		*sym;	/* symbols to resolve,
+						 * look into all files
+						 */
+	struct trace_debug_file		*files;
+};
+
+/* Flags of struct trace_obj_job, select what has to be resolved */
+#define RESOLVE_NAME		(1 << 0)	/* VMA -> function name */
+#define RESOLVE_VMA		(1 << 1)	/* function name -> VMA */
+#define RESOLVE_FOFFSET		(1 << 2)	/* VMA -> offset in the file */
+/* Description of a single resolving pass over one binary file */
+struct trace_obj_job {
+	unsigned int flags;
+	unsigned long long addr_offset;
+	struct debug_symbols *symbols;
+};
+
+/* Context passed to the BFD walkers: the symbol table and the current job */
+struct dwarf_bfd_context {
+	asymbol **table;
+	struct trace_obj_job *job;
+};
+
+/*
+ * process_bfd_section - bfd_map_over_sections() callback
+ * @abfd: the BFD object that is being walked
+ * @section: the current section of @abfd
+ * @param: pointer to struct dwarf_bfd_context with the symbol table and the job
+ *
+ * For each requested symbol whose address falls into this code section, fill
+ * the file name (if not set yet), the file offset (RESOLVE_FOFFSET) and the
+ * name of the nearest function (RESOLVE_NAME).
+ */
+static void process_bfd_section(bfd *abfd, asection *section, void *param)
+{
+	struct dwarf_bfd_context *context = (struct dwarf_bfd_context *)param;
+	unsigned int discriminator;
+	const char *functionname;
+	struct debug_symbols *s;
+	unsigned long long vma;
+	const char *filename;
+	unsigned int line;
+	bfd_boolean found;
+
+	/* Function addresses can be located in code sections only */
+	if (!(section->flags & SEC_CODE))
+		return;
+
+	for (s = context->job->symbols; s; s = s->next) {
+		/* The requested address has priority over the resolved start */
+		if (s->symbol.vma_near)
+			vma = s->symbol.vma_near;
+		else if (s->symbol.vma_start)
+			vma = s->symbol.vma_start;
+		else
+			continue;
+
+		/* Convert the run-time address to an address inside the file */
+		if (abfd->flags & DYNAMIC)
+			vma -=  context->job->addr_offset;
+		if (!vma || vma < section->vma ||
+		    vma >= (section->vma + section->size))
+			continue;
+
+		if (!s->symbol.fname)
+			s->symbol.fname = strdup(abfd->filename);
+		if (context->job->flags & RESOLVE_FOFFSET)
+			s->symbol.foffset = section->filepos + (vma - section->vma);
+		if (s->symbol.name || !(context->job->flags & RESOLVE_NAME))
+			continue;
+
+		found = bfd_find_nearest_line_discriminator(abfd, section, context->table,
+							    vma - section->vma, &filename,
+							    &functionname, &line, &discriminator);
+#ifdef DEBUG_INTERNALS
+		/* The addresses are unsigned long long, print them with %llX */
+		printf("\n\r Find addr near 0x%llX, offset 0x%llX - > vma - 0x%llX in %s, found %s \n\r",
+			s->symbol.vma_near, context->job->addr_offset, vma, abfd->filename,
+			found ? functionname : "NA");
+#endif
+		if (found) {
+			/* Fall back to the raw name if it cannot be demangled */
+			s->symbol.name = bfd_demangle(abfd, functionname, DMGL_AUTO);
+			if (!s->symbol.name)
+				s->symbol.name = strdup(functionname);
+		}
+	}
+}
+
+/*
+ * get_sym_table - read the static and the dynamic symbols of a BFD object
+ * @handle: an open BFD object
+ *
+ * Both tables are stored one after another in a single array. Return the
+ * newly allocated array, which must be freed with free(), or NULL if there
+ * are no symbols or in case of an error.
+ */
+static asymbol **get_sym_table(bfd *handle)
+{
+	long size, ssize, dsize;
+	asymbol **symtable;
+	long count = 0;
+	long n;
+
+	if ((bfd_get_file_flags(handle) & HAS_SYMS) == 0)
+		return NULL;
+
+	dsize = bfd_get_dynamic_symtab_upper_bound(handle);
+	size = dsize > 0 ? dsize : 0;
+
+	ssize = bfd_get_symtab_upper_bound(handle);
+	size += ssize > 0 ? ssize : 0;
+
+	if (size <= 0)
+		return NULL;
+
+	symtable = calloc(1, size);
+	if (!symtable)
+		return NULL;
+
+	/*
+	 * Read only the tables for which space was reserved, and do not use
+	 * a negative (error) result as an offset in the array.
+	 */
+	if (ssize > 0) {
+		n = bfd_canonicalize_symtab(handle, symtable);
+		if (n > 0)
+			count = n;
+	}
+	if (dsize > 0) {
+		n = bfd_canonicalize_dynamic_symtab(handle, symtable + count);
+		if (n > 0)
+			count += n;
+	}
+	if (count <= 0) {
+		free(symtable);
+		return NULL;
+	}
+
+	return symtable;
+}
+
+/*
+ * sym_match - check if a symbol name matches a requested pattern
+ * @pattern: requested name, or a fnmatch() pattern
+ * @match: how @pattern has to be compared
+ * @symbol: name of a symbol from the symbol table
+ *
+ * Return true if @symbol matches, false otherwise.
+ */
+static bool sym_match(char *pattern, enum match_type match, const char *symbol)
+{
+	if (match == MATCH_EXACT)
+		return strcmp(pattern, symbol) == 0;
+	if (match == MATH_WILDCARD)
+		return fnmatch(pattern, symbol, 0) == 0;
+
+	return false;
+}
+
+/*
+ * lookup_bfd_sym - name -> VMA resolving, using the symbol table
+ * @context: the symbol table of the file and the job with requested symbols
+ *
+ * An exact match stores the address in the requested symbol. Each match of a
+ * wildcard pattern creates a new symbol; all new symbols are appended to the
+ * end of the job's list when the walk is done.
+ * Return the number of matched symbols.
+ */
+static int lookup_bfd_sym(struct dwarf_bfd_context *context)
+{
+	struct debug_symbols *new, *new_list = NULL;
+	struct debug_symbols *s, *tail;
+	unsigned long long vma;
+	asymbol **sp;
+	int res = 0;
+
+	for (sp = context->table; *sp != NULL; sp++) {
+		if (!((*sp)->flags & BSF_FUNCTION))
+			continue;
+		for (s = context->job->symbols; s; s = s->next) {
+			if (!s->symbol.name)
+				continue;
+			if (!sym_match(s->symbol.name, s->match, (*sp)->name))
+				continue;
+#ifdef DEBUG_INTERNALS
+			printf("Matched %s, pattern %s\n\r", (*sp)->name, s->symbol.name);
+#endif
+			vma = (*sp)->value + (*sp)->section->vma;
+			if ((*sp)->the_bfd->flags & DYNAMIC)
+				vma += context->job->addr_offset;
+			if (s->match == MATCH_EXACT) { /* exact match */
+				s->symbol.vma_start = vma;
+			} else if (s->match == MATH_WILDCARD) { /* wildcard pattern match */
+				new = calloc(1, sizeof(*new));
+				if (!new)
+					goto out;
+				new->symbol.name = strdup((*sp)->name);
+				if (!new->symbol.name) {
+					/* A symbol without a name is useless */
+					free(new);
+					goto out;
+				}
+				new->symbol.vma_start = vma;
+				new->symbol.vma_near = s->symbol.vma_near;
+				new->symbol.foffset = s->symbol.foffset;
+				if (s->symbol.fname)
+					new->symbol.fname = strdup(s->symbol.fname);
+				new->next = new_list;
+				new_list = new;
+			}
+			res++;
+		}
+	}
+
+out:
+	/*
+	 * Attach the new symbols to the real tail of the list, so they are
+	 * not leaked when the last named symbol is not the last one.
+	 * The list cannot be empty here, as each new symbol is a match of
+	 * an existing one.
+	 */
+	if (new_list) {
+		tail = context->job->symbols;
+		while (tail->next)
+			tail = tail->next;
+		tail->next = new_list;
+	}
+
+	return res;
+}
+
+/*
+ * process_bfd_object - run a resolving job on a single BFD object
+ * @abfd: an open BFD, must be an object or a core file
+ * @job: what to resolve and the list of symbols
+ *
+ * Return 0 on success, -1 if the format is not supported or there is no
+ * symbol table.
+ */
+static int process_bfd_object(bfd *abfd, struct trace_obj_job *job)
+{
+	struct dwarf_bfd_context ctx;
+
+	memset(&ctx, 0, sizeof(ctx));
+	ctx.job = job;
+
+	if (!bfd_check_format_matches(abfd, bfd_object, NULL) &&
+	    !bfd_check_format_matches(abfd, bfd_core, NULL))
+		return -1;
+
+	ctx.table = get_sym_table(abfd);
+	if (!ctx.table)
+		return -1;
+
+	/* name -> VMA first, the section walk can use the resolved address */
+	if (job->flags & RESOLVE_VMA)
+		lookup_bfd_sym(&ctx);
+	if (job->flags & (RESOLVE_NAME | RESOLVE_FOFFSET))
+		bfd_map_over_sections(abfd, process_bfd_section, &ctx);
+	free(ctx.table);
+
+	return 0;
+}
+
+/*
+ * read_all_bfd - run a resolving job on a BFD, walk into archives
+ * @abfd: an open BFD - an archive, an object or a core file
+ * @job: what to resolve and the list of symbols
+ *
+ * Return the result of the last processed object, or -1 if walking of an
+ * archive fails.
+ */
+static int read_all_bfd(bfd *abfd, struct trace_obj_job *job)
+{
+	bfd *last_arfile = NULL;
+	bfd *arfile = NULL;
+	int ret = 0;
+
+	if (!bfd_check_format(abfd, bfd_archive))
+		return process_bfd_object(abfd, job);
+
+	for (;;) {
+		bfd_set_error(bfd_error_no_error);
+		arfile = bfd_openr_next_archived_file(abfd, arfile);
+		if (!arfile) {
+			/*
+			 * Always stop when there is no next member, a NULL
+			 * BFD must not be processed. Anything but the end
+			 * of the archive is an error.
+			 */
+			if (bfd_get_error() != bfd_error_no_more_archived_files)
+				ret = -1;
+			break;
+		}
+		ret = read_all_bfd(arfile, job);
+		/* The previous member is closed after the next one is opened */
+		if (last_arfile)
+			bfd_close(last_arfile);
+		last_arfile = arfile;
+	}
+	if (last_arfile)
+		bfd_close(last_arfile);
+
+	return ret;
+}
+
+/**
+ * resolve_symbol_vma - name -> (vma, file offset) resolving
+ * @obj - pointer to object, returned by debug_handle_create()
+ * @symbols - link list with desired symbols, with given name
+ *
+ * Get VMA and file offset of the symbols with given name
+ * Return 0 on success, -1 on error
+ */
+static int resolve_symbol_vma(struct trace_debug_handle *obj,
+			      struct debug_symbols *symbols)
+{
+	struct trace_obj_job job;
+
+	/* Same validation as in resolve_symbol_name() */
+	if (!obj || !obj->bfd)
+		return -1;
+	memset(&job, 0, sizeof(job));
+	job.flags |= RESOLVE_VMA;
+	job.flags |= RESOLVE_FOFFSET;
+	job.symbols = symbols;
+	job.addr_offset = obj->addr_offset;
+
+	return read_all_bfd(obj->bfd, &job);
+}
+
+/**
+ * resolve_symbol_name - vma -> name resolving
+ * @obj - pointer to object, returned by debug_handle_create()
+ * @symbols - link list with desired symbols, with given VMA
+ *
+ * Look for the function that is nearest to the VMA of each symbol and
+ * store its name in the symbol.
+ * Return 0 on success, -1 on error
+ */
+static int resolve_symbol_name(struct trace_debug_handle *obj,
+			       struct debug_symbols *symbols)
+{
+	struct trace_obj_job name_job;
+
+	if (obj == NULL || obj->bfd == NULL)
+		return -1;
+
+	memset(&name_job, 0, sizeof(name_job));
+	name_job.symbols = symbols;
+	name_job.addr_offset = obj->addr_offset;
+	name_job.flags = RESOLVE_NAME;
+
+	return read_all_bfd(obj->bfd, &name_job);
+}
+
+/**
+ * debug_handle_destroy - Close file opened with debug_handle_create()
+ * @obj - pointer to object, returned by debug_handle_create(), can be NULL
+ *
+ * Close the BFD handle of the file, if there is any, and free the object.
+ */
+static void debug_handle_destroy(struct trace_debug_handle *obj)
+{
+	if (!obj)
+		return;
+
+	if (obj->bfd)
+		bfd_close(obj->bfd);
+	free(obj);
+}
+
+/**
+ * debug_handle_create - Open a binary file with the BFD library
+ * @file: Name of the binary file.
+ *
+ * Return pointer to a newly allocated trace_debug_handle structure, that
+ * must be freed with debug_handle_destroy(). NULL in case of an error.
+ */
+static struct trace_debug_handle *debug_handle_create(char *file)
+{
+	struct trace_debug_handle *handle;
+
+	handle = calloc(1, sizeof(*handle));
+	if (!handle)
+		return NULL;
+
+	bfd_init();
+	handle->bfd = bfd_openr(file, NULL);
+	if (!handle->bfd) {
+		debug_handle_destroy(handle);
+		return NULL;
+	}
+	handle->bfd->flags |= BFD_DECOMPRESS;
+
+	return handle;
+}
+
+/* Store the address offset in the handle, a NULL handle is ignored */
+static void set_vma_offset(struct trace_debug_handle *obj,
+				unsigned long long addr_offset)
+{
+	if (!obj)
+		return;
+
+	obj->addr_offset = addr_offset;
+}
+
+/*
+ * get_full_name - get the path to the executable of a process
+ * @pid: PID of the process
+ *
+ * Read the target of the /proc/<pid>/exe link. Return a newly allocated
+ * string, which must be freed with free(), or NULL in case of an error.
+ */
+static char *get_full_name(int pid)
+{
+	char target[PATH_MAX+1];
+	char link[PATH_MAX+1];
+	int len;
+
+	sprintf(link, "/proc/%d/exe", pid);
+	len = readlink(link, target, PATH_MAX);
+	if (len < 0 || len >= PATH_MAX)
+		return NULL;
+	/* readlink() does not terminate the string */
+	target[len] = 0;
+
+	return strdup(target);
+}
+
+/*
+ * get_mapped_file - find or create the description of a mapped file
+ * @dbg: the debug object that owns the list of files
+ * @fname: name of the file
+ * @vmem_start: start address of the new mapping, or 0
+ *
+ * An existing file is returned only if it has the same name and the new
+ * mapping begins exactly at the end of its address range. In all other
+ * cases a new file is created and added in front of the list; its debug
+ * handle stays NULL if the file cannot be opened.
+ * Return the file, or NULL if there is not enough memory.
+ */
+static struct trace_debug_file *get_mapped_file(struct trace_debug_object *dbg,
+						char *fname,
+						unsigned long long vmem_start)
+{
+	struct trace_debug_file *file;
+
+	for (file = dbg->files; file; file = file->next) {
+		if (!strcmp(fname, file->file_name) &&
+		    vmem_start && file->vmem_end == vmem_start)
+			return file;
+	}
+
+	file = calloc(1, sizeof(*file));
+	if (!file)
+		return NULL;
+	file->file_name = strdup(fname);
+	if (!file->file_name) {
+		/* The name is mandatory, it is compared on each lookup */
+		free(file);
+		return NULL;
+	}
+	file->dbg = debug_handle_create(fname);
+	file->next = dbg->files;
+	dbg->files = file;
+
+	return file;
+}
+
+/* Free a list of requested symbols, together with the strings they own */
+static void debug_symbols_free(struct debug_symbols *sym)
+{
+	struct debug_symbols *del;
+
+	while (sym) {
+		del = sym;
+		sym = sym->next;
+		free(del->symbol.name);
+		free(del->symbol.fname);
+		free(del);
+	}
+}
+
+/**
+ * trace_debug_obj_destroy - free a debug object
+ * @dbg: object, returned by trace_debug_obj_create_pid() or
+ *	 trace_debug_obj_create_file(), can be NULL
+ *
+ * Free the object, all its files, symbols and memory maps.
+ */
+void trace_debug_obj_destroy(struct trace_debug_object *dbg)
+{
+	struct trace_debug_file *fdel;
+
+	if (!dbg)
+		return;
+
+	debug_symbols_free(dbg->sym);
+	dbg->sym = NULL;
+	while (dbg->files) {
+		fdel = dbg->files;
+		dbg->files = dbg->files->next;
+		debug_handle_destroy(fdel->dbg);
+		debug_symbols_free(fdel->sym);
+		/* The name is allocated when the file is created */
+		free(fdel->file_name);
+		free(fdel);
+	}
+
+	free(dbg->fname);
+	trace_debug_free_filemap(dbg->fmaps);
+	free(dbg);
+}
+
+/**
+ * trace_debug_obj_add_file - add a mapped file to a debug object
+ * @dbg: the debug object
+ * @file_name: name of the mapped file
+ * @vmem_start: first address of the mapping
+ * @vmem_end: end address of the mapping
+ * @pgoff: offset of the mapping in the file
+ *
+ * A mapping that begins where the range of the file ends only extends that
+ * range. Otherwise the range is replaced and the address offset of the file
+ * is set to @vmem_start - @pgoff.
+ * Return 0 on success, -1 on error.
+ */
+int trace_debug_obj_add_file(struct trace_debug_object *dbg, char *file_name,
+			     unsigned long long vmem_start,
+			     unsigned long long vmem_end,
+			     unsigned long long pgoff)
+{
+	struct trace_debug_file *mapped;
+
+	mapped = get_mapped_file(dbg, file_name, vmem_start);
+	if (mapped == NULL)
+		return -1;
+
+	if (mapped->vmem_end != vmem_start) {
+		/* Not a continuation, start a new address range */
+		mapped->vmem_start = vmem_start;
+		set_vma_offset(mapped->dbg, vmem_start - pgoff);
+	}
+	mapped->vmem_end = vmem_end;
+
+	return 0;
+}
+
+/**
+ * trace_debug_obj_create_pid - create a debug object for a running process
+ * @pid: PID of the process
+ *
+ * Read the name of the executable and the memory maps of the process, and
+ * add all mapped files to the new object.
+ * Return a newly allocated object, that must be freed with
+ * trace_debug_obj_destroy(), or NULL in case of an error.
+ */
+struct trace_debug_object *trace_debug_obj_create_pid(int pid)
+{
+	struct trace_debug_object *dbg;
+	unsigned int i;
+	int ret;
+
+	dbg = calloc(1, sizeof(*dbg));
+	if (!dbg)
+		return NULL;
+
+	dbg->pid = pid;
+	dbg->fname = get_full_name(pid);
+	if (!dbg->fname)
+		goto error;
+	/* The maps are dereferenced below, they must be available */
+	ret = trace_debug_get_filemap(&dbg->fmaps, pid);
+	if (ret < 0 || !dbg->fmaps)
+		goto error;
+
+	for (i = 0; i < dbg->fmaps->nr_lib_maps; i++) {
+		ret = trace_debug_obj_add_file(dbg, dbg->fmaps->lib_maps[i].lib_name,
+					       dbg->fmaps->lib_maps[i].start,
+					       dbg->fmaps->lib_maps[i].end, 0);
+		if (ret < 0)
+			goto error;
+	}
+
+	return dbg;
+
+error:
+	trace_debug_obj_destroy(dbg);
+	return NULL;
+}
+
+/*
+ * get_lib_full_path - get the full path to a shared library
+ * @libname: name of the library, as accepted by the dynamic loader
+ *
+ * Load the library in a new namespace, to find the directory where the
+ * dynamic loader locates it.
+ * Return a newly allocated string "<directory>/<libname>", which must be
+ * freed with free(), or NULL in case of an error.
+ */
+static char *get_lib_full_path(char *libname)
+{
+	void *h = dlmopen(LM_ID_NEWLM, libname, RTLD_LAZY);
+	char dldir[PATH_MAX+1];
+	char *fname = NULL;
+	int ret;
+
+	if (!h)
+		return NULL;
+	ret = dlinfo(h, RTLD_DI_ORIGIN, dldir);
+	dlclose(h);
+	if (ret)
+		return NULL;
+
+	/*
+	 * The content of fname is undefined if asprintf() fails,
+	 * it must not be passed to free().
+	 */
+	if (asprintf(&fname, "%s/%s", dldir, libname) < 0)
+		return NULL;
+
+	return fname;
+}
+
+/*
+ * debug_obj_file_add_libs - add the libraries, used by a binary file
+ * @dbg: the debug object, where the libraries are added
+ * @file: the binary file
+ *
+ * Run the file with LD_TRACE_LOADED_OBJECTS=1 set and parse the printed
+ * output. The first word of each line is taken as a library: a full path
+ * is used directly, a name is resolved with get_lib_full_path().
+ * Return 0 on success, -1 if the command cannot be executed.
+ *
+ * NOTE(review): the file name is passed to the shell without quoting,
+ * it must come from a trusted source.
+ */
+static int debug_obj_file_add_libs(struct trace_debug_object *dbg,
+				   struct trace_debug_file *file)
+{
+	char line[PATH_MAX];
+	char *trimmed;
+	char *fullname;
+	char *sep;
+	char *cmd = NULL;
+	FILE *fp = NULL;
+	int ret = -1;
+
+	if (asprintf(&cmd, "LD_TRACE_LOADED_OBJECTS=1 %s", file->file_name) < 0)
+		return -1;
+	fp = popen(cmd, "r");
+	if (!fp)
+		goto out;
+
+	while (fgets(line, sizeof(line), fp) != NULL) {
+		/* Only lines with more than one word are processed */
+		sep = strchr(line, ' ');
+		if (!sep)
+			continue;
+		*sep = '\0';
+		trimmed = line;
+		/* isspace() is defined for unsigned char values only */
+		while (isspace((unsigned char)*trimmed))
+			trimmed++;
+		if (*trimmed == '/') {
+			get_mapped_file(dbg, trimmed, 0);
+			continue;
+		}
+		fullname = get_lib_full_path(trimmed);
+		if (fullname) {
+			get_mapped_file(dbg, fullname, 0);
+			free(fullname);
+		}
+	}
+	/* The output is parsed, report success */
+	ret = 0;
+
+out:
+	if (fp)
+		pclose(fp);
+	free(cmd);
+	return ret;
+}
+
+/**
+ * trace_debug_obj_create_file - create a debug object for a binary file
+ * @fname: name of the binary file
+ * @libs: if true, add also the libraries used by the file
+ *
+ * Return a newly allocated object, that must be freed with
+ * trace_debug_obj_destroy(), or NULL in case of an error.
+ */
+struct trace_debug_object *trace_debug_obj_create_file(char *fname, bool libs)
+{
+	struct trace_debug_object *dbg;
+	struct trace_debug_file *file;
+
+	if (!fname)
+		return NULL;
+
+	dbg = calloc(1, sizeof(*dbg));
+	if (!dbg)
+		return NULL;
+
+	dbg->fname = strdup(fname);
+	if (!dbg->fname)
+		goto error;
+	file = get_mapped_file(dbg, fname, 0);
+	if (!file)
+		goto error;
+	/* Failing to find the libraries is not fatal */
+	if (libs)
+		debug_obj_file_add_libs(dbg, file);
+
+#ifdef DEBUG_INTERNALS
+	printf("Created debug object for %s:\n\r", dbg->fname);
+	file = dbg->files;
+	while (file) {
+		printf("\t%s\n\r", file->file_name);
+		file = file->next;
+	}
+#endif
+	return dbg;
+
+error:
+	trace_debug_obj_destroy(dbg);
+	return NULL;
+}
+
+/* Set the file name of all symbols from the list, that do not have one */
+static void set_unknown(struct debug_symbols *sym, char *file)
+{
+	struct debug_symbols *s;
+
+	for (s = sym; s; s = s->next) {
+		if (s->symbol.fname)
+			continue;
+		s->symbol.fname = strdup(file);
+	}
+}
+
+/**
+ * trace_debug_resolve_symbols - resolve all requested symbols
+ * @obj: the debug object
+ *
+ * Walk through all files of the object. The symbols of a file are resolved
+ * in that file only, the symbols of the object are looked up in every file.
+ * A file without debug handle gets only the file name of its symbols set.
+ * Always return 0.
+ */
+int trace_debug_resolve_symbols(struct trace_debug_object *obj)
+{
+	struct trace_debug_file *file = obj->files;
+
+	while (file) {
+		if (file->dbg) {
+			/* near VMA -> name resolving */
+			resolve_symbol_name(file->dbg, file->sym);
+			/* name -> exact VMA resolving */
+			resolve_symbol_vma(file->dbg, file->sym);
+			resolve_symbol_vma(file->dbg, obj->sym);
+		} else {
+			set_unknown(file->sym, file->file_name);
+		}
+		file = file->next;
+	}
+
+	return 0;
+}
+
+/*
+ * add_resolve_vma2name - request vma -> name resolving
+ * @obj: the debug object
+ * @vma: address to resolve
+ *
+ * Find the file whose address range contains @vma and add a symbol with
+ * that address to the list of the file, if it is not there already.
+ * Return 0 on success, -1 if no file is mapped at @vma or there is not
+ * enough memory.
+ */
+static int add_resolve_vma2name(struct trace_debug_object *obj,
+				unsigned long long vma)
+{
+	struct trace_debug_file *file;
+	struct debug_symbols *s;
+
+	/* The end address of a mapping is exclusive, it is not part of it */
+	for (file = obj->files; file; file = file->next) {
+		if (vma >= file->vmem_start && vma < file->vmem_end)
+			break;
+	}
+	if (!file)
+		return -1;
+
+	for (s = file->sym; s; s = s->next) {
+		if (s->symbol.vma_near == vma)
+			return 0;
+	}
+
+	s = calloc(1, sizeof(*s));
+	if (!s)
+		return -1;
+	s->symbol.vma_near = vma;
+	s->symbol.fname = strdup(file->file_name);
+	s->next = file->sym;
+	file->sym = s;
+	file->sym_count++;
+
+	return 0;
+}
+
+/*
+ * add_resolve_name2vma - request name -> vma resolving
+ * @obj: the debug object
+ * @name: name of the symbol, or a pattern with '*' or '?'
+ *
+ * Add a symbol with that name to the list of the object, if it is not there
+ * already. A name with '*' or '?' is marked as a wildcard pattern.
+ * Return 0 on success, -1 if there is not enough memory.
+ */
+static int add_resolve_name2vma(struct trace_debug_object *obj, char *name)
+{
+	struct debug_symbols *sym;
+
+	/* Nothing to do if the name is already requested */
+	for (sym = obj->sym; sym; sym = sym->next) {
+		if (sym->symbol.name && !strcmp(name, sym->symbol.name))
+			return 0;
+	}
+
+	sym = calloc(1, sizeof(*sym));
+	if (!sym)
+		return -1;
+	sym->symbol.name = strdup(name);
+	if (!sym->symbol.name) {
+		free(sym);
+		return -1;
+	}
+	if (strchr(name, '*') || strchr(name, '?'))
+		sym->match = MATH_WILDCARD;
+
+	sym->next = obj->sym;
+	obj->sym = sym;
+	obj->sym_count++;
+
+	return 0;
+}
+
+/**
+ * trace_debug_add_resolve_symbol - request resolving of a symbol
+ * @obj: the debug object
+ * @vma: address to be resolved to a name, used only if @name is NULL
+ * @name: name to be resolved to an address, or NULL
+ *
+ * Return 0 on success, -1 on error or if both @vma and @name are not set.
+ */
+int trace_debug_add_resolve_symbol(struct trace_debug_object *obj,
+				   unsigned long long vma, char *name)
+{
+	if (!obj)
+		return -1;
+
+	if (name) /* name -> vma resolving */
+		return add_resolve_name2vma(obj, name);
+	if (vma) /* vma -> name resolving */
+		return add_resolve_vma2name(obj, vma);
+
+	return -1;
+}
+
+/*
+ * Call the callback for each symbol from the list, stop on the first
+ * non-zero result. Return 0 if the whole list is walked, -1 otherwise.
+ */
+static int walk_symbols(struct debug_symbols *sym,
+			int (*callback)(struct tracecmd_debug_symbols *, void *),
+			void *context)
+{
+	struct debug_symbols *s;
+
+	for (s = sym; s; s = s->next) {
+		if (callback(&s->symbol, context) != 0)
+			return -1;
+	}
+
+	return 0;
+}
+
+/**
+ * trace_debug_walk_resolved_symbols - walk through all symbols of an object
+ * @obj: the debug object
+ * @callback: function, called for each symbol
+ * @context: passed to @callback
+ *
+ * The symbols of the object are walked first, then the symbols of each file.
+ * A non-zero result from @callback stops the walk of the current list only.
+ */
+void trace_debug_walk_resolved_symbols(struct trace_debug_object *obj,
+				       int (*callback)(struct tracecmd_debug_symbols *, void *),
+				       void *context)
+{
+	struct trace_debug_file *f;
+
+	walk_symbols(obj->sym, callback, context);
+	for (f = obj->files; f; f = f->next)
+		walk_symbols(f->sym, callback, context);
+}
+
+
+/**
+ * trace_debug_free_symbols - free an array of symbols
+ * @symbols: array of symbols, can be NULL
+ * @count: number of the elements in the array
+ *
+ * Free the strings of each symbol and the array itself.
+ */
+void trace_debug_free_symbols(struct tracecmd_debug_symbols *symbols, int count)
+{
+	struct tracecmd_debug_symbols *sym;
+	int idx = 0;
+
+	if (symbols == NULL)
+		return;
+
+	while (idx < count) {
+		sym = &symbols[idx++];
+		free(sym->name);
+		free(sym->fname);
+	}
+	free(symbols);
+}
+
+#define _STRINGIFY(x) #x
+#define STRINGIFY(x) _STRINGIFY(x)
+/**
+ * trace_debug_get_filemap - read the file mappings of a process
+ * @pid_maps: head of a list with maps of processes; the maps of @pid are
+ *	      added in front of the list, or are refreshed if they are
+ *	      already there
+ * @pid: PID of the process
+ *
+ * Parse /proc/<pid>/maps and store the address range and the name of each
+ * mapping that has a name.
+ * Return 0 on success, -ENOENT if the proc files of the process cannot be
+ * read, -ENOMEM if there is not enough memory. In case of a memory failure
+ * the maps of @pid are removed from the list and freed.
+ */
+int trace_debug_get_filemap(struct pid_addr_maps **pid_maps, int pid)
+{
+	struct pid_addr_maps *maps = *pid_maps;
+	struct tracecmd_proc_addr_map *map;
+	unsigned long long begin, end;
+	struct pid_addr_maps *m;
+	char mapname[PATH_MAX+1];
+	char fname[PATH_MAX+1];
+	char buf[PATH_MAX+100];
+	unsigned int i;
+	FILE *f;
+	int ret;
+	int res;
+
+	sprintf(fname, "/proc/%d/exe", pid);
+	ret = readlink(fname, mapname, PATH_MAX);
+	if (ret >= PATH_MAX || ret < 0)
+		return -ENOENT;
+	mapname[ret] = 0;
+
+	sprintf(fname, "/proc/%d/maps", pid);
+	f = fopen(fname, "r");
+	if (!f)
+		return -ENOENT;
+
+	while (maps) {
+		if (pid == maps->pid)
+			break;
+		maps = maps->next;
+	}
+
+	ret = -ENOMEM;
+	if (!maps) {
+		maps = calloc(1, sizeof(*maps));
+		if (!maps)
+			goto out_fail;
+		maps->pid = pid;
+		maps->next = *pid_maps;
+		*pid_maps = maps;
+	} else {
+		/* Drop the old content, the maps are read again */
+		for (i = 0; i < maps->nr_lib_maps; i++)
+			free(maps->lib_maps[i].lib_name);
+		free(maps->lib_maps);
+		maps->lib_maps = NULL;
+		maps->nr_lib_maps = 0;
+		free(maps->proc_name);
+	}
+
+	maps->proc_name = strdup(mapname);
+	if (!maps->proc_name)
+		goto out_fail;
+
+	while (fgets(buf, sizeof(buf), f)) {
+		mapname[0] = '\0';
+		res = sscanf(buf, "%llx-%llx %*s %*x %*s %*d %"STRINGIFY(PATH_MAX)"s",
+			     &begin, &end, mapname);
+		if (res == 3 && mapname[0] != '\0') {
+			map = realloc(maps->lib_maps,
+				      (maps->nr_lib_maps + 1) * sizeof(*map));
+			if (!map)
+				goto out_fail;
+			/*
+			 * Store the new array at once, the old pointer may
+			 * not be valid any more and must not be freed in
+			 * the error path.
+			 */
+			maps->lib_maps = map;
+			map[maps->nr_lib_maps].end = end;
+			map[maps->nr_lib_maps].start = begin;
+			map[maps->nr_lib_maps].lib_name = strdup(mapname);
+			if (!map[maps->nr_lib_maps].lib_name)
+				goto out_fail;
+			maps->nr_lib_maps++;
+		}
+	}
+
+	fclose(f);
+	return 0;
+
+out_fail:
+	fclose(f);
+	if (maps) {
+		for (i = 0; i < maps->nr_lib_maps; i++)
+			free(maps->lib_maps[i].lib_name);
+		/* Unlink the maps of this process from the list */
+		if (*pid_maps != maps) {
+			m = *pid_maps;
+			while (m) {
+				if (m->next == maps) {
+					m->next = maps->next;
+					break;
+				}
+				m = m->next;
+			}
+		} else
+			*pid_maps = maps->next;
+		free(maps->lib_maps);
+		maps->lib_maps = NULL;
+		maps->nr_lib_maps = 0;
+		free(maps->proc_name);
+		maps->proc_name = NULL;
+		free(maps);
+	}
+	return ret;
+}
+
+/* Free the maps of a single process: all names, the array and the structure */
+static void procmap_free(struct pid_addr_maps *maps)
+{
+	unsigned int idx;
+
+	if (maps == NULL)
+		return;
+
+	if (maps->lib_maps != NULL) {
+		idx = 0;
+		while (idx < maps->nr_lib_maps) {
+			free(maps->lib_maps[idx].lib_name);
+			idx++;
+		}
+		free(maps->lib_maps);
+	}
+	free(maps->proc_name);
+	free(maps);
+}
+
+/**
+ * trace_debug_free_filemap - free a list with maps of processes
+ * @maps: head of the list, can be NULL
+ */
+void trace_debug_free_filemap(struct pid_addr_maps *maps)
+{
+	struct pid_addr_maps *next;
+
+	for (; maps; maps = next) {
+		/* Get the next one before the current is freed */
+		next = maps->next;
+		procmap_free(maps);
+	}
+}
diff --git a/src/trace-obj-debug.h b/src/trace-obj-debug.h
new file mode 100644
index 0000000..4040f93
--- /dev/null
+++ b/src/trace-obj-debug.h
@@ -0,0 +1,52 @@
+/* SPDX-License-Identifier: LGPL-2.1 */
+
+/*
+ * Copyright 2022 VMware Inc, Tzvetomir Stoyanov (VMware) <tz.stoyanov@gmail.com>
+ */
+
+#ifndef _TC_TRACE_DEBUG_UTILS_
+#define _TC_TRACE_DEBUG_UTILS_
+
+/* bool is part of the API, the header must not depend on other includes */
+#include <stdbool.h>
+
+/* --- Debug symbols--- */
+
+/* Memory maps of a single process, node of a linked list */
+struct pid_addr_maps {
+	struct pid_addr_maps	*next;
+	struct tracecmd_proc_addr_map	*lib_maps;	/* array of mappings */
+	unsigned int			nr_lib_maps;	/* size of the array */
+	char				*proc_name;	/* executable of the process */
+	int				pid;
+};
+int trace_debug_get_filemap(struct pid_addr_maps **file_maps, int pid);
+void trace_debug_free_filemap(struct pid_addr_maps *maps);
+
+struct tracecmd_debug_symbols {
+	char *name;			/* symbol's name */
+	char *fname;			/* symbol's file */
+	unsigned long long vma_start;	/* symbol's start VMA */
+	unsigned long long vma_near;	/* symbol's requested VMA */
+	unsigned long long foffset;	/* symbol's offset in the binary file*/
+};
+
+/* A single mapping of a file in the address space of a process */
+struct tracecmd_proc_addr_map {
+	unsigned long long	start;
+	unsigned long long	end;
+	char			*lib_name;
+};
+
+struct trace_debug_object;
+struct trace_debug_object *trace_debug_obj_create_file(char *file, bool libs);
+struct trace_debug_object *trace_debug_obj_create_pid(int pid);
+void trace_debug_obj_destroy(struct trace_debug_object *debug);
+int trace_debug_obj_add_file(struct trace_debug_object *dbg, char *file_name,
+			     unsigned long long vmem_start,
+			     unsigned long long vmem_end,
+			     unsigned long long pgoff);
+
+int trace_debug_resolve_symbols(struct trace_debug_object *obj);
+int trace_debug_add_resolve_symbol(struct trace_debug_object *obj,
+				   unsigned long long vma, char *name);
+
+void trace_debug_walk_resolved_symbols(struct trace_debug_object *obj,
+				       int (*callback)(struct tracecmd_debug_symbols *, void *),
+				       void *context);
+/* Defined in trace-obj-debug.c with external linkage */
+void trace_debug_free_symbols(struct tracecmd_debug_symbols *symbols, int count);
+
+#endif /* _TC_TRACE_DEBUG_UTILS_ */
-- 
2.34.1


^ permalink raw reply related	[flat|nested] 8+ messages in thread

* [RFC PATCH 2/3] trace-cruncher: Support for perf
  2022-02-24 16:37 [RFC PATCH 0/3] trace-cruncher: Initial support for perf Tzvetomir Stoyanov (VMware)
  2022-02-24 16:37 ` [RFC PATCH 1/3] trace-cruncher: Logic for resolving address to function name Tzvetomir Stoyanov (VMware)
@ 2022-02-24 16:37 ` Tzvetomir Stoyanov (VMware)
  2022-02-25 14:51   ` Arnaldo Carvalho de Melo
  2022-02-24 16:37 ` [RFC PATCH 3/3] trace-cruncher: perf example Tzvetomir Stoyanov (VMware)
  2022-02-24 16:52 ` [RFC PATCH 0/3] trace-cruncher: Initial support for perf Ian Rogers
  3 siblings, 1 reply; 8+ messages in thread
From: Tzvetomir Stoyanov (VMware) @ 2022-02-24 16:37 UTC (permalink / raw)
  To: y.karadz, acme, olsajiri, irogers
  Cc: rostedt, linux-trace-devel, linux-perf-users

Initial perf support for trace-cruncher, using libperf. As a first
stage, collecting of stack trace samples of given process is supported.

Signed-off-by: Tzvetomir Stoyanov (VMware) <tz.stoyanov@gmail.com>
---
 setup.py           |   9 +-
 src/perfpy-utils.c | 699 +++++++++++++++++++++++++++++++++++++++++++++
 src/perfpy-utils.h |  41 +++
 src/perfpy.c       | 141 +++++++++
 4 files changed, 889 insertions(+), 1 deletion(-)
 create mode 100644 src/perfpy-utils.c
 create mode 100644 src/perfpy-utils.h
 create mode 100644 src/perfpy.c

diff --git a/setup.py b/setup.py
index 4d7e727..31ca2e3 100644
--- a/setup.py
+++ b/setup.py
@@ -19,18 +19,21 @@ def third_party_paths():
     pkg_ftracepy = pkg.parse('libtracefs')
     pkg_tracecmd = pkg.parse('libtracecmd')
     pkg_kshark = pkg.parse('libkshark')
+    pkg_perf = pkg.parse('libperf')
 
     include_dirs = [np.get_include()]
     include_dirs.extend(pkg_traceevent['include_dirs'])
     include_dirs.extend(pkg_ftracepy['include_dirs'])
     include_dirs.extend(pkg_tracecmd['include_dirs'])
     include_dirs.extend(pkg_kshark['include_dirs'])
+    include_dirs.extend(pkg_perf['include_dirs'])
 
     library_dirs = []
     library_dirs.extend(pkg_traceevent['library_dirs'])
     library_dirs.extend(pkg_ftracepy['library_dirs'])
     library_dirs.extend(pkg_tracecmd['library_dirs'])
     library_dirs.extend(pkg_kshark['library_dirs'])
+    library_dirs.extend(pkg_perf['library_dirs'])
     library_dirs = list(set(library_dirs))
 
     return include_dirs, library_dirs
@@ -61,6 +64,10 @@ def main():
                           sources=['src/ksharkpy.c', 'src/ksharkpy-utils.c'],
                           libraries=['kshark'])
 
+    module_perf = extension(name='tracecruncher.perfpy',
+                            sources=['src/perfpy.c', 'src/perfpy-utils.c', 'src/trace-obj-debug.c'],
+                            libraries=['traceevent', 'perf', 'bfd'])
+
     setup(name='tracecruncher',
           version='0.1.0',
           description='NumPy based interface for accessing tracing data in Python.',
@@ -69,7 +76,7 @@ def main():
           url='https://github.com/vmware/trace-cruncher',
           license='LGPL-2.1',
           packages=find_packages(),
-          ext_modules=[module_ft, module_data, module_ks],
+          ext_modules=[module_ft, module_data, module_ks, module_perf],
           classifiers=[
               'Development Status :: 3 - Alpha',
               'Programming Language :: Python :: 3',
diff --git a/src/perfpy-utils.c b/src/perfpy-utils.c
new file mode 100644
index 0000000..ae57a63
--- /dev/null
+++ b/src/perfpy-utils.c
@@ -0,0 +1,699 @@
+// SPDX-License-Identifier: LGPL-2.1
+
+/*
+ * Copyright 2022 VMware Inc, Tzvetomir Stoyanov (VMware) <tz.stoyanov@gmail.com>
+ */
+
+#ifndef _GNU_SOURCE
+/** Use GNU C Library. */
+#define _GNU_SOURCE
+#endif // _GNU_SOURCE
+
+// C
+#include <stdio.h>
+#include <unistd.h>
+#include <pthread.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <dirent.h>
+#include <sys/resource.h>
+
+// libperf
+#include <linux/perf_event.h>
+#include <perf/evlist.h>
+#include <perf/evsel.h>
+#include <perf/cpumap.h>
+#include <perf/threadmap.h>
+#include <perf/mmap.h>
+#include <perf/core.h>
+#include <perf/event.h>
+
+// trace-cruncher
+#include "perfpy-utils.h"
+#include "trace-obj-debug.h"
+
+PyObject *PERF_ERROR;
+
+#define TMP_FILE "/tmp/perf_temp_data.XXXXXX"
+
+/* One entry of perf_handle::thr_map: a thread of the traced process. */
+struct perf_scan_thread {
+	uint32_t tid;	/* thread id, from /proc/<pid>/task or a PERF_RECORD_COMM event */
+	char *comm;	/* heap-allocated thread name, may be NULL; freed in py_perf_handle_free() */
+	struct perf_counts_values count;
+};
+
+/*
+ * State of one sampling session. Created by new_perf_sampling_handle() and
+ * released by py_perf_handle_free().
+ */
+struct perf_handle {
+	bool running;	/* set by perf_reader_start(), cleared by perf_reader_stop(); polled by the reader thread */
+	pthread_t reader;	/* thread running perf_reader_thread() */
+	int fd;		/* unlinked temporary file holding the raw samples, -1 if not open */
+	int thr_count;	/* number of entries in thr_map */
+	uint32_t pid;	/* traced process; samples from other PIDs are dropped */
+	struct perf_scan_thread *thr_map;
+	struct perf_evlist *evlist;
+	struct perf_evsel *evsel;
+	struct perf_event_attr attr;
+	struct perf_cpu_map *cpus;
+	struct trace_debug_object *debug;	/* address -> symbol resolver, may be NULL */
+	struct perf_thread_map *threads;
+};
+
+/*
+ * Payload of a PERF_RECORD_SAMPLE as overlaid on event->array in
+ * perf_read_sample(). The field order has to follow the sample_type bits
+ * requested in new_perf_sampling_handle(). The same layout is used for the
+ * records stored in the temporary file.
+ */
+struct event_sample {
+	uint64_t		id;		/* PERF_SAMPLE_IDENTIFIER */
+	uint64_t		ip;		/* PERF_SAMPLE_IP */
+	uint32_t		pid, tid;	/* PERF_SAMPLE_TID */
+	uint64_t		time;		/* PERF_SAMPLE_TIME */
+	uint32_t		cpu, res;	/* PERF_SAMPLE_CPU */
+	uint64_t		nr;
+	uint64_t		ips[];		/* PERF_SAMPLE_CALLCHAIN */
+} __attribute__((packed));
+
+/*
+ * A sample as handed to Python: the fixed part of the raw record plus the
+ * resolved names. Allocated in PyPerf_getSamples() with room for data.nr
+ * entries in ips[] and released by py_perf_sample_free().
+ */
+struct perf_event_sample {
+	struct event_sample	data;
+	struct perf_handle	*perf;	/* borrowed pointer to the session that produced the sample */
+	char			*ip;	/* heap string naming data.ip */
+	char			*ips[];	/* heap strings naming the call chain entries */
+} __attribute__((packed));
+
+/*
+ * Stop the reader thread of @perf, if it is running: clear the running flag,
+ * wait for the thread to finish, flush the samples file and resolve the
+ * symbols that were queued while sampling.
+ */
+static void perf_reader_stop(struct perf_handle *perf)
+{
+	if (perf->running) {
+		/* The reader loop checks this flag on every iteration. */
+		perf->running = false;
+		pthread_join(perf->reader, NULL);
+		fsync(perf->fd);
+		if (perf->debug)
+			trace_debug_resolve_symbols(perf->debug);
+	}
+}
+
+/*
+ * Destroy a sampling session: stop the reader, delete the event list, close
+ * the samples file, destroy the symbol resolver and free the thread table.
+ * A NULL @perf is accepted.
+ */
+void py_perf_handle_free(struct perf_handle *perf)
+{
+	int left;
+
+	if (!perf)
+		return;
+
+	perf_reader_stop(perf);
+	perf_evlist__delete(perf->evlist);
+	if (perf->fd >= 0)
+		close(perf->fd);
+	if (perf->debug)
+		trace_debug_obj_destroy(perf->debug);
+	if (perf->thr_map) {
+		/* every entry owns its comm string */
+		for (left = perf->thr_count; left > 0; left--)
+			free(perf->thr_map[left - 1].comm);
+		free(perf->thr_map);
+	}
+
+	free(perf);
+}
+
+/*
+ * Release a sample together with the name strings it owns.
+ * A NULL @sample is accepted.
+ */
+void py_perf_sample_free(struct perf_event_sample *sample)
+{
+	unsigned int idx = 0;
+
+	if (!sample)
+		return;
+
+	free(sample->ip);
+	while (idx < sample->data.nr) {
+		free(sample->ips[idx]);
+		idx++;
+	}
+	free(sample);
+}
+
+/*
+ * scandir() filter: select directory entries whose name is a non-empty
+ * string of decimal digits, i.e. the task directories under /proc/<pid>/task.
+ *
+ * Returns 1 to keep the entry, 0 to skip it.
+ */
+static int pid_filter(const struct dirent *dir)
+{
+	const char *dname = dir->d_name;
+
+	/* d_name is an array, it cannot be NULL; only reject the empty name */
+	if (*dname == '\0')
+		return 0;
+
+	for (; *dname; dname++) {
+		/* <ctype.h> functions need an unsigned char value */
+		if (!isdigit((unsigned char)*dname))
+			return 0;
+	}
+
+	return 1;
+}
+
+/*
+ * Read the whole @file into a newly allocated, NUL terminated string.
+ *
+ * Returns the number of bytes read and stores the string in @buffer; the
+ * caller must free() it. Returns 0 for an empty file and -1 if the file
+ * cannot be opened or read, or on allocation failure. In both of these
+ * cases @buffer is left untouched.
+ */
+static int str_read_file(const char *file, char **buffer)
+{
+	char stbuf[BUFSIZ];
+	char *buf = NULL;
+	char *nbuf;
+	int size = 0;
+	ssize_t r;
+	int fd;
+
+	fd = open(file, O_RDONLY);
+	if (fd < 0)
+		return -1;
+
+	for (;;) {
+		r = read(fd, stbuf, sizeof(stbuf));
+		if (r <= 0)
+			break;
+		/* one extra byte for the terminating NUL */
+		nbuf = realloc(buf, size + r + 1);
+		if (!nbuf) {
+			r = -1;
+			break;
+		}
+		buf = nbuf;
+		memcpy(buf + size, stbuf, r);
+		size += r;
+	}
+	close(fd);
+
+	/* a failure after a partial read must not be reported as success */
+	if (r < 0) {
+		free(buf);
+		return -1;
+	}
+	if (size == 0)
+		return 0;
+
+	buf[size] = '\0';
+	*buffer = buf;
+	return size;
+}
+
+/*
+ * Cut @str at its first control character (for example the trailing
+ * newline of a /proc "comm" file). A NULL @str is ignored.
+ */
+static void strip_control_chars(char *str)
+{
+	if (!str)
+		return;
+
+	for (; *str; str++) {
+		/* <ctype.h> functions need an unsigned char value */
+		if (iscntrl((unsigned char)*str)) {
+			*str = '\0';
+			return;
+		}
+	}
+}
+
+/*
+ * Build a libperf thread map with all threads of process @pid, as listed in
+ * /proc/<pid>/task, and fill perf->thr_map with the id and name of each one.
+ *
+ * Returns the new thread map, or NULL if the task directory cannot be
+ * scanned or memory cannot be allocated. On failure the thread table of
+ * @perf is left unchanged.
+ */
+static struct perf_thread_map *create_thread_map(struct perf_handle *perf, int pid)
+{
+	struct perf_thread_map *tmap = NULL;
+	struct perf_scan_thread *thr;
+	struct dirent **pids = NULL;
+	char path[PATH_MAX];
+	int i, count;
+
+	snprintf(path, PATH_MAX, "/proc/%d/task", pid);
+	count = scandir(path, &pids, pid_filter, NULL);
+	if (count < 1)
+		goto out;
+
+	/* allocate everything first, so a failure leaves perf untouched */
+	thr = calloc(count, sizeof(*thr));
+	if (!thr)
+		goto out;
+	tmap = perf_thread_map__new_array(count, NULL);
+	if (!tmap) {
+		free(thr);
+		goto out;
+	}
+
+	/* drop the previous table together with the names it owns */
+	if (perf->thr_map) {
+		for (i = 0; i < perf->thr_count; i++)
+			free(perf->thr_map[i].comm);
+		free(perf->thr_map);
+	}
+	perf->thr_map = thr;
+	perf->thr_count = count;
+
+	for (i = 0; i < count; i++) {
+		thr[i].tid = atoi(pids[i]->d_name);
+		perf_thread_map__set_pid(tmap, i, thr[i].tid);
+		snprintf(path, PATH_MAX, "/proc/%d/task/%s/comm", pid, pids[i]->d_name);
+		/* the thread may already be gone; then comm stays NULL */
+		if (str_read_file(path, &thr[i].comm) > 0 && thr[i].comm)
+			strip_control_chars(thr[i].comm);
+	}
+	perf_thread_map__read_comms(tmap);
+
+out:
+	if (pids) {
+		for (i = 0; i < count; i++)
+			free(pids[i]);
+		free(pids);
+	}
+
+	return tmap;
+}
+
+/*
+ * Create a sampling session for process @pid, sampling CPU cycles in user
+ * space at @freq Hz and recording the call chain of every sample.
+ *
+ * With a negative @pid no thread map and no symbol resolver are created.
+ * The samples are stored in an anonymous (already unlinked) temporary file.
+ *
+ * Returns the new handle, to be released with py_perf_handle_free(), or
+ * NULL with a Python exception set.
+ */
+static struct perf_handle *new_perf_sampling_handle(pid_t pid, int freq)
+{
+	char tmp_file[] = TMP_FILE;
+	struct perf_handle *perf;
+
+	perf = calloc(1, sizeof(*perf));
+	if (!perf) {
+		PyErr_NoMemory();
+		return NULL;
+	}
+
+	perf->fd = -1;
+	perf->attr.type        = PERF_TYPE_HARDWARE;
+	perf->attr.config      = PERF_COUNT_HW_CPU_CYCLES;
+	perf->attr.disabled    = 1;
+	perf->attr.freq        = 1;
+	perf->attr.sample_freq = freq;
+	perf->attr.exclude_kernel = 1;
+	perf->attr.exclude_idle = 1;
+	perf->attr.exclude_callchain_kernel = 1;
+	perf->attr.comm = 1;
+	perf->attr.mmap2 = 1;
+	perf->attr.task = 1;
+	perf->attr.precise_ip = 0;
+	perf->attr.inherit = 1;
+	perf->attr.inherit_stat = 1;
+	perf->attr.read_format = PERF_FORMAT_TOTAL_TIME_ENABLED |
+				 PERF_FORMAT_TOTAL_TIME_RUNNING;
+	/* struct event_sample must follow the layout selected here */
+	perf->attr.sample_type = PERF_SAMPLE_IDENTIFIER |
+				 PERF_SAMPLE_IP |
+				 PERF_SAMPLE_TID |
+				 PERF_SAMPLE_TIME |
+				 PERF_SAMPLE_CPU |
+				 PERF_SAMPLE_CALLCHAIN;
+
+	/* trace all CPUs in the system */
+	perf->cpus = perf_cpu_map__new(NULL);
+	if (!perf->cpus) {
+		PyErr_Format(PERF_ERROR, "Failed to create perf cpu map");
+		goto error;
+	}
+
+	if (pid >= 0) {
+		perf->pid = pid;
+		perf->threads = create_thread_map(perf, pid);
+		if (!perf->threads) {
+			PyErr_Format(PERF_ERROR, "Failed to create perf threads map");
+			goto error;
+		}
+		perf->debug = trace_debug_obj_create_pid(pid);
+	}
+
+	perf->evlist = perf_evlist__new();
+	if (!perf->evlist) {
+		PyErr_Format(PERF_ERROR, "Failed to create perf events list");
+		goto error;
+	}
+
+	/* mkstemp() creates and opens the file atomically, unlike mktemp() */
+	perf->fd = mkstemp(tmp_file);
+	if (perf->fd < 0) {
+		PyErr_Format(PERF_ERROR, "Failed to create temporary file for the samples");
+		goto error;
+	}
+	unlink(tmp_file);
+
+	perf->evsel = perf_evsel__new(&perf->attr);
+	if (!perf->evsel) {
+		PyErr_Format(PERF_ERROR, "Failed to create perf cycles");
+		goto error;
+	}
+
+	perf_evlist__add(perf->evlist, perf->evsel);
+	perf_evlist__set_maps(perf->evlist, perf->cpus, perf->threads);
+
+	/*
+	 * The event list took its own references to both maps; drop ours, as
+	 * nothing else in this file uses them and py_perf_handle_free() does
+	 * not release them.
+	 */
+	perf_cpu_map__put(perf->cpus);
+	perf->cpus = NULL;
+	perf_thread_map__put(perf->threads);
+	perf->threads = NULL;
+
+	return perf;
+
+error:
+	perf_cpu_map__put(perf->cpus);
+	perf_thread_map__put(perf->threads);
+	py_perf_handle_free(perf);
+	return NULL;
+}
+
+/*
+ * perfpy.sample(pid, freq=10): create a sampling session for the process
+ * @pid and wrap it in a PyPerf object. Returns NULL with a Python exception
+ * set if the arguments are wrong or the session cannot be created.
+ */
+PyObject *PyPerfSample_new(PyObject *self, PyObject *args, PyObject *kwargs)
+{
+	static char *kwlist[] = {"pid", "freq", NULL};
+	struct perf_handle *handle;
+	int pid = -1;
+	int freq = 10;
+
+	if (!PyArg_ParseTupleAndKeywords(args, kwargs, "i|i", kwlist, &pid, &freq))
+		return NULL;
+
+	handle = new_perf_sampling_handle(pid, freq);
+	if (handle == NULL)
+		return NULL;
+
+	return PyPerf_New(handle);
+}
+
+/*
+ * Handle a PERF_RECORD_SAMPLE: if it belongs to the traced process, append
+ * the fixed part of the sample and then each call chain address to the
+ * samples file, and queue every address for symbol resolving.
+ */
+static void perf_read_sample(struct perf_handle *perf, struct perf_record_sample *event)
+{
+	struct event_sample *rec = (struct event_sample *)(event->array);
+	uint64_t idx = 0;
+
+	/* check if the sample is for our PID */
+	if (rec->pid != perf->pid)
+		return;
+
+	if (perf->debug)
+		trace_debug_add_resolve_symbol(perf->debug, rec->ip, NULL);
+
+	if (write(perf->fd, rec, sizeof(*rec)) != sizeof(*rec))
+		return;
+
+	while (idx < rec->nr) {
+		if (write(perf->fd, &rec->ips[idx], sizeof(uint64_t)) != sizeof(uint64_t))
+			return;
+		if (perf->debug)
+			trace_debug_add_resolve_symbol(perf->debug, rec->ips[idx], NULL);
+		idx++;
+	}
+}
+
+/*
+ * Handle a PERF_RECORD_MMAP2: new memory is mapped into the traced process.
+ * Executable mappings are registered with the symbol resolver, so addresses
+ * inside them can be translated to names.
+ */
+static void perf_read_mmap2(struct perf_handle *perf, struct perf_record_mmap2 *mmap)
+{
+	/* nothing to register with, the resolver is optional */
+	if (!perf->debug)
+		return;
+
+	/* check if mmap is for our PID */
+	if (perf->pid != mmap->pid)
+		return;
+
+	/* check if executable memory is mapped */
+	if (mmap->header.misc & PERF_RECORD_MISC_MMAP_DATA)
+		return;
+
+	/*
+	 * A new dynamic library is loaded with dlopen() by the traced process,
+	 * store it for vma -> name resolving
+	 */
+	trace_debug_obj_add_file(perf->debug, mmap->filename,
+				 mmap->start, mmap->start + mmap->len, mmap->pgoff);
+}
+
+/*
+ * Handle a PERF_RECORD_COMM: a thread of the traced process got a (new)
+ * name. Update the name of a known thread, or append a new entry to the
+ * thread table. Allocation failures are tolerated: the entry is skipped or
+ * its name stays NULL.
+ */
+static void perf_read_comm(struct perf_handle *perf, struct perf_record_comm *comm)
+{
+	struct perf_scan_thread *tmp;
+	int i;
+
+	/* check if the thread is started by PID */
+	if (perf->pid != comm->pid)
+		return;
+
+	for (i = 0; i < perf->thr_count; i++) {
+		if (perf->thr_map[i].tid == comm->tid) {
+			free(perf->thr_map[i].comm);
+			perf->thr_map[i].comm = strdup(comm->comm);
+			return;
+		}
+	}
+
+	tmp = realloc(perf->thr_map, (perf->thr_count + 1) * sizeof(struct perf_scan_thread));
+	if (!tmp)
+		return;
+
+	perf->thr_map = tmp;
+	/* realloc() does not clear the new slot; initialize all of its fields */
+	perf->thr_map[perf->thr_count] = (struct perf_scan_thread) {
+		.tid = comm->tid,
+		.comm = strdup(comm->comm),
+	};
+	perf->thr_count++;
+}
+
+/*
+ * Body of the reader thread: enable the events and drain all ring buffers
+ * until perf->running is cleared, dispatching sample, comm and mmap2
+ * records to their handlers. The events are disabled before the thread
+ * exits.
+ *
+ * NOTE(review): perf->running is a plain bool shared with the thread that
+ * calls perf_reader_stop(); consider making it atomic.
+ */
+static void *perf_reader_thread(void *data)
+{
+	struct perf_handle *perf = data;
+	struct perf_mmap *map;
+	union perf_event *event;
+
+	perf_evlist__enable(perf->evlist);
+
+	while (perf->running) {
+		/*
+		 * Sleep until there is data, instead of spinning on the ring
+		 * buffers. The timeout bounds the time needed to notice a
+		 * stop request; the buffers are drained in any case.
+		 */
+		perf_evlist__poll(perf->evlist, 100);
+
+		perf_evlist__for_each_mmap(perf->evlist, map, false) {
+			if (perf_mmap__read_init(map) < 0)
+				continue;
+
+			while ((event = perf_mmap__read_event(map)) != NULL) {
+
+				switch (event->sample.header.type) {
+				case PERF_RECORD_SAMPLE:
+					perf_read_sample(perf, (struct perf_record_sample *)event);
+					break;
+				case PERF_RECORD_COMM:
+					perf_read_comm(perf, (struct perf_record_comm *)event);
+					break;
+				case PERF_RECORD_MMAP2:
+					perf_read_mmap2(perf, (struct perf_record_mmap2 *)event);
+					break;
+				default:
+					/* other record types are not used */
+					break;
+				}
+
+				perf_mmap__consume(map);
+			}
+
+			perf_mmap__read_done(map);
+		}
+	}
+	perf_evlist__disable(perf->evlist);
+
+	return NULL;
+}
+
+/*
+ * Raise the limit of open files of this process: up to the hard limit if
+ * the soft limit is below it, otherwise both limits are raised by 100.
+ *
+ * Returns 0 on success, -1 on failure.
+ */
+static int increase_file_limit(void)
+{
+	struct rlimit lim;
+
+	if (getrlimit(RLIMIT_NOFILE, &lim) != 0)
+		return -1;
+
+	if (lim.rlim_cur >= lim.rlim_max) {
+		lim.rlim_cur += 100;
+		lim.rlim_max += 100;
+	} else {
+		lim.rlim_cur = lim.rlim_max;
+	}
+
+	return setrlimit(RLIMIT_NOFILE, &lim);
+}
+
+/*
+ * Open and map the events of @perf and start the reader thread. If the
+ * process runs out of file descriptors while opening the events, the file
+ * limit is raised and the open is retried.
+ *
+ * Returns 0 on success (or if the reader is already running) and a negative
+ * errno value on failure, in which case everything is closed again.
+ */
+static int perf_reader_start(struct perf_handle *perf)
+{
+	pthread_attr_t attrib;
+	bool mapped = false;
+	int err;
+
+	if (perf->running)
+		return 0;
+
+	pthread_attr_init(&attrib);
+	pthread_attr_setdetachstate(&attrib, PTHREAD_CREATE_JOINABLE);
+
+	for (;;) {
+		err = perf_evlist__open(perf->evlist);
+		if (!err)
+			break;
+		if (err != -EMFILE || increase_file_limit())
+			goto out;
+	}
+
+	err = perf_evlist__mmap(perf->evlist, 4);
+	if (err)
+		goto out;
+	mapped = true;
+
+	/* the thread checks the flag as soon as it starts */
+	perf->running = true;
+	err = pthread_create(&perf->reader, &attrib, perf_reader_thread, perf);
+	if (err)
+		err = -err;	/* pthread returns a positive errno, callers expect a negative one */
+
+out:
+	pthread_attr_destroy(&attrib);
+	if (err) {
+		perf->running = false;
+		if (mapped)
+			perf_evlist__munmap(perf->evlist);
+		perf_evlist__close(perf->evlist);
+	}
+	return err;
+}
+
+/*
+ * perf_handle.start(): begin sampling. Raises perf_error if the reader is
+ * already running or cannot be started.
+ */
+PyObject *PyPerf_start(PyPerf *self)
+{
+	struct perf_handle *handle = self->ptrObj;
+	int ret;
+
+	if (handle->running) {
+		PyErr_Format(PERF_ERROR, "Perf reader is already started");
+		return NULL;
+	}
+
+	ret = perf_reader_start(handle);
+	if (ret == 0)
+		Py_RETURN_NONE;
+
+	PyErr_Format(PERF_ERROR,
+		     "Failed to start perf reader - %s", strerror(-ret));
+	return NULL;
+}
+
+/*
+ * perf_handle.stop(): stop sampling. Raises perf_error if the reader is
+ * not running.
+ */
+PyObject *PyPerf_stop(PyPerf *self)
+{
+	struct perf_handle *handle = self->ptrObj;
+
+	if (handle->running) {
+		perf_reader_stop(handle);
+		Py_RETURN_NONE;
+	}
+
+	PyErr_Format(PERF_ERROR, "Perf reader is not started");
+	return NULL;
+}
+
+/* Context passed to sym_get() while walking the resolved symbols. */
+struct symb_walk {
+	uint64_t ip;	/* address to look for */
+	char *name;	/* result: heap string set by sym_get(), NULL if not found */
+};
+
+/*
+ * Callback for trace_debug_walk_resolved_symbols(). @data is a
+ * struct symb_walk. If @symb describes the wanted address, store a newly
+ * allocated "<function> @ <file>" (or "(0x<address>) @ <file>" if the
+ * function name is unknown) in it.
+ *
+ * Returns 1 if the address matched, 0 otherwise. On allocation failure
+ * the name is left NULL.
+ * NOTE(review): presumably a non-zero return stops the walk - confirm
+ * against trace-obj-debug.c.
+ */
+static int sym_get(struct tracecmd_debug_symbols *symb, void *data)
+{
+	struct symb_walk *s = (struct symb_walk *)data;
+	int ret;
+
+	if (s->ip != symb->vma_near)
+		return 0;
+
+	if (symb->name)
+		ret = asprintf(&s->name, "%s @ %s", symb->name, symb->fname);
+	else
+		ret = asprintf(&s->name, "(0x%llX) @ %s", symb->vma_near, symb->fname);
+	/* the pointer is undefined if asprintf() fails */
+	if (ret < 0)
+		s->name = NULL;
+
+	return 1;
+}
+
+/*
+ * Translate the address @ip to a printable name, using the resolved
+ * symbols of @perf when available and the hexadecimal address otherwise.
+ *
+ * Returns a newly allocated string which the caller must free(), or NULL
+ * on allocation failure.
+ */
+static char *ip_name(struct perf_handle *perf, uint64_t ip)
+{
+	struct symb_walk symb;
+
+	symb.ip = ip;
+	symb.name = NULL;
+	if (perf && perf->debug)
+		trace_debug_walk_resolved_symbols(perf->debug, sym_get, &symb);
+
+	/* the cast keeps the format correct where uint64_t is not a long */
+	if (!symb.name &&
+	    asprintf(&symb.name, "0x%llX", (unsigned long long)ip) < 0)
+		symb.name = NULL;
+
+	return symb.name;
+}
+
+/*
+ * perf_handle.get_samples(): return the recorded samples as a list of
+ * perf_event_sample objects and empty the samples file. The reader must be
+ * stopped. Reading ends at the first incomplete record.
+ *
+ * Returns a new list, or NULL with a Python exception set.
+ */
+PyObject *PyPerf_getSamples(PyPerf *self)
+{
+	struct perf_handle *perf = self->ptrObj;
+	struct perf_event_sample *store;
+	struct event_sample sample;
+	PyObject *slist, *sobject;
+	uint64_t i, ip;
+	int err;
+
+	if (perf->running) {
+		PyErr_Format(PERF_ERROR, "Perf reader is running");
+		return NULL;
+	}
+
+	if (lseek(perf->fd, 0, SEEK_SET) == (off_t)-1) {
+		PyErr_Format(PERF_ERROR, "No samples");
+		return NULL;
+	}
+
+	slist = PyList_New(0);
+	if (!slist)
+		return NULL;
+
+	while (read(perf->fd, &sample, sizeof(sample)) == sizeof(sample)) {
+		/* a call chain this long would overflow the allocation size */
+		if (sample.nr > (SIZE_MAX - sizeof(*store)) / sizeof(char *))
+			break;
+		store = calloc(1, sizeof(*store) + (sample.nr * sizeof(char *)));
+		if (!store)
+			break;
+		memcpy(&store->data, &sample, sizeof(sample));
+		store->perf = perf;
+		store->ip = ip_name(perf, sample.ip);
+		for (i = 0; i < sample.nr; i++) {
+			if (read(perf->fd, &ip, sizeof(uint64_t)) != sizeof(uint64_t))
+				break;
+			store->ips[i] = ip_name(perf, ip);
+		}
+		if (i < sample.nr) {
+			/* truncated record: drop it with the names read so far */
+			py_perf_sample_free(store);
+			break;
+		}
+
+		sobject = PyPerfEventSample_New(store);
+		if (!sobject) {
+			/* NOTE(review): assumes the wrapper does not take ownership on failure */
+			py_perf_sample_free(store);
+			Py_DECREF(slist);
+			return NULL;
+		}
+		/* the list holds its own reference, release ours */
+		err = PyList_Append(slist, sobject);
+		Py_DECREF(sobject);
+		if (err) {
+			Py_DECREF(slist);
+			return NULL;
+		}
+	}
+
+	/*
+	 * Rewind after truncating: the file offset is not changed by
+	 * ftruncate() and the next session would write after a hole.
+	 */
+	if (ftruncate(perf->fd, 0) == 0)
+		lseek(perf->fd, 0, SEEK_SET);
+
+	return slist;
+}
+
+/* perf_event_sample.id(): the sample identifier, as a Python int. */
+PyObject *PyPerfSample_id(PyPerfEventSample *self)
+{
+	const struct perf_event_sample *evt = self->ptrObj;
+
+	return PyLong_FromUnsignedLongLong(evt->data.id);
+}
+
+/* perf_event_sample.pid(): the process id of the sample, as a Python int. */
+PyObject *PyPerfSample_pid(PyPerfEventSample *self)
+{
+	const struct perf_event_sample *evt = self->ptrObj;
+
+	return PyLong_FromUnsignedLong(evt->data.pid);
+}
+
+/* perf_event_sample.tid(): the thread id of the sample, as a Python int. */
+PyObject *PyPerfSample_tid(PyPerfEventSample *self)
+{
+	const struct perf_event_sample *evt = self->ptrObj;
+
+	return PyLong_FromUnsignedLong(evt->data.tid);
+}
+
+/* perf_event_sample.time(): the timestamp of the sample, as a Python int. */
+PyObject *PyPerfSample_time(PyPerfEventSample *self)
+{
+	const struct perf_event_sample *evt = self->ptrObj;
+
+	return PyLong_FromUnsignedLongLong(evt->data.time);
+}
+
+/* perf_event_sample.cpu(): the CPU the sample was taken on, as a Python int. */
+PyObject *PyPerfSample_cpu(PyPerfEventSample *self)
+{
+	const struct perf_event_sample *evt = self->ptrObj;
+
+	return PyLong_FromUnsignedLong(evt->data.cpu);
+}
+
+/* perf_event_sample.stack_count(): number of call chain entries, as a Python int. */
+PyObject *PyPerfSample_nr(PyPerfEventSample *self)
+{
+	const struct perf_event_sample *evt = self->ptrObj;
+
+	return PyLong_FromUnsignedLongLong(evt->data.nr);
+}
+
+/*
+ * perf_event_sample.ip(): the resolved name of the sampled instruction
+ * pointer as a Python string, or None if no name could be allocated.
+ */
+PyObject *PyPerfSample_ip(PyPerfEventSample *self)
+{
+	struct perf_event_sample *sample = self->ptrObj;
+
+	/* PyUnicode_FromString() must not be called with NULL */
+	if (!sample->ip)
+		Py_RETURN_NONE;
+
+	return PyUnicode_FromString(sample->ip);
+}
+
+/*
+ * perf_event_sample.tid_comm(): the name of the thread the sample belongs
+ * to, looked up in the thread table of the session. Returns None if the
+ * thread or its name is not known.
+ */
+PyObject *PyPerfSample_tid_comm(PyPerfEventSample *self, PyObject *args, PyObject *kwargs)
+{
+	struct perf_event_sample *sample = self->ptrObj;
+	struct perf_handle *perf = sample->perf;
+	int idx;
+
+	if (!perf || !perf->thr_count || !perf->thr_map)
+		Py_RETURN_NONE;
+
+	for (idx = 0; idx < perf->thr_count; idx++) {
+		if (perf->thr_map[idx].tid != sample->data.tid)
+			continue;
+		/* only the first matching entry is considered */
+		if (perf->thr_map[idx].comm)
+			return PyUnicode_FromString(perf->thr_map[idx].comm);
+		break;
+	}
+
+	Py_RETURN_NONE;
+}
+
+/*
+ * perf_event_sample.stack(): the resolved names of the call chain as a
+ * list of Python strings. An entry whose name could not be allocated is
+ * reported as None.
+ *
+ * Returns a new list, or NULL with a Python exception set.
+ */
+PyObject *PyPerfSample_ips(PyPerfEventSample *self)
+{
+	struct perf_event_sample *sample = self->ptrObj;
+	PyObject *slist, *name;
+	uint64_t i;
+
+	slist = PyList_New(0);
+	if (!slist)
+		return NULL;
+
+	for (i = 0; i < sample->data.nr; i++) {
+		if (sample->ips[i]) {
+			name = PyUnicode_FromString(sample->ips[i]);
+		} else {
+			name = Py_None;
+			Py_INCREF(name);
+		}
+		if (!name || PyList_Append(slist, name) < 0) {
+			Py_XDECREF(name);
+			Py_DECREF(slist);
+			return NULL;
+		}
+		/* the list holds its own reference, release ours */
+		Py_DECREF(name);
+	}
+
+	return slist;
+}
diff --git a/src/perfpy-utils.h b/src/perfpy-utils.h
new file mode 100644
index 0000000..0727a9a
--- /dev/null
+++ b/src/perfpy-utils.h
@@ -0,0 +1,41 @@
+/* SPDX-License-Identifier: LGPL-2.1 */
+
+/*
+ * Copyright 2022 VMware Inc, Tzvetomir Stoyanov (VMware) <tz.stoyanov@gmail.com>
+ */
+
+#ifndef _TC_PERF_PY_UTILS
+#define _TC_PERF_PY_UTILS
+
+// Python
+#include <Python.h>
+
+// trace-cruncher
+#include "common.h"
+
+struct perf_handle;
+struct perf_event_sample;
+
+C_OBJECT_WRAPPER_DECLARE(perf_handle, PyPerf);
+C_OBJECT_WRAPPER_DECLARE(perf_event_sample, PyPerfEventSample);
+
+PyObject *PyPerfSample_new(PyObject *self, PyObject *args, PyObject *kwargs);
+
+PyObject *PyPerf_start(PyPerf *self);
+PyObject *PyPerf_stop(PyPerf *self);
+PyObject *PyPerf_getSamples(PyPerf *self);
+
+PyObject *PyPerfSample_id(PyPerfEventSample *self);
+PyObject *PyPerfSample_ip(PyPerfEventSample *self);
+PyObject *PyPerfSample_pid(PyPerfEventSample *self);
+PyObject *PyPerfSample_tid(PyPerfEventSample *self);
+PyObject *PyPerfSample_tid_comm(PyPerfEventSample *self, PyObject *args, PyObject *kwargs);
+PyObject *PyPerfSample_time(PyPerfEventSample *self);
+PyObject *PyPerfSample_cpu(PyPerfEventSample *self);
+PyObject *PyPerfSample_nr(PyPerfEventSample *self);
+PyObject *PyPerfSample_ips(PyPerfEventSample *self);
+
+void py_perf_handle_free(struct perf_handle *handle);
+void py_perf_sample_free(struct perf_event_sample *sample);
+
+#endif
diff --git a/src/perfpy.c b/src/perfpy.c
new file mode 100644
index 0000000..745d519
--- /dev/null
+++ b/src/perfpy.c
@@ -0,0 +1,141 @@
+// SPDX-License-Identifier: LGPL-2.1
+
+/*
+ * Copyright 2022 VMware Inc, Tzvetomir Stoyanov (VMware) <tz.stoyanov@gmail.com>
+ */
+
+// Python
+#include <Python.h>
+
+// libperf
+#include <perf/core.h>
+#include <perf/evsel.h>
+#include <perf/mmap.h>
+#include <perf/event.h>
+
+// trace-cruncher
+#include "common.h"
+#include "perfpy-utils.h"
+
+extern PyObject *PERF_ERROR;
+
+/* Methods of the Python perf_handle object; none of them takes arguments. */
+static PyMethodDef PyPerf_methods[] = {
+	{"start",
+	 (PyCFunction) PyPerf_start,
+	 METH_NOARGS,
+	 "start sampling"
+	},
+	{"stop",
+	 (PyCFunction) PyPerf_stop,
+	 METH_NOARGS,
+	 "stop sampling"
+	},
+	{"get_samples",
+	 (PyCFunction) PyPerf_getSamples,
+	 METH_NOARGS,
+	 "get recorded samples"
+	},
+	{NULL}	/* sentinel */
+};
+C_OBJECT_WRAPPER(perf_handle, PyPerf, NO_DESTROY, py_perf_handle_free);
+
+/* Methods of the Python perf_event_sample object. */
+static PyMethodDef PyPerfEventSample_methods[] = {
+	{"id",
+	 (PyCFunction) PyPerfSample_id,
+	 METH_NOARGS,
+	 "get sample id"
+	},
+	{"ip",
+	 (PyCFunction) PyPerfSample_ip,
+	 METH_NOARGS,
+	 "get sample ip"
+	},
+	{"pid",
+	 (PyCFunction) PyPerfSample_pid,
+	 METH_NOARGS,
+	 "get sample pid"
+	},
+	{"tid",
+	 (PyCFunction) PyPerfSample_tid,
+	 METH_NOARGS,
+	 "get sample tid"
+	},
+	{"tid_comm",
+	 (PyCFunction) PyPerfSample_tid_comm,
+	 METH_VARARGS | METH_KEYWORDS,
+	 "get the name of the sample thread"
+	},
+	{"time",
+	 (PyCFunction) PyPerfSample_time,
+	 METH_NOARGS,
+	 "get sample timestamp"
+	},
+	{"cpu",
+	 (PyCFunction) PyPerfSample_cpu,
+	 METH_NOARGS,
+	 "get sample cpu"
+	},
+	{"stack_count",
+	 (PyCFunction) PyPerfSample_nr,
+	 METH_NOARGS,
+	 "get sample stack count"
+	},
+	{"stack",
+	 (PyCFunction) PyPerfSample_ips,
+	 METH_NOARGS,
+	 "get sample stack"
+	},
+	{NULL}	/* sentinel */
+};
+C_OBJECT_WRAPPER(perf_event_sample, PyPerfEventSample, NO_DESTROY, py_perf_sample_free);
+
+/* Module level functions of tracecruncher.perfpy. */
+static PyMethodDef perfpy_methods[] = {
+	{"sample",
+	 (PyCFunction) PyPerfSample_new,
+	 METH_VARARGS | METH_KEYWORDS,
+	 "new perf sample instance"
+	},
+	{NULL}	/* sentinel */
+};
+
+/*
+ * Print callback registered with libperf_init(): write every libperf
+ * message to stderr, whatever its @level.
+ */
+static int perf_error_print(enum libperf_print_level level,
+			    const char *fmt, va_list ap)
+{
+	int printed = vfprintf(stderr, fmt, ap);
+
+	return printed;
+}
+
+/* Definition of the "perfpy" extension module. */
+static struct PyModuleDef perfpy_module = {
+	PyModuleDef_HEAD_INIT,
+	"perfpy",			/* module name */
+	"Python interface for Perf.",	/* module docstring */
+	-1,				/* size of the per-interpreter state */
+	perfpy_methods
+};
+
+/*
+ * Module initialization: register the wrapper types and the perf_error
+ * exception, then initialize libperf. Importing the module fails with
+ * perf_error unless the process runs with root privileges.
+ */
+PyMODINIT_FUNC PyInit_perfpy(void)
+{
+	PyObject *module;
+
+	if (!PyPerfTypeInit())
+		return NULL;
+	if (!PyPerfEventSampleTypeInit())
+		return NULL;
+
+	PERF_ERROR = PyErr_NewException("tracecruncher.perfpy.perf_error",
+					NULL, NULL);
+	if (!PERF_ERROR)
+		return NULL;
+
+	/* check the privileges before the module is created, so nothing has to be undone */
+	if (geteuid() != 0) {
+		PyErr_SetString(PERF_ERROR,
+				"Permission denied. Root privileges are required.");
+		Py_CLEAR(PERF_ERROR);
+		return NULL;
+	}
+
+	module = PyModule_Create(&perfpy_module);
+	if (!module) {
+		Py_CLEAR(PERF_ERROR);
+		return NULL;
+	}
+
+	PyModule_AddObject(module, "perf_error", PERF_ERROR);
+	PyModule_AddObject(module, "perf_handle", (PyObject *) &PyPerfType);
+	PyModule_AddObject(module, "perf_event_sample", (PyObject *) &PyPerfEventSampleType);
+
+	libperf_init(perf_error_print);
+
+	return module;
+}
-- 
2.34.1


^ permalink raw reply related	[flat|nested] 8+ messages in thread

* [RFC PATCH 3/3] trace-cruncher: perf example
  2022-02-24 16:37 [RFC PATCH 0/3] trace-cruncher: Initial support for perf Tzvetomir Stoyanov (VMware)
  2022-02-24 16:37 ` [RFC PATCH 1/3] trace-cruncher: Logic for resolving address to function name Tzvetomir Stoyanov (VMware)
  2022-02-24 16:37 ` [RFC PATCH 2/3] trace-cruncher: Support for perf Tzvetomir Stoyanov (VMware)
@ 2022-02-24 16:37 ` Tzvetomir Stoyanov (VMware)
  2022-03-18  9:52   ` Yordan Karadzhov
  2022-02-24 16:52 ` [RFC PATCH 0/3] trace-cruncher: Initial support for perf Ian Rogers
  3 siblings, 1 reply; 8+ messages in thread
From: Tzvetomir Stoyanov (VMware) @ 2022-02-24 16:37 UTC (permalink / raw)
  To: y.karadz, acme, olsajiri, irogers
  Cc: rostedt, linux-trace-devel, linux-perf-users

Example python program for using trace-cruncher to collect performance
statistics of a given process.

Signed-off-by: Tzvetomir Stoyanov (VMware) <tz.stoyanov@gmail.com>
---
 examples/perf_sampling.py | 51 +++++++++++++++++++++++++++++++++++++++
 1 file changed, 51 insertions(+)
 create mode 100755 examples/perf_sampling.py

diff --git a/examples/perf_sampling.py b/examples/perf_sampling.py
new file mode 100755
index 0000000..1b57f39
--- /dev/null
+++ b/examples/perf_sampling.py
@@ -0,0 +1,51 @@
+#!/usr/bin/env python3
+
+"""
+SPDX-License-Identifier: CC-BY-4.0
+
+Copyright 2022 VMware Inc, Tzvetomir Stoyanov (VMware) <tz.stoyanov@gmail.com>
+"""
+
+import sys
+import time
+import signal
+
+import tracecruncher.perfpy as perf
+
+def SortKey(sample):
+    return sample.time()
+
+def perf_stop(sig, frame):
+    # SIGINT handler, installed in the __main__ block below.
+    # `p` is the module-level perf sample object created there.
+    # Stop collection of performance traces
+    p.stop()
+
+if __name__ == "__main__":
+    if len(sys.argv) < 2:
+        print('Usage: ', sys.argv[0], ' [PROCESS]')
+        sys.exit(1)
+
+    # Create perf sample object for the given process
+    p = perf.sample(pid=int(sys.argv[1]), freq=99)
+    signal.signal(signal.SIGINT, perf_stop)
+    print('Start collecting performance data, press ctrl+c  to stop')
+    # Start collecting performance traces
+    p.start()
+    # wait for ctrl+c
+    signal.pause()
+    # Get collected samples
+    samples = p.get_samples()
+    # Sort the list based on the timestamp
+    samples.sort(key=SortKey)
+    time = 0
+    ip_count = 0
+    for s in samples:
+        # Print PID, TID, time and trace depth of each sample
+        if time == 0:
+            time = s.time()
+        print("{0} {1} ({2}), +{3}:".format(s.ip(), s.tid(), s.tid_comm(), s.time() - time))
+        ips = s.stack()
+        ip_count += len(ips)
+        for ip in reversed(ips):
+            # Print stack trace of the sample
+            print("\t{0}".format(ip))
+    print("\nCollected {0} samples, {1} ip traces".format(len(samples), ip_count))
\ No newline at end of file
-- 
2.34.1


^ permalink raw reply related	[flat|nested] 8+ messages in thread

* Re: [RFC PATCH 0/3] trace-cruncher: Initial support for perf
  2022-02-24 16:37 [RFC PATCH 0/3] trace-cruncher: Initial support for perf Tzvetomir Stoyanov (VMware)
                   ` (2 preceding siblings ...)
  2022-02-24 16:37 ` [RFC PATCH 3/3] trace-cruncher: perf example Tzvetomir Stoyanov (VMware)
@ 2022-02-24 16:52 ` Ian Rogers
  3 siblings, 0 replies; 8+ messages in thread
From: Ian Rogers @ 2022-02-24 16:52 UTC (permalink / raw)
  To: Tzvetomir Stoyanov (VMware)
  Cc: y.karadz, acme, olsajiri, rostedt, linux-trace-devel,
	linux-perf-users

On Thu, Feb 24, 2022 at 8:37 AM Tzvetomir Stoyanov (VMware)
<tz.stoyanov@gmail.com> wrote:
>
> Two major functionalities are introduced by this patch set:
>  - VMA <-> function name resolving, using bfd library.

Just wanted to point out that perf is often not built against libbfd:
Please build perf against libbfd -
https://bugs.debian.org/cgi-bin/bugreport.cgi?bug=911815

where long running addr2line is now the best performance solution:
https://lore.kernel.org/linux-perf-users/20210909112202.1947499-1-tonyg@leastfixedpoint.com/

The comment from the bug:
perf can link against libbfd if available, but the result is
undistributable as they are licenced under GPL v2 and v3+
respectively.

Thanks,
Ian

>  - Support for Linux kernel perf framework, using perf library.
>
> This is still a work in progress. Depends on this patch, not yet merged:
> https://lore.kernel.org/linux-perf-users/20220221102628.43904-1-tz.stoyanov@gmail.com/
>
> Tzvetomir Stoyanov (VMware) (3):
>   trace-cruncher: Logic for resolving address to function name
>   trace-cruncher: Support for perf
>   trace-cruncher: perf example
>
>  examples/perf_sampling.py |  51 +++
>  setup.py                  |   9 +-
>  src/perfpy-utils.c        | 699 ++++++++++++++++++++++++++++++
>  src/perfpy-utils.h        |  41 ++
>  src/perfpy.c              | 141 ++++++
>  src/trace-obj-debug.c     | 873 ++++++++++++++++++++++++++++++++++++++
>  src/trace-obj-debug.h     |  52 +++
>  7 files changed, 1865 insertions(+), 1 deletion(-)
>  create mode 100755 examples/perf_sampling.py
>  create mode 100644 src/perfpy-utils.c
>  create mode 100644 src/perfpy-utils.h
>  create mode 100644 src/perfpy.c
>  create mode 100644 src/trace-obj-debug.c
>  create mode 100644 src/trace-obj-debug.h
>
> --
> 2.34.1
>

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [RFC PATCH 2/3] trace-cruncher: Support for perf
  2022-02-24 16:37 ` [RFC PATCH 2/3] trace-cruncher: Support for perf Tzvetomir Stoyanov (VMware)
@ 2022-02-25 14:51   ` Arnaldo Carvalho de Melo
  2022-02-25 15:38     ` Tzvetomir Stoyanov
  0 siblings, 1 reply; 8+ messages in thread
From: Arnaldo Carvalho de Melo @ 2022-02-25 14:51 UTC (permalink / raw)
  To: Tzvetomir Stoyanov (VMware)
  Cc: y.karadz, olsajiri, irogers, rostedt, linux-trace-devel,
	linux-perf-users

Em Thu, Feb 24, 2022 at 06:37:10PM +0200, Tzvetomir Stoyanov (VMware) escreveu:
> Initial perf support for trace-cruncher, using libperf. As a first
> stage, collecting of stack trace samples of given process is supported.

Are you aware of tools/perf/util/python.c? It was done for use in tuna:

https://git.kernel.org/pub/scm/utils/tuna/tuna.git/

I think it is already used in other tools, IIRC tuned also uses it:

https://github.com/redhat-performance/tuned

yeah:

⬢[acme@toolbox tuned]$ grep -r "import perf" *
tuned/plugins/plugin_irqbalance.py:import perf
tuned/plugins/plugin_scheduler.py:import perf
⬢[acme@toolbox tuned]$

Please consider looking at it and possibly adding what you need. It is available in most distros, I think:

Fedora:

python3-perf.x86_64 : Python bindings for apps which will manipulate perf events

[root@five perf]# cat tools/perf/python/twatch.py
#! /usr/bin/env python
# SPDX-License-Identifier: GPL-2.0-only
# -*- python -*-
# -*- coding: utf-8 -*-
#   twatch - Experimental use of the perf python interface
#   Copyright (C) 2011 Arnaldo Carvalho de Melo <acme@redhat.com>
#

import perf

def main(context_switch = 0, thread = -1):
	"""Print every perf event read from each CPU, in an endless loop.

	A single software "dummy" event is created with task and comm
	records enabled, opened for the selected cpus/threads, added to an
	evlist and mmap'ed.  The loop then blocks in poll(), attempts one
	read per CPU and prints each event obtained together with its
	sample_cpu, sample_pid and sample_tid.

	context_switch -- forwarded unchanged to perf.evsel(context_switch=...).
	thread         -- forwarded unchanged to perf.thread_map(); the
	                  commented-out example at the bottom of the file
	                  passes a pid here.

	The loop has no exit condition, so this function does not return.
	"""
	cpus = perf.cpu_map()
	threads = perf.thread_map(thread)
	evsel = perf.evsel(type	  = perf.TYPE_SOFTWARE,
			   config = perf.COUNT_SW_DUMMY,
			   task = 1, comm = 1, mmap = 0, freq = 0,
			   wakeup_events = 1, watermark = 1,
			   sample_id_all = 1, context_switch = context_switch,
			   sample_type = perf.SAMPLE_PERIOD | perf.SAMPLE_TID | perf.SAMPLE_CPU)

	"""What we want are just the PERF_RECORD_ lifetime events for threads,
	 using the default, PERF_TYPE_HARDWARE + PERF_COUNT_HW_CYCLES & freq=1
	 (the default), makes perf reenable irq_vectors:local_timer_entry, when
	 disabling nohz, not good for some use cases where all we want is to get
	 threads comes and goes... So use (perf.TYPE_SOFTWARE, perf_COUNT_SW_DUMMY,
	 freq=0) instead."""

	# The event is opened first, then wrapped in an evlist built from the
	# same cpu and thread maps, and the evlist is mmap'ed before polling.
	evsel.open(cpus = cpus, threads = threads);
	evlist = perf.evlist(cpus, threads)
	evlist.add(evsel)
	evlist.mmap()
	while True:
		# timeout = -1: presumably blocks until something is readable,
		# as with poll(2) -- confirm against the perf binding.
		evlist.poll(timeout = -1)
		for cpu in cpus:
			# At most one event is read per CPU on each pass.
			event = evlist.read_on_cpu(cpu)
			if not event:
				# Nothing was returned for this CPU; try the next one.
				continue
			print("cpu: {0}, pid: {1}, tid: {2} {3}".format(event.sample_cpu,
                                                                        event.sample_pid,
                                                                        event.sample_tid,
                                                                        event))

if __name__ == '__main__':
    """
	To test the PERF_RECORD_SWITCH record, pick a pid and replace
	in the following line.

	Example output:

cpu: 3, pid: 31463, tid: 31593 { type: context_switch, next_prev_pid: 31463, next_prev_tid: 31593, switch_out: 1 }
cpu: 1, pid: 31463, tid: 31489 { type: context_switch, next_prev_pid: 31463, next_prev_tid: 31489, switch_out: 1 }
cpu: 2, pid: 31463, tid: 31496 { type: context_switch, next_prev_pid: 31463, next_prev_tid: 31496, switch_out: 1 }
cpu: 3, pid: 31463, tid: 31491 { type: context_switch, next_prev_pid: 31463, next_prev_tid: 31491, switch_out: 0 }

	It is possible as well to use event.misc & perf.PERF_RECORD_MISC_SWITCH_OUT
	to figure out if this is a context switch in or out of the monitored threads.

	If bored, please add command line option parsing support for these options :-)
    """
    # main(context_switch = 1, thread = 31463)
    # Default run: main() is called with its declared defaults
    # (context_switch = 0, thread = -1); the commented-out call above is
    # the context-switch variant described in the note.
    main()
[root@five perf]# export PYTHONPATH=/tmp/build/perf/python_ext_build/lib/
[root@five perf]# tools/perf/python/twatch.py
cpu: 22, pid: 13290, tid: 3471647 { type: fork, pid: 13290, ppid: 13290, tid: 3471653, ptid: 3471647, time: 151504201166040}
cpu: 27, pid: 13290, tid: 3471653 { type: comm, pid: 13290, tid: 3471653, comm: MediaPD~oder #1 }
cpu: 27, pid: 13290, tid: 3471653 { type: comm, pid: 13290, tid: 3471653, comm: MediaPD~oder #1 }
cpu: 27, pid: 13290, tid: 3471653 { type: fork, pid: 13290, ppid: 13290, tid: 3471654, ptid: 3471653, time: 151504201524181}
cpu: 22, pid: 13290, tid: 3471654 { type: comm, pid: 13290, tid: 3471654, comm: MediaPD~oder #2 }
cpu: 22, pid: 13290, tid: 3471654 { type: comm, pid: 13290, tid: 3471654, comm: MediaPD~oder #2 }
cpu: 29, pid: 13290, tid: 3471653 { type: exit, pid: 13290, ppid: 12612, tid: 3471653, ptid: 12612, time: 151504213801994}
cpu: 0, pid: 13290, tid: 3471654 { type: exit, pid: 13290, ppid: 12612, tid: 3471654, ptid: 12612, time: 151504213861954}
cpu: 2, pid: 3471643, tid: 3471643 { type: exit, pid: 3471643, ppid: 13080, tid: 3471643, ptid: 13080, time: 151505955082335}
cpu: 20, pid: 13080, tid: 13080 { type: fork, pid: 3471655, ppid: 13080, tid: 3471655, ptid: 13080, time: 151505955346595}
cpu: 20, pid: 13080, tid: 13080 { type: fork, pid: 3471656, ppid: 13080, tid: 3471656, ptid: 13080, time: 151505955523645}
cpu: 20, pid: 13080, tid: 13080 { type: fork, pid: 3471657, ppid: 13080, tid: 3471657, ptid: 13080, time: 151505955649485}
cpu: 9, pid: 3471656, tid: 3471656 { type: comm, pid: 3471656, tid: 3471656, comm: grep }
cpu: 21, pid: 3471655, tid: 3471655 { type: comm, pid: 3471655, tid: 3471655, comm: sensors }
cpu: 28, pid: 3471657, tid: 3471657 { type: comm, pid: 3471657, tid: 3471657, comm: sed }
cpu: 21, pid: 3471655, tid: 3471655 { type: exit, pid: 3471655, ppid: 13080, tid: 3471655, ptid: 13080, time: 151505986031034}
cpu: 20, pid: 3471656, tid: 3471656 { type: exit, pid: 3471656, ppid: 13080, tid: 3471656, ptid: 13080, time: 151505986154194}
cpu: 22, pid: 3471657, tid: 3471657 { type: exit, pid: 3471657, ppid: 13080, tid: 3471657, ptid: 13080, time: 151505986292214}
cpu: 23, pid: 13080, tid: 13080 { type: fork, pid: 3471658, ppid: 13080, tid: 3471658, ptid: 13080, time: 151505986418014}
cpu: 27, pid: 3471658, tid: 3471658 { type: comm, pid: 3471658, tid: 3471658, comm: sleep }
^CTraceback (most recent call last):
  File "/var/home/acme/git/perf/tools/perf/python/twatch.py", line 61, in <module>
    main()
  File "/var/home/acme/git/perf/tools/perf/python/twatch.py", line 33, in main
    evlist.poll(timeout = -1)
KeyboardInterrupt

[root@five perf]#


 
> Signed-off-by: Tzvetomir Stoyanov (VMware) <tz.stoyanov@gmail.com>
> ---
>  setup.py           |   9 +-
>  src/perfpy-utils.c | 699 +++++++++++++++++++++++++++++++++++++++++++++
>  src/perfpy-utils.h |  41 +++
>  src/perfpy.c       | 141 +++++++++
>  4 files changed, 889 insertions(+), 1 deletion(-)
>  create mode 100644 src/perfpy-utils.c
>  create mode 100644 src/perfpy-utils.h
>  create mode 100644 src/perfpy.c
> 
> diff --git a/setup.py b/setup.py
> index 4d7e727..31ca2e3 100644
> --- a/setup.py
> +++ b/setup.py
> @@ -19,18 +19,21 @@ def third_party_paths():
>      pkg_ftracepy = pkg.parse('libtracefs')
>      pkg_tracecmd = pkg.parse('libtracecmd')
>      pkg_kshark = pkg.parse('libkshark')
> +    pkg_perf = pkg.parse('libperf')
>  
>      include_dirs = [np.get_include()]
>      include_dirs.extend(pkg_traceevent['include_dirs'])
>      include_dirs.extend(pkg_ftracepy['include_dirs'])
>      include_dirs.extend(pkg_tracecmd['include_dirs'])
>      include_dirs.extend(pkg_kshark['include_dirs'])
> +    include_dirs.extend(pkg_perf['include_dirs'])
>  
>      library_dirs = []
>      library_dirs.extend(pkg_traceevent['library_dirs'])
>      library_dirs.extend(pkg_ftracepy['library_dirs'])
>      library_dirs.extend(pkg_tracecmd['library_dirs'])
>      library_dirs.extend(pkg_kshark['library_dirs'])
> +    library_dirs.extend(pkg_perf['library_dirs'])
>      library_dirs = list(set(library_dirs))
>  
>      return include_dirs, library_dirs
> @@ -61,6 +64,10 @@ def main():
>                            sources=['src/ksharkpy.c', 'src/ksharkpy-utils.c'],
>                            libraries=['kshark'])
>  
> +    module_perf = extension(name='tracecruncher.perfpy',
> +                            sources=['src/perfpy.c', 'src/perfpy-utils.c', 'src/trace-obj-debug.c'],
> +                            libraries=['traceevent', 'perf', 'bfd'])
> +
>      setup(name='tracecruncher',
>            version='0.1.0',
>            description='NumPy based interface for accessing tracing data in Python.',
> @@ -69,7 +76,7 @@ def main():
>            url='https://github.com/vmware/trace-cruncher',
>            license='LGPL-2.1',
>            packages=find_packages(),
> -          ext_modules=[module_ft, module_data, module_ks],
> +          ext_modules=[module_ft, module_data, module_ks, module_perf],
>            classifiers=[
>                'Development Status :: 3 - Alpha',
>                'Programming Language :: Python :: 3',
> diff --git a/src/perfpy-utils.c b/src/perfpy-utils.c
> new file mode 100644
> index 0000000..ae57a63
> --- /dev/null
> +++ b/src/perfpy-utils.c
> @@ -0,0 +1,699 @@
> +// SPDX-License-Identifier: LGPL-2.1
> +
> +/*
> + * Copyright 2022 VMware Inc, Tzvetomir Stoyanov (VMware) <tz.stoyanov@gmail.com>
> + */
> +
> +#ifndef _GNU_SOURCE
> +/** Use GNU C Library. */
> +#define _GNU_SOURCE
> +#endif // _GNU_SOURCE
> +
> +// C
> +#include <stdio.h>
> +#include <unistd.h>
> +#include <pthread.h>
> +#include <sys/stat.h>
> +#include <fcntl.h>
> +#include <dirent.h>
> +#include <sys/resource.h>
> +
> +// libperf
> +#include <linux/perf_event.h>
> +#include <perf/evlist.h>
> +#include <perf/evsel.h>
> +#include <perf/cpumap.h>
> +#include <perf/threadmap.h>
> +#include <perf/mmap.h>
> +#include <perf/core.h>
> +#include <perf/event.h>
> +
> +// trace-cruncher
> +#include "perfpy-utils.h"
> +#include "trace-obj-debug.h"
> +
> +PyObject *PERF_ERROR;
> +
> +#define TMP_FILE "/tmp/perf_temp_data.XXXXXX"
> +
> +struct perf_scan_thread {
> +	uint32_t tid;
> +	char *comm;
> +	struct perf_counts_values count;
> +};
> +
> +struct perf_handle {
> +	bool running;
> +	pthread_t reader;
> +	int fd;
> +	int thr_count;
> +	uint32_t pid;
> +	struct perf_scan_thread *thr_map;
> +	struct perf_evlist *evlist;
> +	struct perf_evsel *evsel;
> +	struct perf_event_attr attr;
> +	struct perf_cpu_map *cpus;
> +	struct trace_debug_object *debug;
> +	struct perf_thread_map *threads;
> +};
> +
> +struct event_sample {
> +	uint64_t		id;		/* PERF_SAMPLE_IDENTIFIER */
> +	uint64_t		ip;		/* PERF_SAMPLE_IP */
> +	uint32_t		pid, tid;	/* PERF_SAMPLE_TID */
> +	uint64_t		time;		/* PERF_SAMPLE_TIME */
> +	uint32_t		cpu, res;	/* PERF_SAMPLE_CPU */
> +	uint64_t		nr;
> +	uint64_t		ips[];		/* PERF_SAMPLE_CALLCHAIN */
> +} __attribute__((packed));
> +
> +struct perf_event_sample {
> +	struct event_sample	data;
> +	struct perf_handle	*perf;
> +	char			*ip;
> +	char			*ips[];
> +} __attribute__((packed));
> +
> +static void perf_reader_stop(struct perf_handle *perf)
> +{
> +
> +	if (!perf->running)
> +		return;
> +
> +	perf->running = false;
> +	pthread_join(perf->reader, NULL);
> +	fsync(perf->fd);
> +	if (perf->debug)
> +		trace_debug_resolve_symbols(perf->debug);
> +}
> +
> +void py_perf_handle_free(struct perf_handle *perf)
> +{
> +	int i;
> +
> +	if (perf) {
> +		perf_reader_stop(perf);
> +		perf_evlist__delete(perf->evlist);
> +		if (perf->fd >= 0)
> +			close(perf->fd);
> +		if (perf->debug)
> +			trace_debug_obj_destroy(perf->debug);
> +		if (perf->thr_map) {
> +			for (i = 0; i < perf->thr_count; i++)
> +				free(perf->thr_map[i].comm);
> +			free(perf->thr_map);
> +		}
> +	}
> +
> +	free(perf);
> +}
> +
> +void py_perf_sample_free(struct perf_event_sample *sample)
> +{
> +	unsigned int i;
> +
> +	if (sample) {
> +		free(sample->ip);
> +		for (i = 0; i < sample->data.nr; i++)
> +			free((char *)(sample->ips[i]));
> +	}
> +	free(sample);
> +}
> +
> +static int pid_filter(const struct dirent *dir)
> +{
> +	const char *dname = dir->d_name;
> +
> +	if (!dname || dname[0] == '.')
> +		return 0;
> +
> +	while (*dname) {
> +		if (!isdigit(*dname))
> +			return 0;
> +		dname++;
> +	}
> +
> +	return 1;
> +}
> +
> +static  int str_read_file(const char *file, char **buffer)
> +{
> +	char stbuf[BUFSIZ];
> +	char *buf = NULL;
> +	int size = 0;
> +	char *nbuf;
> +	int fd;
> +	int r;
> +
> +	fd = open(file, O_RDONLY);
> +	if (fd < 0)
> +		return -1;
> +
> +	do {
> +		r = read(fd, stbuf, BUFSIZ);
> +		if (r <= 0)
> +			continue;
> +		nbuf = realloc(buf, size+r+1);
> +		if (!nbuf) {
> +			size = -1;
> +			break;
> +		}
> +		buf = nbuf;
> +		memcpy(buf+size, stbuf, r);
> +		size += r;
> +	} while (r > 0);
> +
> +	close(fd);
> +	if (r == 0 && size > 0) {
> +		buf[size] = '\0';
> +		*buffer = buf;
> +	} else
> +		free(buf);
> +
> +	return size;
> +}
> +
> +static void strip_control_chars(char *str)
> +{
> +	while (*str) {
> +		if (iscntrl(*str)) {
> +			*str = '\0';
> +			break;
> +		}
> +		str++;
> +	}
> +}
> +
> +static struct perf_thread_map *create_thread_map(struct perf_handle *perf, int pid)
> +{
> +	struct perf_thread_map *tmap = NULL;
> +	struct dirent **pids = NULL;
> +	char path[PATH_MAX];
> +	int i, count;
> +
> +	snprintf(path, PATH_MAX, "/proc/%d/task", pid);
> +	count = scandir(path, &pids, pid_filter, NULL);
> +	if (count < 1)
> +		goto out;
> +
> +	tmap = perf_thread_map__new_array(count, NULL);
> +	if (!tmap)
> +		goto out;
> +	free(perf->thr_map);
> +	perf->thr_map = calloc(count, sizeof(struct perf_scan_thread));
> +	if (!perf->thr_map)
> +		goto out;
> +	perf->thr_count = count;
> +
> +	for (i = 0; i < count; i++) {
> +		perf->thr_map[i].tid = atoi(pids[i]->d_name);
> +		perf_thread_map__set_pid(tmap, i, perf->thr_map[i].tid);
> +		snprintf(path, PATH_MAX, "/proc/%d/task/%s/comm", pid, pids[i]->d_name);
> +		str_read_file(path, &perf->thr_map[i].comm);
> +		strip_control_chars(perf->thr_map[i].comm);
> +	}
> +	perf_thread_map__read_comms(tmap);
> +
> +out:
> +	if (pids) {
> +		for (i = 0; i < count; i++)
> +			free(pids[i]);
> +		free(pids);
> +	}
> +
> +	return tmap;
> +}
> +
> +static struct perf_handle *new_perf_sampling_handle(pid_t pid, int freq)
> +{
> +	struct perf_handle *perf = NULL;
> +	char *tmp_file = NULL;
> +
> +	perf = calloc(1, sizeof(*perf));
> +	if (!perf)
> +		return NULL;
> +
> +	perf->fd = -1;
> +	perf->attr.type        = PERF_TYPE_HARDWARE;
> +	perf->attr.config      = PERF_COUNT_HW_CPU_CYCLES;
> +	perf->attr.disabled    = 1;
> +	perf->attr.freq        = 1;
> +	perf->attr.sample_freq = freq;
> +	perf->attr.exclude_kernel = 1;
> +	perf->attr.exclude_idle = 1;
> +	perf->attr.exclude_callchain_kernel = 1;
> +	perf->attr.comm = 1;
> +	perf->attr.mmap2 = 1;
> +	perf->attr.task = 1;
> +	perf->attr.precise_ip = 0;
> +	perf->attr.inherit = 1;
> +	perf->attr.task = 1;
> +	perf->attr.inherit_stat = 1;
> +	perf->attr.read_format = PERF_FORMAT_TOTAL_TIME_ENABLED |
> +				 PERF_FORMAT_TOTAL_TIME_RUNNING;
> +	perf->attr.sample_type = PERF_SAMPLE_IDENTIFIER |
> +				 PERF_SAMPLE_IP |
> +				 PERF_SAMPLE_TID |
> +				 PERF_SAMPLE_TIME |
> +				 PERF_SAMPLE_CPU |
> +				 PERF_SAMPLE_CALLCHAIN;
> +
> +	/* trace all CPUs in the system */
> +	perf->cpus = perf_cpu_map__new(NULL);
> +	if (!perf->cpus) {
> +		PyErr_Format(PERF_ERROR, "Failed to create perf cpu map");
> +		goto error;
> +	}
> +
> +	if (pid >= 0) {
> +		perf->pid = pid;
> +		perf->threads = create_thread_map(perf, pid);
> +		if (!perf->threads) {
> +			PyErr_Format(PERF_ERROR, "Failed to create perf threads map");
> +			goto error;
> +		}
> +		perf->debug = trace_debug_obj_create_pid(pid);
> +	}
> +
> +	perf->evlist = perf_evlist__new();
> +	if (!perf->evlist) {
> +		PyErr_Format(PERF_ERROR, "Failed to create perf events list");
> +		goto error;
> +	}
> +
> +	tmp_file = strdup(TMP_FILE);
> +	if (!tmp_file)
> +		goto error;
> +
> +	mktemp(tmp_file);
> +	perf->fd = open(tmp_file, O_RDWR|O_CREAT|O_TRUNC, 0600);
> +	unlink(tmp_file);
> +	if (perf->fd < 0)
> +		goto error;
> +
> +	perf->evsel = perf_evsel__new(&perf->attr);
> +	if (!perf->evsel) {
> +		PyErr_Format(PERF_ERROR, "Failed to create perf cycles");
> +		goto error;
> +	}
> +
> +	perf_evlist__add(perf->evlist, perf->evsel);
> +	perf_evlist__set_maps(perf->evlist, perf->cpus, perf->threads);
> +
> +	free(tmp_file);
> +	return perf;
> +
> +error:
> +	perf_cpu_map__put(perf->cpus);
> +	perf_thread_map__put(perf->threads);
> +	py_perf_handle_free(perf);
> +	free(tmp_file);
> +	return NULL;
> +}
> +
> +PyObject *PyPerfSample_new(PyObject *self, PyObject *args, PyObject *kwargs)
> +{
> +	static char *kwlist[] = {"pid", "freq", NULL};
> +	struct perf_handle *perf = NULL;
> +	int freq = 10, pid = -1;
> +
> +	if (!PyArg_ParseTupleAndKeywords(args,
> +					 kwargs,
> +					 "i|i",
> +					 kwlist,
> +					 &pid,
> +					 &freq
> +					 )) {
> +		return NULL;
> +	}
> +
> +	perf = new_perf_sampling_handle(pid, freq);
> +	if (!perf)
> +		return NULL;
> +
> +	return PyPerf_New(perf);
> +}
> +
> +static void perf_read_sample(struct perf_handle *perf, struct perf_record_sample *event)
> +{
> +	struct event_sample *sample;
> +	uint64_t i;
> +
> +	sample = (struct event_sample *)(event->array);
> +
> +	/* check if the sample is for our PID */
> +	if (sample->pid != perf->pid)
> +		return;
> +
> +	if (perf->debug)
> +		trace_debug_add_resolve_symbol(perf->debug, sample->ip, NULL);
> +
> +	if (write(perf->fd, sample, sizeof(*sample)) != sizeof(*sample))
> +		return;
> +
> +	for (i = 0; i < sample->nr; i++) {
> +		if (write(perf->fd, &sample->ips[i], sizeof(uint64_t)) != sizeof(uint64_t))
> +			return;
> +		if (perf->debug)
> +			trace_debug_add_resolve_symbol(perf->debug, sample->ips[i], NULL);
> +	}
> +}
> +
> +/* A new memory is mapped to traced process */
> +static void perf_read_mmap2(struct perf_handle *perf, struct perf_record_mmap2 *mmap)
> +{
> +	/* check if mmap is for our PID */
> +	if (perf->pid != mmap->pid)
> +		return;
> +
> +	/* check if executable memory is mapped */
> +	if (mmap->header.misc & PERF_RECORD_MISC_MMAP_DATA)
> +		return;
> +
> +	/*
> +	 * A new dynamic library is dlopen() by the traced process,
> +	 * store it for vma -> name resolving
> +	 */
> +	trace_debug_obj_add_file(perf->debug, mmap->filename,
> +				 mmap->start, mmap->start + mmap->len, mmap->pgoff);
> +}
> +
> +/* A new thread is started */
> +static void perf_read_comm(struct perf_handle *perf, struct perf_record_comm *comm)
> +{
> +	struct perf_scan_thread *tmp;
> +	int i;
> +
> +	/* check if the thread is started by PID */
> +	if (perf->pid != comm->pid)
> +		return;
> +
> +	for (i = 0; i < perf->thr_count; i++) {
> +		if (perf->thr_map[i].tid == comm->tid) {
> +			free(perf->thr_map[i].comm);
> +			perf->thr_map[i].comm = strdup(comm->comm);
> +			return;
> +		}
> +	}
> +
> +	tmp = realloc(perf->thr_map, (perf->thr_count + 1) * sizeof(struct perf_scan_thread));
> +	if (!tmp)
> +		return;
> +
> +	perf->thr_map = tmp;
> +	perf->thr_map[perf->thr_count].tid = comm->tid;
> +	perf->thr_map[perf->thr_count].comm = strdup(comm->comm);
> +	perf->thr_count++;
> +}
> +
> +static void *perf_reader_thread(void *data)
> +{
> +	struct perf_handle *perf = data;
> +	struct perf_mmap *map;
> +	union perf_event *event;
> +
> +	perf_evlist__enable(perf->evlist);
> +
> +	while (true) {
> +		if (!perf->running)
> +			break;
> +		perf_evlist__for_each_mmap(perf->evlist, map, false) {
> +			if (perf_mmap__read_init(map) < 0)
> +				continue;
> +
> +			while ((event = perf_mmap__read_event(map)) != NULL) {
> +
> +				switch (event->sample.header.type) {
> +				case PERF_RECORD_SAMPLE:
> +					perf_read_sample(perf, (struct perf_record_sample *)event);
> +					break;
> +				case PERF_RECORD_COMM:
> +					perf_read_comm(perf, (struct perf_record_comm *)event);
> +					break;
> +				case PERF_RECORD_MMAP2:
> +					perf_read_mmap2(perf, (struct perf_record_mmap2 *)event);
> +					break;
> +				}
> +
> +				perf_mmap__consume(map);
> +			}
> +
> +			perf_mmap__read_done(map);
> +		}
> +	}
> +	perf_evlist__disable(perf->evlist);
> +	pthread_exit(0);
> +}
> +
> +static int increase_file_limit(void)
> +{
> +	struct rlimit lim;
> +
> +	if (getrlimit(RLIMIT_NOFILE, &lim))
> +		return -1;
> +
> +	if (lim.rlim_cur < lim.rlim_max) {
> +		lim.rlim_cur = lim.rlim_max;
> +	} else {
> +		lim.rlim_cur += 100;
> +		lim.rlim_max += 100;
> +	}
> +
> +	return setrlimit(RLIMIT_NOFILE, &lim);
> +}
> +
> +static int perf_reader_start(struct perf_handle *perf)
> +{
> +	pthread_attr_t attrib;
> +	int err = 0;
> +
> +	if (perf->running)
> +		return 0;
> +
> +	pthread_attr_init(&attrib);
> +	pthread_attr_setdetachstate(&attrib, PTHREAD_CREATE_JOINABLE);
> +
> +	do {
> +		err = perf_evlist__open(perf->evlist);
> +		if (!err)
> +			break;
> +		if (err != -EMFILE)
> +			goto out;
> +		if (increase_file_limit())
> +			goto out;
> +	} while (err);
> +
> +	err = perf_evlist__mmap(perf->evlist, 4);
> +	if (err)
> +		goto out;
> +
> +	perf->running = true;
> +	err = pthread_create(&perf->reader, &attrib, perf_reader_thread, perf);
> +	if (err)
> +		goto out;
> +
> +out:
> +	pthread_attr_destroy(&attrib);
> +	if (err) {
> +		perf_evlist__close(perf->evlist);
> +		perf->running = false;
> +	}
> +	return err;
> +}
> +
> +PyObject *PyPerf_start(PyPerf *self)
> +{
> +	struct perf_handle *perf = self->ptrObj;
> +	int err;
> +
> +	if (perf->running) {
> +		PyErr_Format(PERF_ERROR, "Perf reader is already started");
> +		return NULL;
> +	}
> +
> +	err = perf_reader_start(perf);
> +	if (err) {
> +		PyErr_Format(PERF_ERROR,
> +			     "Failed to start perf reader - %s", strerror(-err));
> +		return NULL;
> +	}
> +
> +	Py_RETURN_NONE;
> +}
> +
> +PyObject *PyPerf_stop(PyPerf *self)
> +{
> +	struct perf_handle *perf = self->ptrObj;
> +
> +	if (!perf->running) {
> +		PyErr_Format(PERF_ERROR, "Perf reader is not started");
> +		return NULL;
> +	}
> +
> +	perf_reader_stop(perf);
> +
> +	Py_RETURN_NONE;
> +}
> +
> +struct symb_walk {
> +	uint64_t ip;
> +	char *name;
> +};
> +
> +static int sym_get(struct tracecmd_debug_symbols *symb, void *data)
> +{
> +	struct symb_walk *s = (struct symb_walk *)data;
> +
> +	if (s->ip == symb->vma_near) {
> +		if (symb->name)
> +			asprintf(&s->name, "%s @ %s", symb->name, symb->fname);
> +		else
> +			asprintf(&s->name, "(0x%llX) @ %s", symb->vma_near, symb->fname);
> +		return 1;
> +	}
> +	return 0;
> +}
> +
> +static char *ip_name(struct perf_handle *perf, uint64_t ip)
> +{
> +	struct symb_walk symb;
> +
> +	symb.ip = ip;
> +	symb.name = NULL;
> +	if (perf && perf->debug)
> +		trace_debug_walk_resolved_symbols(perf->debug, sym_get, &symb);
> +
> +	if (!symb.name)
> +		asprintf(&symb.name, "0x%lX", ip);
> +
> +	return symb.name;
> +}
> +
> +PyObject *PyPerf_getSamples(PyPerf *self)
> +{
> +	struct perf_handle *perf = self->ptrObj;
> +	struct event_sample sample;
> +	struct perf_event_sample *store;
> +	PyObject *slist, *sobject;
> +	uint64_t i, ip;
> +	int ca = 0, cs = 0;
> +
> +	if (perf->running) {
> +		PyErr_Format(PERF_ERROR, "Perf reader is running");
> +		return NULL;
> +	}
> +
> +	if (lseek(perf->fd, 0, SEEK_SET) == (off_t)-1) {
> +		PyErr_Format(PERF_ERROR, "No samples");
> +		return NULL;
> +	}
> +
> +	slist = PyList_New(0);
> +	do {
> +		if (read(perf->fd, &sample, sizeof(sample)) != sizeof(sample))
> +			break;
> +		ca++;
> +		store = calloc(1, sizeof(*store) + (sample.nr * sizeof(char *)));
> +		if (!store)
> +			break;
> +		memcpy(&store->data, &sample, sizeof(sample));
> +		store->perf = perf;
> +		store->ip = ip_name(perf, store->data.ip);
> +		for (i = 0; i < sample.nr; i++) {
> +			if (read(perf->fd, &ip, sizeof(uint64_t)) != sizeof(uint64_t))
> +				break;
> +			store->ips[i] = ip_name(perf, ip);
> +		}
> +		cs += sample.nr;
> +		if (i < sample.nr)
> +			break;
> +		sobject = PyPerfEventSample_New(store);
> +		PyList_Append(slist, sobject);
> +	} while (true);
> +	ftruncate(perf->fd, 0);
> +	return slist;
> +}
> +
> +PyObject *PyPerfSample_id(PyPerfEventSample *self)
> +{
> +	struct perf_event_sample *sample = self->ptrObj;
> +
> +	return PyLong_FromUnsignedLongLong(sample->data.id);
> +}
> +
> +PyObject *PyPerfSample_pid(PyPerfEventSample *self)
> +{
> +	struct perf_event_sample *sample = self->ptrObj;
> +
> +	return PyLong_FromUnsignedLong(sample->data.pid);
> +}
> +
> +PyObject *PyPerfSample_tid(PyPerfEventSample *self)
> +{
> +	struct perf_event_sample *sample = self->ptrObj;
> +
> +	return PyLong_FromUnsignedLong(sample->data.tid);
> +}
> +
> +PyObject *PyPerfSample_time(PyPerfEventSample *self)
> +{
> +	struct perf_event_sample *sample = self->ptrObj;
> +
> +	return PyLong_FromUnsignedLongLong(sample->data.time);
> +}
> +
> +PyObject *PyPerfSample_cpu(PyPerfEventSample *self)
> +{
> +	struct perf_event_sample *sample = self->ptrObj;
> +
> +	return PyLong_FromUnsignedLong(sample->data.cpu);
> +}
> +
> +PyObject *PyPerfSample_nr(PyPerfEventSample *self)
> +{
> +	struct perf_event_sample *sample = self->ptrObj;
> +
> +	return PyLong_FromUnsignedLongLong(sample->data.nr);
> +}
> +
> +PyObject *PyPerfSample_ip(PyPerfEventSample *self)
> +{
> +	struct perf_event_sample *sample = self->ptrObj;
> +
> +	return PyUnicode_FromString(sample->ip);
> +}
> +
> +PyObject *PyPerfSample_tid_comm(PyPerfEventSample *self, PyObject *args, PyObject *kwargs)
> +{
> +	struct perf_event_sample *sample = self->ptrObj;
> +	char *name = NULL;
> +	int i;
> +
> +	if (!sample->perf || !sample->perf->thr_count || !sample->perf->thr_map)
> +		Py_RETURN_NONE;
> +
> +	for (i = 0; i < sample->perf->thr_count; i++)
> +		if (sample->perf->thr_map[i].tid == sample->data.tid)
> +			break;
> +
> +	if (i < sample->perf->thr_count && sample->perf->thr_map[i].comm)
> +		name = sample->perf->thr_map[i].comm;
> +
> +	if (name)
> +		return PyUnicode_FromString(name);
> +
> +	Py_RETURN_NONE;
> +}
> +
> +PyObject *PyPerfSample_ips(PyPerfEventSample *self)
> +{
> +	struct perf_event_sample *sample = self->ptrObj;
> +	PyObject *slist;
> +	unsigned int i;
> +
> +	slist = PyList_New(0);
> +	for (i = 0 ; i < sample->data.nr; i++)
> +		PyList_Append(slist, PyUnicode_FromString((char *)sample->ips[i]));
> +
> +	return slist;
> +}
> diff --git a/src/perfpy-utils.h b/src/perfpy-utils.h
> new file mode 100644
> index 0000000..0727a9a
> --- /dev/null
> +++ b/src/perfpy-utils.h
> @@ -0,0 +1,41 @@
> +/* SPDX-License-Identifier: LGPL-2.1 */
> +
> +/*
> + * Copyright 2022 VMware Inc, Tzvetomir Stoyanov (VMware) <tz.stoyanov@gmail.com>
> + */
> +
> +#ifndef _TC_PERF_PY_UTILS
> +#define _TC_PERF_PY_UTILS
> +
> +// Python
> +#include <Python.h>
> +
> +// trace-cruncher
> +#include "common.h"
> +
> +struct perf_handle;
> +struct perf_event_sample;
> +
> +C_OBJECT_WRAPPER_DECLARE(perf_handle, PyPerf);
> +C_OBJECT_WRAPPER_DECLARE(perf_event_sample, PyPerfEventSample);
> +
> +PyObject *PyPerfSample_new(PyObject *self, PyObject *args, PyObject *kwargs);
> +
> +PyObject *PyPerf_start(PyPerf *self);
> +PyObject *PyPerf_stop(PyPerf *self);
> +PyObject *PyPerf_getSamples(PyPerf *self);
> +
> +PyObject *PyPerfSample_id(PyPerfEventSample *self);
> +PyObject *PyPerfSample_ip(PyPerfEventSample *self);
> +PyObject *PyPerfSample_pid(PyPerfEventSample *self);
> +PyObject *PyPerfSample_tid(PyPerfEventSample *self);
> +PyObject *PyPerfSample_tid_comm(PyPerfEventSample *self, PyObject *args, PyObject *kwargs);
> +PyObject *PyPerfSample_time(PyPerfEventSample *self);
> +PyObject *PyPerfSample_cpu(PyPerfEventSample *self);
> +PyObject *PyPerfSample_nr(PyPerfEventSample *self);
> +PyObject *PyPerfSample_ips(PyPerfEventSample *self);
> +
> +void py_perf_handle_free(struct perf_handle *handle);
> +void py_perf_sample_free(struct perf_event_sample *sample);
> +
> +#endif
> diff --git a/src/perfpy.c b/src/perfpy.c
> new file mode 100644
> index 0000000..745d519
> --- /dev/null
> +++ b/src/perfpy.c
> @@ -0,0 +1,141 @@
> +// SPDX-License-Identifier: LGPL-2.1
> +
> +/*
> + * Copyright 2022 VMware Inc, Tzvetomir Stoyanov (VMware) <tz.stoyanov@gmail.com>
> + */
> +
> +// Python
> +#include <Python.h>
> +
> +// libperf
> +#include <perf/core.h>
> +#include <perf/evsel.h>
> +#include <perf/mmap.h>
> +#include <perf/event.h>
> +
> +// trace-cruncher
> +#include "common.h"
> +#include "perfpy-utils.h"
> +
> +extern PyObject *PERF_ERROR;
> +
> +static PyMethodDef PyPerf_methods[] = {
> +	{"start",
> +	 (PyCFunction) PyPerf_start,
> +	 METH_NOARGS,
> +	 "start sampling"
> +	},
> +	{"stop",
> +	 (PyCFunction) PyPerf_stop,
> +	 METH_NOARGS,
> +	 "stop sampling"
> +	},
> +	{"get_samples",
> +	 (PyCFunction) PyPerf_getSamples,
> +	 METH_NOARGS,
> +	 "get recorded samples"
> +	},
> +	{NULL}
> +};
> +C_OBJECT_WRAPPER(perf_handle, PyPerf, NO_DESTROY, py_perf_handle_free);
> +
> +static PyMethodDef PyPerfEventSample_methods[] = {
> +	{"id",
> +	 (PyCFunction) PyPerfSample_id,
> +	 METH_NOARGS,
> +	 "get sample id"
> +	},
> +	{"ip",
> +	 (PyCFunction) PyPerfSample_ip,
> +	 METH_NOARGS,
> +	 "get sample ip"
> +	},
> +	{"pid",
> +	 (PyCFunction) PyPerfSample_pid,
> +	 METH_NOARGS,
> +	 "get sample pid"
> +	},
> +	{"tid",
> +	 (PyCFunction) PyPerfSample_tid,
> +	 METH_NOARGS,
> +	 "get sample tid"
> +	},
> +	{"tid_comm",
> +	 (PyCFunction) PyPerfSample_tid_comm,
> +	 METH_VARARGS | METH_KEYWORDS,
> +	 "get sample tid"
> +	},
> +	{"time",
> +	 (PyCFunction) PyPerfSample_time,
> +	 METH_NOARGS,
> +	 "get sample timestamp"
> +	},
> +	{"cpu",
> +	 (PyCFunction) PyPerfSample_cpu,
> +	 METH_NOARGS,
> +	 "get sample cpu"
> +	},
> +	{"stack_count",
> +	 (PyCFunction) PyPerfSample_nr,
> +	 METH_NOARGS,
> +	 "get sample stack count"
> +	},
> +	{"stack",
> +	 (PyCFunction) PyPerfSample_ips,
> +	 METH_NOARGS,
> +	 "get sample stack"
> +	},
> +	{NULL}
> +};
> +C_OBJECT_WRAPPER(perf_event_sample, PyPerfEventSample, NO_DESTROY, py_perf_sample_free);
> +
> +static PyMethodDef perfpy_methods[] = {
> +	{"sample",
> +	 (PyCFunction) PyPerfSample_new,
> +	 METH_VARARGS | METH_KEYWORDS,
> +	 "new perf sample instance"
> +	},
> +	{NULL}
> +};
> +
> +static int perf_error_print(enum libperf_print_level level,
> +			    const char *fmt, va_list ap)
> +{
> +	return vfprintf(stderr, fmt, ap);
> +}
> +
> +static struct PyModuleDef perfpy_module = {
> +	PyModuleDef_HEAD_INIT,
> +	"perfpy",
> +	"Python interface for Perf.",
> +	-1,
> +	perfpy_methods
> +};
> +
> +PyMODINIT_FUNC PyInit_perfpy(void)
> +{
> +
> +	if (!PyPerfTypeInit())
> +		return NULL;
> +	if (!PyPerfEventSampleTypeInit())
> +		return NULL;
> +
> +	PERF_ERROR = PyErr_NewException("tracecruncher.perfpy.perf_error",
> +					NULL, NULL);
> +
> +	PyObject *module = PyModule_Create(&perfpy_module);
> +
> +	PyModule_AddObject(module, "perf_error", PERF_ERROR);
> +	PyModule_AddObject(module, "perf_handle", (PyObject *) &PyPerfType);
> +	PyModule_AddObject(module, "perf_event_sample", (PyObject *) &PyPerfEventSampleType);
> +
> +	if (geteuid() != 0) {
> +		PyErr_SetString(PERF_ERROR,
> +				"Permission denied. Root privileges are required.");
> +		return NULL;
> +	}
> +
> +	libperf_init(perf_error_print);
> +
> +	return module;
> +}
> -- 
> 2.34.1

-- 

- Arnaldo

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [RFC PATCH 2/3] trace-cruncher: Support for perf
  2022-02-25 14:51   ` Arnaldo Carvalho de Melo
@ 2022-02-25 15:38     ` Tzvetomir Stoyanov
  0 siblings, 0 replies; 8+ messages in thread
From: Tzvetomir Stoyanov @ 2022-02-25 15:38 UTC (permalink / raw)
  To: Arnaldo Carvalho de Melo
  Cc: Yordan Karadzhov, Jiri Olsa, Ian Rogers, Steven Rostedt,
	Linux Trace Devel, linux-perf-users

On Fri, Feb 25, 2022 at 4:51 PM Arnaldo Carvalho de Melo
<arnaldo.melo@gmail.com> wrote:
>
> Em Thu, Feb 24, 2022 at 06:37:10PM +0200, Tzvetomir Stoyanov (VMware) escreveu:
> > Initial perf support for trace-cruncher, using libperf. As a first
> > stage, collecting stack trace samples of a given process is supported.
>
> Are you aware of tools/perf/util/python.c? It was done for use in tuna:
>
> https://git.kernel.org/pub/scm/utils/tuna/tuna.git/
>
> I think it is already used in other tools, IIRC tuned also uses it:
>
> https://github.com/redhat-performance/tuned
>
> yeah:
>
> ⬢[acme@toolbox tuned]$ grep -r "import perf" *
> tuned/plugins/plugin_irqbalance.py:import perf
> tuned/plugins/plugin_scheduler.py:import perf
> ⬢[acme@toolbox tuned]$
>
> Please consider looking at it and possibly adding what you need. It is available in most distros, I think:
>

Awesome! We'll look at this, thanks Arnaldo!

> Fedora:
>
> python3-perf.x86_64 : Python bindings for apps which will manipulate perf events
>
> [root@five perf]# cat tools/perf/python/twatch.py
> #! /usr/bin/env python
> # SPDX-License-Identifier: GPL-2.0-only
> # -*- python -*-
> # -*- coding: utf-8 -*-
> #   twatch - Experimental use of the perf python interface
> #   Copyright (C) 2011 Arnaldo Carvalho de Melo <acme@redhat.com>
> #
>
> import perf
>
> def main(context_switch = 0, thread = -1):
>         cpus = perf.cpu_map()
>         threads = perf.thread_map(thread)
>         evsel = perf.evsel(type   = perf.TYPE_SOFTWARE,
>                            config = perf.COUNT_SW_DUMMY,
>                            task = 1, comm = 1, mmap = 0, freq = 0,
>                            wakeup_events = 1, watermark = 1,
>                            sample_id_all = 1, context_switch = context_switch,
>                            sample_type = perf.SAMPLE_PERIOD | perf.SAMPLE_TID | perf.SAMPLE_CPU)
>
>         """What we want are just the PERF_RECORD_ lifetime events for threads,
>          using the default, PERF_TYPE_HARDWARE + PERF_COUNT_HW_CYCLES & freq=1
>          (the default), makes perf reenable irq_vectors:local_timer_entry, when
>          disabling nohz, not good for some use cases where all we want is to get
>          threads comes and goes... So use (perf.TYPE_SOFTWARE, perf_COUNT_SW_DUMMY,
>          freq=0) instead."""
>
>         evsel.open(cpus = cpus, threads = threads);
>         evlist = perf.evlist(cpus, threads)
>         evlist.add(evsel)
>         evlist.mmap()
>         while True:
>                 evlist.poll(timeout = -1)
>                 for cpu in cpus:
>                         event = evlist.read_on_cpu(cpu)
>                         if not event:
>                                 continue
>                         print("cpu: {0}, pid: {1}, tid: {2} {3}".format(event.sample_cpu,
>                                                                         event.sample_pid,
>                                                                         event.sample_tid,
>                                                                         event))
>
> if __name__ == '__main__':
>     """
>         To test the PERF_RECORD_SWITCH record, pick a pid and replace
>         in the following line.
>
>         Example output:
>
> cpu: 3, pid: 31463, tid: 31593 { type: context_switch, next_prev_pid: 31463, next_prev_tid: 31593, switch_out: 1 }
> cpu: 1, pid: 31463, tid: 31489 { type: context_switch, next_prev_pid: 31463, next_prev_tid: 31489, switch_out: 1 }
> cpu: 2, pid: 31463, tid: 31496 { type: context_switch, next_prev_pid: 31463, next_prev_tid: 31496, switch_out: 1 }
> cpu: 3, pid: 31463, tid: 31491 { type: context_switch, next_prev_pid: 31463, next_prev_tid: 31491, switch_out: 0 }
>
>         It is possible as well to use event.misc & perf.PERF_RECORD_MISC_SWITCH_OUT
>         to figure out if this is a context switch in or out of the monitored threads.
>
>         If bored, please add command line option parsing support for these options :-)
>     """
>     # main(context_switch = 1, thread = 31463)
>     main()
> [root@five perf]# export PYTHONPATH=/tmp/build/perf/python_ext_build/lib/
> [root@five perf]# tools/perf/python/twatch.py
> cpu: 22, pid: 13290, tid: 3471647 { type: fork, pid: 13290, ppid: 13290, tid: 3471653, ptid: 3471647, time: 151504201166040}
> cpu: 27, pid: 13290, tid: 3471653 { type: comm, pid: 13290, tid: 3471653, comm: MediaPD~oder #1 }
> cpu: 27, pid: 13290, tid: 3471653 { type: comm, pid: 13290, tid: 3471653, comm: MediaPD~oder #1 }
> cpu: 27, pid: 13290, tid: 3471653 { type: fork, pid: 13290, ppid: 13290, tid: 3471654, ptid: 3471653, time: 151504201524181}
> cpu: 22, pid: 13290, tid: 3471654 { type: comm, pid: 13290, tid: 3471654, comm: MediaPD~oder #2 }
> cpu: 22, pid: 13290, tid: 3471654 { type: comm, pid: 13290, tid: 3471654, comm: MediaPD~oder #2 }
> cpu: 29, pid: 13290, tid: 3471653 { type: exit, pid: 13290, ppid: 12612, tid: 3471653, ptid: 12612, time: 151504213801994}
> cpu: 0, pid: 13290, tid: 3471654 { type: exit, pid: 13290, ppid: 12612, tid: 3471654, ptid: 12612, time: 151504213861954}
> cpu: 2, pid: 3471643, tid: 3471643 { type: exit, pid: 3471643, ppid: 13080, tid: 3471643, ptid: 13080, time: 151505955082335}
> cpu: 20, pid: 13080, tid: 13080 { type: fork, pid: 3471655, ppid: 13080, tid: 3471655, ptid: 13080, time: 151505955346595}
> cpu: 20, pid: 13080, tid: 13080 { type: fork, pid: 3471656, ppid: 13080, tid: 3471656, ptid: 13080, time: 151505955523645}
> cpu: 20, pid: 13080, tid: 13080 { type: fork, pid: 3471657, ppid: 13080, tid: 3471657, ptid: 13080, time: 151505955649485}
> cpu: 9, pid: 3471656, tid: 3471656 { type: comm, pid: 3471656, tid: 3471656, comm: grep }
> cpu: 21, pid: 3471655, tid: 3471655 { type: comm, pid: 3471655, tid: 3471655, comm: sensors }
> cpu: 28, pid: 3471657, tid: 3471657 { type: comm, pid: 3471657, tid: 3471657, comm: sed }
> cpu: 21, pid: 3471655, tid: 3471655 { type: exit, pid: 3471655, ppid: 13080, tid: 3471655, ptid: 13080, time: 151505986031034}
> cpu: 20, pid: 3471656, tid: 3471656 { type: exit, pid: 3471656, ppid: 13080, tid: 3471656, ptid: 13080, time: 151505986154194}
> cpu: 22, pid: 3471657, tid: 3471657 { type: exit, pid: 3471657, ppid: 13080, tid: 3471657, ptid: 13080, time: 151505986292214}
> cpu: 23, pid: 13080, tid: 13080 { type: fork, pid: 3471658, ppid: 13080, tid: 3471658, ptid: 13080, time: 151505986418014}
> cpu: 27, pid: 3471658, tid: 3471658 { type: comm, pid: 3471658, tid: 3471658, comm: sleep }
> ^CTraceback (most recent call last):
>   File "/var/home/acme/git/perf/tools/perf/python/twatch.py", line 61, in <module>
>     main()
>   File "/var/home/acme/git/perf/tools/perf/python/twatch.py", line 33, in main
>     evlist.poll(timeout = -1)
> KeyboardInterrupt
>
> [root@five perf]#
>
>
>
> > Signed-off-by: Tzvetomir Stoyanov (VMware) <tz.stoyanov@gmail.com>
> > ---
> >  setup.py           |   9 +-
> >  src/perfpy-utils.c | 699 +++++++++++++++++++++++++++++++++++++++++++++
> >  src/perfpy-utils.h |  41 +++
> >  src/perfpy.c       | 141 +++++++++
> >  4 files changed, 889 insertions(+), 1 deletion(-)
> >  create mode 100644 src/perfpy-utils.c
> >  create mode 100644 src/perfpy-utils.h
> >  create mode 100644 src/perfpy.c
> >
> > diff --git a/setup.py b/setup.py
> > index 4d7e727..31ca2e3 100644
> > --- a/setup.py
> > +++ b/setup.py
> > @@ -19,18 +19,21 @@ def third_party_paths():
> >      pkg_ftracepy = pkg.parse('libtracefs')
> >      pkg_tracecmd = pkg.parse('libtracecmd')
> >      pkg_kshark = pkg.parse('libkshark')
> > +    pkg_perf = pkg.parse('libperf')
> >
> >      include_dirs = [np.get_include()]
> >      include_dirs.extend(pkg_traceevent['include_dirs'])
> >      include_dirs.extend(pkg_ftracepy['include_dirs'])
> >      include_dirs.extend(pkg_tracecmd['include_dirs'])
> >      include_dirs.extend(pkg_kshark['include_dirs'])
> > +    include_dirs.extend(pkg_perf['include_dirs'])
> >
> >      library_dirs = []
> >      library_dirs.extend(pkg_traceevent['library_dirs'])
> >      library_dirs.extend(pkg_ftracepy['library_dirs'])
> >      library_dirs.extend(pkg_tracecmd['library_dirs'])
> >      library_dirs.extend(pkg_kshark['library_dirs'])
> > +    library_dirs.extend(pkg_perf['library_dirs'])
> >      library_dirs = list(set(library_dirs))
> >
> >      return include_dirs, library_dirs
> > @@ -61,6 +64,10 @@ def main():
> >                            sources=['src/ksharkpy.c', 'src/ksharkpy-utils.c'],
> >                            libraries=['kshark'])
> >
> > +    module_perf = extension(name='tracecruncher.perfpy',
> > +                            sources=['src/perfpy.c', 'src/perfpy-utils.c', 'src/trace-obj-debug.c'],
> > +                            libraries=['traceevent', 'perf', 'bfd'])
> > +
> >      setup(name='tracecruncher',
> >            version='0.1.0',
> >            description='NumPy based interface for accessing tracing data in Python.',
> > @@ -69,7 +76,7 @@ def main():
> >            url='https://github.com/vmware/trace-cruncher',
> >            license='LGPL-2.1',
> >            packages=find_packages(),
> > -          ext_modules=[module_ft, module_data, module_ks],
> > +          ext_modules=[module_ft, module_data, module_ks, module_perf],
> >            classifiers=[
> >                'Development Status :: 3 - Alpha',
> >                'Programming Language :: Python :: 3',
> > diff --git a/src/perfpy-utils.c b/src/perfpy-utils.c
> > new file mode 100644
> > index 0000000..ae57a63
> > --- /dev/null
> > +++ b/src/perfpy-utils.c
> > @@ -0,0 +1,699 @@
> > +// SPDX-License-Identifier: LGPL-2.1
> > +
> > +/*
> > + * Copyright 2022 VMware Inc, Tzvetomir Stoyanov (VMware) <tz.stoyanov@gmail.com>
> > + */
> > +
> > +#ifndef _GNU_SOURCE
> > +/** Use GNU C Library. */
> > +#define _GNU_SOURCE
> > +#endif // _GNU_SOURCE
> > +
> > +// C
> > +#include <stdio.h>
> > +#include <unistd.h>
> > +#include <pthread.h>
> > +#include <sys/stat.h>
> > +#include <fcntl.h>
> > +#include <dirent.h>
> > +#include <sys/resource.h>
> > +
> > +// libperf
> > +#include <linux/perf_event.h>
> > +#include <perf/evlist.h>
> > +#include <perf/evsel.h>
> > +#include <perf/cpumap.h>
> > +#include <perf/threadmap.h>
> > +#include <perf/mmap.h>
> > +#include <perf/core.h>
> > +#include <perf/event.h>
> > +
> > +// trace-cruncher
> > +#include "perfpy-utils.h"
> > +#include "trace-obj-debug.h"
> > +
> > +PyObject *PERF_ERROR;
> > +
> > +#define TMP_FILE "/tmp/perf_temp_data.XXXXXX"
> > +
> > +struct perf_scan_thread {
> > +     uint32_t tid;
> > +     char *comm;
> > +     struct perf_counts_values count;
> > +};
> > +
> > +struct perf_handle {
> > +     bool running;
> > +     pthread_t reader;
> > +     int fd;
> > +     int thr_count;
> > +     uint32_t pid;
> > +     struct perf_scan_thread *thr_map;
> > +     struct perf_evlist *evlist;
> > +     struct perf_evsel *evsel;
> > +     struct perf_event_attr attr;
> > +     struct perf_cpu_map *cpus;
> > +     struct trace_debug_object *debug;
> > +     struct perf_thread_map *threads;
> > +};
> > +
> > +struct event_sample {
> > +     uint64_t                id;             /* PERF_SAMPLE_IDENTIFIER */
> > +     uint64_t                ip;             /* PERF_SAMPLE_IP */
> > +     uint32_t                pid, tid;       /* PERF_SAMPLE_TID */
> > +     uint64_t                time;           /* PERF_SAMPLE_TIME */
> > +     uint32_t                cpu, res;       /* PERF_SAMPLE_CPU */
> > +     uint64_t                nr;
> > +     uint64_t                ips[];          /* PERF_SAMPLE_CALLCHAIN */
> > +} __attribute__((packed));
> > +
> > +struct perf_event_sample {
> > +     struct event_sample     data;
> > +     struct perf_handle      *perf;
> > +     char                    *ip;
> > +     char                    *ips[];
> > +} __attribute__((packed));
> > +
> > +static void perf_reader_stop(struct perf_handle *perf)
> > +{
> > +
> > +     if (!perf->running)
> > +             return;
> > +
> > +     perf->running = false;
> > +     pthread_join(perf->reader, NULL);
> > +     fsync(perf->fd);
> > +     if (perf->debug)
> > +             trace_debug_resolve_symbols(perf->debug);
> > +}
> > +
> > +void py_perf_handle_free(struct perf_handle *perf)
> > +{
> > +     int i;
> > +
> > +     if (perf) {
> > +             perf_reader_stop(perf);
> > +             perf_evlist__delete(perf->evlist);
> > +             if (perf->fd >= 0)
> > +                     close(perf->fd);
> > +             if (perf->debug)
> > +                     trace_debug_obj_destroy(perf->debug);
> > +             if (perf->thr_map) {
> > +                     for (i = 0; i < perf->thr_count; i++)
> > +                             free(perf->thr_map[i].comm);
> > +                     free(perf->thr_map);
> > +             }
> > +     }
> > +
> > +     free(perf);
> > +}
> > +
> > +void py_perf_sample_free(struct perf_event_sample *sample)
> > +{
> > +     unsigned int i;
> > +
> > +     if (sample) {
> > +             free(sample->ip);
> > +             for (i = 0; i < sample->data.nr; i++)
> > +                     free((char *)(sample->ips[i]));
> > +     }
> > +     free(sample);
> > +}
> > +
> > +static int pid_filter(const struct dirent *dir)
> > +{
> > +     const char *dname = dir->d_name;
> > +
> > +     if (!dname || dname[0] == '.')
> > +             return 0;
> > +
> > +     while (*dname) {
> > +             if (!isdigit(*dname))
> > +                     return 0;
> > +             dname++;
> > +     }
> > +
> > +     return 1;
> > +}
> > +
> > +static  int str_read_file(const char *file, char **buffer)
> > +{
> > +     char stbuf[BUFSIZ];
> > +     char *buf = NULL;
> > +     int size = 0;
> > +     char *nbuf;
> > +     int fd;
> > +     int r;
> > +
> > +     fd = open(file, O_RDONLY);
> > +     if (fd < 0)
> > +             return -1;
> > +
> > +     do {
> > +             r = read(fd, stbuf, BUFSIZ);
> > +             if (r <= 0)
> > +                     continue;
> > +             nbuf = realloc(buf, size+r+1);
> > +             if (!nbuf) {
> > +                     size = -1;
> > +                     break;
> > +             }
> > +             buf = nbuf;
> > +             memcpy(buf+size, stbuf, r);
> > +             size += r;
> > +     } while (r > 0);
> > +
> > +     close(fd);
> > +     if (r == 0 && size > 0) {
> > +             buf[size] = '\0';
> > +             *buffer = buf;
> > +     } else
> > +             free(buf);
> > +
> > +     return size;
> > +}
> > +
> > +static void strip_control_chars(char *str)
> > +{
> > +     while (*str) {
> > +             if (iscntrl(*str)) {
> > +                     *str = '\0';
> > +                     break;
> > +             }
> > +             str++;
> > +     }
> > +}
> > +
> > +static struct perf_thread_map *create_thread_map(struct perf_handle *perf, int pid)
> > +{
> > +     struct perf_thread_map *tmap = NULL;
> > +     struct dirent **pids = NULL;
> > +     char path[PATH_MAX];
> > +     int i, count;
> > +
> > +     snprintf(path, PATH_MAX, "/proc/%d/task", pid);
> > +     count = scandir(path, &pids, pid_filter, NULL);
> > +     if (count < 1)
> > +             goto out;
> > +
> > +     tmap = perf_thread_map__new_array(count, NULL);
> > +     if (!tmap)
> > +             goto out;
> > +     free(perf->thr_map);
> > +     perf->thr_map = calloc(count, sizeof(struct perf_scan_thread));
> > +     if (!perf->thr_map)
> > +             goto out;
> > +     perf->thr_count = count;
> > +
> > +     for (i = 0; i < count; i++) {
> > +             perf->thr_map[i].tid = atoi(pids[i]->d_name);
> > +             perf_thread_map__set_pid(tmap, i, perf->thr_map[i].tid);
> > +             snprintf(path, PATH_MAX, "/proc/%d/task/%s/comm", pid, pids[i]->d_name);
> > +             str_read_file(path, &perf->thr_map[i].comm);
> > +             strip_control_chars(perf->thr_map[i].comm);
> > +     }
> > +     perf_thread_map__read_comms(tmap);
> > +
> > +out:
> > +     if (pids) {
> > +             for (i = 0; i < count; i++)
> > +                     free(pids[i]);
> > +             free(pids);
> > +     }
> > +
> > +     return tmap;
> > +}
> > +
> > +static struct perf_handle *new_perf_sampling_handle(pid_t pid, int freq)
> > +{
> > +     struct perf_handle *perf = NULL;
> > +     char *tmp_file = NULL;
> > +
> > +     perf = calloc(1, sizeof(*perf));
> > +     if (!perf)
> > +             return NULL;
> > +
> > +     perf->fd = -1;
> > +     perf->attr.type        = PERF_TYPE_HARDWARE;
> > +     perf->attr.config      = PERF_COUNT_HW_CPU_CYCLES;
> > +     perf->attr.disabled    = 1;
> > +     perf->attr.freq        = 1;
> > +     perf->attr.sample_freq = freq;
> > +     perf->attr.exclude_kernel = 1;
> > +     perf->attr.exclude_idle = 1;
> > +     perf->attr.exclude_callchain_kernel = 1;
> > +     perf->attr.comm = 1;
> > +     perf->attr.mmap2 = 1;
> > +     perf->attr.task = 1;
> > +     perf->attr.precise_ip = 0;
> > +     perf->attr.inherit = 1;
> > +     perf->attr.task = 1;
> > +     perf->attr.inherit_stat = 1;
> > +     perf->attr.read_format = PERF_FORMAT_TOTAL_TIME_ENABLED |
> > +                              PERF_FORMAT_TOTAL_TIME_RUNNING;
> > +     perf->attr.sample_type = PERF_SAMPLE_IDENTIFIER |
> > +                              PERF_SAMPLE_IP |
> > +                              PERF_SAMPLE_TID |
> > +                              PERF_SAMPLE_TIME |
> > +                              PERF_SAMPLE_CPU |
> > +                              PERF_SAMPLE_CALLCHAIN;
> > +
> > +     /* trace all CPUs in the system */
> > +     perf->cpus = perf_cpu_map__new(NULL);
> > +     if (!perf->cpus) {
> > +             PyErr_Format(PERF_ERROR, "Failed to create perf cpu map");
> > +             goto error;
> > +     }
> > +
> > +     if (pid >= 0) {
> > +             perf->pid = pid;
> > +             perf->threads = create_thread_map(perf, pid);
> > +             if (!perf->threads) {
> > +                     PyErr_Format(PERF_ERROR, "Failed to create perf threads map");
> > +                     goto error;
> > +             }
> > +             perf->debug = trace_debug_obj_create_pid(pid);
> > +     }
> > +
> > +     perf->evlist = perf_evlist__new();
> > +     if (!perf->evlist) {
> > +             PyErr_Format(PERF_ERROR, "Failed to create perf events list");
> > +             goto error;
> > +     }
> > +
> > +     tmp_file = strdup(TMP_FILE);
> > +     if (!tmp_file)
> > +             goto error;
> > +
> > +     mktemp(tmp_file);
> > +     perf->fd = open(tmp_file, O_RDWR|O_CREAT|O_TRUNC, 0600);
> > +     unlink(tmp_file);
> > +     if (perf->fd < 0)
> > +             goto error;
> > +
> > +     perf->evsel = perf_evsel__new(&perf->attr);
> > +     if (!perf->evsel) {
> > +             PyErr_Format(PERF_ERROR, "Failed to create perf cycles");
> > +             goto error;
> > +     }
> > +
> > +     perf_evlist__add(perf->evlist, perf->evsel);
> > +     perf_evlist__set_maps(perf->evlist, perf->cpus, perf->threads);
> > +
> > +     free(tmp_file);
> > +     return perf;
> > +
> > +error:
> > +     perf_cpu_map__put(perf->cpus);
> > +     perf_thread_map__put(perf->threads);
> > +     py_perf_handle_free(perf);
> > +     free(tmp_file);
> > +     return NULL;
> > +}
> > +
> > +PyObject *PyPerfSample_new(PyObject *self, PyObject *args, PyObject *kwargs)
> > +{
> > +     static char *kwlist[] = {"pid", "freq", NULL};
> > +     struct perf_handle *perf = NULL;
> > +     int freq = 10, pid = -1;
> > +
> > +     if (!PyArg_ParseTupleAndKeywords(args,
> > +                                      kwargs,
> > +                                      "i|i",
> > +                                      kwlist,
> > +                                      &pid,
> > +                                      &freq
> > +                                      )) {
> > +             return NULL;
> > +     }
> > +
> > +     perf = new_perf_sampling_handle(pid, freq);
> > +     if (!perf)
> > +             return NULL;
> > +
> > +     return PyPerf_New(perf);
> > +}
> > +
> > +static void perf_read_sample(struct perf_handle *perf, struct perf_record_sample *event)
> > +{
> > +     struct event_sample *sample;
> > +     uint64_t i;
> > +
> > +     sample = (struct event_sample *)(event->array);
> > +
> > +     /* check if the sample is for our PID */
> > +     if (sample->pid != perf->pid)
> > +             return;
> > +
> > +     if (perf->debug)
> > +             trace_debug_add_resolve_symbol(perf->debug, sample->ip, NULL);
> > +
> > +     if (write(perf->fd, sample, sizeof(*sample)) != sizeof(*sample))
> > +             return;
> > +
> > +     for (i = 0; i < sample->nr; i++) {
> > +             if (write(perf->fd, &sample->ips[i], sizeof(uint64_t)) != sizeof(uint64_t))
> > +                     return;
> > +             if (perf->debug)
> > +                     trace_debug_add_resolve_symbol(perf->debug, sample->ips[i], NULL);
> > +     }
> > +}
> > +
> > +/* A new memory region is mapped into the traced process */
> > +static void perf_read_mmap2(struct perf_handle *perf, struct perf_record_mmap2 *mmap)
> > +{
> > +     /* check if mmap is for our PID */
> > +     if (perf->pid != mmap->pid)
> > +             return;
> > +
> > +     /* check if executable memory is mapped */
> > +     if (mmap->header.misc & PERF_RECORD_MISC_MMAP_DATA)
> > +             return;
> > +
> > +     /*
> > +      * A new dynamic library was dlopen()-ed by the traced process;
> > +      * store it for vma -> name resolving
> > +      */
> > +     trace_debug_obj_add_file(perf->debug, mmap->filename,
> > +                              mmap->start, mmap->start + mmap->len, mmap->pgoff);
> > +}
> > +
> > +/* A new thread is started */
> > +static void perf_read_comm(struct perf_handle *perf, struct perf_record_comm *comm)
> > +{
> > +     struct perf_scan_thread *tmp;
> > +     int i;
> > +
> > +     /* check if the thread is started by PID */
> > +     if (perf->pid != comm->pid)
> > +             return;
> > +
> > +     for (i = 0; i < perf->thr_count; i++) {
> > +             if (perf->thr_map[i].tid == comm->tid) {
> > +                     free(perf->thr_map[i].comm);
> > +                     perf->thr_map[i].comm = strdup(comm->comm);
> > +                     return;
> > +             }
> > +     }
> > +
> > +     tmp = realloc(perf->thr_map, (perf->thr_count + 1) * sizeof(struct perf_scan_thread));
> > +     if (!tmp)
> > +             return;
> > +
> > +     perf->thr_map = tmp;
> > +     perf->thr_map[perf->thr_count].tid = comm->tid;
> > +     perf->thr_map[perf->thr_count].comm = strdup(comm->comm);
> > +     perf->thr_count++;
> > +}
> > +
> > +static void *perf_reader_thread(void *data)
> > +{
> > +     struct perf_handle *perf = data;
> > +     struct perf_mmap *map;
> > +     union perf_event *event;
> > +
> > +     perf_evlist__enable(perf->evlist);
> > +
> > +     while (true) {
> > +             if (!perf->running)
> > +                     break;
> > +             perf_evlist__for_each_mmap(perf->evlist, map, false) {
> > +                     if (perf_mmap__read_init(map) < 0)
> > +                             continue;
> > +
> > +                     while ((event = perf_mmap__read_event(map)) != NULL) {
> > +
> > +                             switch (event->sample.header.type) {
> > +                             case PERF_RECORD_SAMPLE:
> > +                                     perf_read_sample(perf, (struct perf_record_sample *)event);
> > +                                     break;
> > +                             case PERF_RECORD_COMM:
> > +                                     perf_read_comm(perf, (struct perf_record_comm *)event);
> > +                                     break;
> > +                             case PERF_RECORD_MMAP2:
> > +                                     perf_read_mmap2(perf, (struct perf_record_mmap2 *)event);
> > +                                     break;
> > +                             }
> > +
> > +                             perf_mmap__consume(map);
> > +                     }
> > +
> > +                     perf_mmap__read_done(map);
> > +             }
> > +     }
> > +     perf_evlist__disable(perf->evlist);
> > +     pthread_exit(0);
> > +}
> > +
> > +static int increase_file_limit(void)
> > +{
> > +     struct rlimit lim;
> > +
> > +     if (getrlimit(RLIMIT_NOFILE, &lim))
> > +             return -1;
> > +
> > +     if (lim.rlim_cur < lim.rlim_max) {
> > +             lim.rlim_cur = lim.rlim_max;
> > +     } else {
> > +             lim.rlim_cur += 100;
> > +             lim.rlim_max += 100;
> > +     }
> > +
> > +     return setrlimit(RLIMIT_NOFILE, &lim);
> > +}
> > +
> > +static int perf_reader_start(struct perf_handle *perf)
> > +{
> > +     pthread_attr_t attrib;
> > +     int err = 0;
> > +
> > +     if (perf->running)
> > +             return 0;
> > +
> > +     pthread_attr_init(&attrib);
> > +     pthread_attr_setdetachstate(&attrib, PTHREAD_CREATE_JOINABLE);
> > +
> > +     do {
> > +             err = perf_evlist__open(perf->evlist);
> > +             if (!err)
> > +                     break;
> > +             if (err != -EMFILE)
> > +                     goto out;
> > +             if (increase_file_limit())
> > +                     goto out;
> > +     } while (err);
> > +
> > +     err = perf_evlist__mmap(perf->evlist, 4);
> > +     if (err)
> > +             goto out;
> > +
> > +     perf->running = true;
> > +     err = pthread_create(&perf->reader, &attrib, perf_reader_thread, perf);
> > +     if (err)
> > +             goto out;
> > +
> > +out:
> > +     pthread_attr_destroy(&attrib);
> > +     if (err) {
> > +             perf_evlist__close(perf->evlist);
> > +             perf->running = false;
> > +     }
> > +     return err;
> > +}
> > +
> > +PyObject *PyPerf_start(PyPerf *self)
> > +{
> > +     struct perf_handle *perf = self->ptrObj;
> > +     int err;
> > +
> > +     if (perf->running) {
> > +             PyErr_Format(PERF_ERROR, "Perf reader is already started");
> > +             return NULL;
> > +     }
> > +
> > +     err = perf_reader_start(perf);
> > +     if (err) {
> > +             PyErr_Format(PERF_ERROR,
> > +                          "Failed to start perf reader - %s", strerror(-err));
> > +             return NULL;
> > +     }
> > +
> > +     Py_RETURN_NONE;
> > +}
> > +
> > +PyObject *PyPerf_stop(PyPerf *self)
> > +{
> > +     struct perf_handle *perf = self->ptrObj;
> > +
> > +     if (!perf->running) {
> > +             PyErr_Format(PERF_ERROR, "Perf reader is not started");
> > +             return NULL;
> > +     }
> > +
> > +     perf_reader_stop(perf);
> > +
> > +     Py_RETURN_NONE;
> > +}
> > +
> > +struct symb_walk {
> > +     uint64_t ip;
> > +     char *name;
> > +};
> > +
> > +static int sym_get(struct tracecmd_debug_symbols *symb, void *data)
> > +{
> > +     struct symb_walk *s = (struct symb_walk *)data;
> > +
> > +     if (s->ip == symb->vma_near) {
> > +             if (symb->name)
> > +                     asprintf(&s->name, "%s @ %s", symb->name, symb->fname);
> > +             else
> > +                     asprintf(&s->name, "(0x%llX) @ %s", symb->vma_near, symb->fname);
> > +             return 1;
> > +     }
> > +     return 0;
> > +}
> > +
> > +static char *ip_name(struct perf_handle *perf, uint64_t ip)
> > +{
> > +     struct symb_walk symb;
> > +
> > +     symb.ip = ip;
> > +     symb.name = NULL;
> > +     if (perf && perf->debug)
> > +             trace_debug_walk_resolved_symbols(perf->debug, sym_get, &symb);
> > +
> > +     if (!symb.name)
> > +             asprintf(&symb.name, "0x%lX", ip);
> > +
> > +     return symb.name;
> > +}
> > +
> > +PyObject *PyPerf_getSamples(PyPerf *self)
> > +{
> > +     struct perf_handle *perf = self->ptrObj;
> > +     struct event_sample sample;
> > +     struct perf_event_sample *store;
> > +     PyObject *slist, *sobject;
> > +     uint64_t i, ip;
> > +     int ca = 0, cs = 0;
> > +
> > +     if (perf->running) {
> > +             PyErr_Format(PERF_ERROR, "Perf reader is running");
> > +             return NULL;
> > +     }
> > +
> > +     if (lseek(perf->fd, 0, SEEK_SET) == (off_t)-1) {
> > +             PyErr_Format(PERF_ERROR, "No samples");
> > +             return NULL;
> > +     }
> > +
> > +     slist = PyList_New(0);
> > +     do {
> > +             if (read(perf->fd, &sample, sizeof(sample)) != sizeof(sample))
> > +                     break;
> > +             ca++;
> > +             store = calloc(1, sizeof(*store) + (sample.nr * sizeof(char *)));
> > +             if (!store)
> > +                     break;
> > +             memcpy(&store->data, &sample, sizeof(sample));
> > +             store->perf = perf;
> > +             store->ip = ip_name(perf, store->data.ip);
> > +             for (i = 0; i < sample.nr; i++) {
> > +                     if (read(perf->fd, &ip, sizeof(uint64_t)) != sizeof(uint64_t))
> > +                             break;
> > +                     store->ips[i] = ip_name(perf, ip);
> > +             }
> > +             cs += sample.nr;
> > +             if (i < sample.nr)
> > +                     break;
> > +             sobject = PyPerfEventSample_New(store);
> > +             PyList_Append(slist, sobject);
> > +     } while (true);
> > +     ftruncate(perf->fd, 0);
> > +     return slist;
> > +}
> > +
> > +PyObject *PyPerfSample_id(PyPerfEventSample *self)
> > +{
> > +     struct perf_event_sample *sample = self->ptrObj;
> > +
> > +     return PyLong_FromUnsignedLongLong(sample->data.id);
> > +}
> > +
> > +PyObject *PyPerfSample_pid(PyPerfEventSample *self)
> > +{
> > +     struct perf_event_sample *sample = self->ptrObj;
> > +
> > +     return PyLong_FromUnsignedLong(sample->data.pid);
> > +}
> > +
> > +PyObject *PyPerfSample_tid(PyPerfEventSample *self)
> > +{
> > +     struct perf_event_sample *sample = self->ptrObj;
> > +
> > +     return PyLong_FromUnsignedLong(sample->data.tid);
> > +}
> > +
> > +PyObject *PyPerfSample_time(PyPerfEventSample *self)
> > +{
> > +     struct perf_event_sample *sample = self->ptrObj;
> > +
> > +     return PyLong_FromUnsignedLongLong(sample->data.time);
> > +}
> > +
> > +PyObject *PyPerfSample_cpu(PyPerfEventSample *self)
> > +{
> > +     struct perf_event_sample *sample = self->ptrObj;
> > +
> > +     return PyLong_FromUnsignedLong(sample->data.cpu);
> > +}
> > +
> > +PyObject *PyPerfSample_nr(PyPerfEventSample *self)
> > +{
> > +     struct perf_event_sample *sample = self->ptrObj;
> > +
> > +     return PyLong_FromUnsignedLongLong(sample->data.nr);
> > +}
> > +
> > +PyObject *PyPerfSample_ip(PyPerfEventSample *self)
> > +{
> > +     struct perf_event_sample *sample = self->ptrObj;
> > +
> > +     return PyUnicode_FromString(sample->ip);
> > +}
> > +
> > +PyObject *PyPerfSample_tid_comm(PyPerfEventSample *self, PyObject *args, PyObject *kwargs)
> > +{
> > +     struct perf_event_sample *sample = self->ptrObj;
> > +     char *name = NULL;
> > +     int i;
> > +
> > +     if (!sample->perf || !sample->perf->thr_count || !sample->perf->thr_map)
> > +             Py_RETURN_NONE;
> > +
> > +     for (i = 0; i < sample->perf->thr_count; i++)
> > +             if (sample->perf->thr_map[i].tid == sample->data.tid)
> > +                     break;
> > +
> > +     if (i < sample->perf->thr_count && sample->perf->thr_map[i].comm)
> > +             name = sample->perf->thr_map[i].comm;
> > +
> > +     if (name)
> > +             return PyUnicode_FromString(name);
> > +
> > +     Py_RETURN_NONE;
> > +}
> > +
> > +PyObject *PyPerfSample_ips(PyPerfEventSample *self)
> > +{
> > +     struct perf_event_sample *sample = self->ptrObj;
> > +     PyObject *slist;
> > +     unsigned int i;
> > +
> > +     slist = PyList_New(0);
> > +     for (i = 0 ; i < sample->data.nr; i++)
> > +             PyList_Append(slist, PyUnicode_FromString((char *)sample->ips[i]));
> > +
> > +     return slist;
> > +}
> > diff --git a/src/perfpy-utils.h b/src/perfpy-utils.h
> > new file mode 100644
> > index 0000000..0727a9a
> > --- /dev/null
> > +++ b/src/perfpy-utils.h
> > @@ -0,0 +1,41 @@
> > +/* SPDX-License-Identifier: LGPL-2.1 */
> > +
> > +/*
> > + * Copyright 2022 VMware Inc, Tzvetomir Stoyanov (VMware) <tz.stoyanov@gmail.com>
> > + */
> > +
> > +#ifndef _TC_PERF_PY_UTILS
> > +#define _TC_PERF_PY_UTILS
> > +
> > +// Python
> > +#include <Python.h>
> > +
> > +// trace-cruncher
> > +#include "common.h"
> > +
> > +struct perf_handle;
> > +struct perf_event_sample;
> > +
> > +C_OBJECT_WRAPPER_DECLARE(perf_handle, PyPerf);
> > +C_OBJECT_WRAPPER_DECLARE(perf_event_sample, PyPerfEventSample);
> > +
> > +PyObject *PyPerfSample_new(PyObject *self, PyObject *args, PyObject *kwargs);
> > +
> > +PyObject *PyPerf_start(PyPerf *self);
> > +PyObject *PyPerf_stop(PyPerf *self);
> > +PyObject *PyPerf_getSamples(PyPerf *self);
> > +
> > +PyObject *PyPerfSample_id(PyPerfEventSample *self);
> > +PyObject *PyPerfSample_ip(PyPerfEventSample *self);
> > +PyObject *PyPerfSample_pid(PyPerfEventSample *self);
> > +PyObject *PyPerfSample_tid(PyPerfEventSample *self);
> > +PyObject *PyPerfSample_tid_comm(PyPerfEventSample *self, PyObject *args, PyObject *kwargs);
> > +PyObject *PyPerfSample_time(PyPerfEventSample *self);
> > +PyObject *PyPerfSample_cpu(PyPerfEventSample *self);
> > +PyObject *PyPerfSample_nr(PyPerfEventSample *self);
> > +PyObject *PyPerfSample_ips(PyPerfEventSample *self);
> > +
> > +void py_perf_handle_free(struct perf_handle *handle);
> > +void py_perf_sample_free(struct perf_event_sample *sample);
> > +
> > +#endif
> > diff --git a/src/perfpy.c b/src/perfpy.c
> > new file mode 100644
> > index 0000000..745d519
> > --- /dev/null
> > +++ b/src/perfpy.c
> > @@ -0,0 +1,141 @@
> > +// SPDX-License-Identifier: LGPL-2.1
> > +
> > +/*
> > + * Copyright 2022 VMware Inc, Tzvetomir Stoyanov (VMware) <tz.stoyanov@gmail.com>
> > + */
> > +
> > +// Python
> > +#include <Python.h>
> > +
> > +// libperf
> > +#include <perf/core.h>
> > +#include <perf/evsel.h>
> > +#include <perf/mmap.h>
> > +#include <perf/event.h>
> > +
> > +// trace-cruncher
> > +#include "common.h"
> > +#include "perfpy-utils.h"
> > +
> > +extern PyObject *PERF_ERROR;
> > +
> > +static PyMethodDef PyPerf_methods[] = {
> > +     {"start",
> > +      (PyCFunction) PyPerf_start,
> > +      METH_NOARGS,
> > +      "start sampling"
> > +     },
> > +     {"stop",
> > +      (PyCFunction) PyPerf_stop,
> > +      METH_NOARGS,
> > +      "stop sampling"
> > +     },
> > +     {"get_samples",
> > +      (PyCFunction) PyPerf_getSamples,
> > +      METH_NOARGS,
> > +      "get recorded samples"
> > +     },
> > +     {NULL}
> > +};
> > +C_OBJECT_WRAPPER(perf_handle, PyPerf, NO_DESTROY, py_perf_handle_free);
> > +
> > +static PyMethodDef PyPerfEventSample_methods[] = {
> > +     {"id",
> > +      (PyCFunction) PyPerfSample_id,
> > +      METH_NOARGS,
> > +      "get sample id"
> > +     },
> > +     {"ip",
> > +      (PyCFunction) PyPerfSample_ip,
> > +      METH_NOARGS,
> > +      "get sample ip"
> > +     },
> > +     {"pid",
> > +      (PyCFunction) PyPerfSample_pid,
> > +      METH_NOARGS,
> > +      "get sample pid"
> > +     },
> > +     {"tid",
> > +      (PyCFunction) PyPerfSample_tid,
> > +      METH_NOARGS,
> > +      "get sample tid"
> > +     },
> > +     {"tid_comm",
> > +      (PyCFunction) PyPerfSample_tid_comm,
> > +      METH_VARARGS | METH_KEYWORDS,
> > +      "get sample tid"
> > +     },
> > +     {"time",
> > +      (PyCFunction) PyPerfSample_time,
> > +      METH_NOARGS,
> > +      "get sample timestamp"
> > +     },
> > +     {"cpu",
> > +      (PyCFunction) PyPerfSample_cpu,
> > +      METH_NOARGS,
> > +      "get sample cpu"
> > +     },
> > +     {"stack_count",
> > +      (PyCFunction) PyPerfSample_nr,
> > +      METH_NOARGS,
> > +      "get sample stack count"
> > +     },
> > +     {"stack",
> > +      (PyCFunction) PyPerfSample_ips,
> > +      METH_NOARGS,
> > +      "get sample stack"
> > +     },
> > +     {NULL}
> > +};
> > +C_OBJECT_WRAPPER(perf_event_sample, PyPerfEventSample, NO_DESTROY, py_perf_sample_free);
> > +
> > +static PyMethodDef perfpy_methods[] = {
> > +     {"sample",
> > +      (PyCFunction) PyPerfSample_new,
> > +      METH_VARARGS | METH_KEYWORDS,
> > +      "new perf sample instance"
> > +     },
> > +     {NULL}
> > +};
> > +
> > +static int perf_error_print(enum libperf_print_level level,
> > +                         const char *fmt, va_list ap)
> > +{
> > +     return vfprintf(stderr, fmt, ap);
> > +}
> > +
> > +static struct PyModuleDef perfpy_module = {
> > +     PyModuleDef_HEAD_INIT,
> > +     "perfpy",
> > +     "Python interface for Perf.",
> > +     -1,
> > +     perfpy_methods
> > +};
> > +
> > +PyMODINIT_FUNC PyInit_perfpy(void)
> > +{
> > +
> > +     if (!PyPerfTypeInit())
> > +             return NULL;
> > +     if (!PyPerfEventSampleTypeInit())
> > +             return NULL;
> > +
> > +     PERF_ERROR = PyErr_NewException("tracecruncher.perfpy.perf_error",
> > +                                     NULL, NULL);
> > +
> > +     PyObject *module = PyModule_Create(&perfpy_module);
> > +
> > +     PyModule_AddObject(module, "perf_error", PERF_ERROR);
> > +     PyModule_AddObject(module, "perf_handle", (PyObject *) &PyPerfType);
> > +     PyModule_AddObject(module, "perf_event_sample", (PyObject *) &PyPerfEventSampleType);
> > +
> > +     if (geteuid() != 0) {
> > +             PyErr_SetString(PERF_ERROR,
> > +                             "Permission denied. Root privileges are required.");
> > +             return NULL;
> > +     }
> > +
> > +     libperf_init(perf_error_print);
> > +
> > +     return module;
> > +}
> > --
> > 2.34.1
>
> --
>
> - Arnaldo



-- 
Tzvetomir (Ceco) Stoyanov
VMware Open Source Technology Center

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [RFC PATCH 3/3] trace-cruncher: perf example
  2022-02-24 16:37 ` [RFC PATCH 3/3] trace-cruncher: perf example Tzvetomir Stoyanov (VMware)
@ 2022-03-18  9:52   ` Yordan Karadzhov
  0 siblings, 0 replies; 8+ messages in thread
From: Yordan Karadzhov @ 2022-03-18  9:52 UTC (permalink / raw)
  To: Tzvetomir Stoyanov (VMware), acme, olsajiri, irogers
  Cc: rostedt, linux-trace-devel, linux-perf-users

Hi Ceco,

Thanks a lot!

I really like the idea of having a perf sub-module in trace-cruncher and I think your RFC patch-set is a great starting point.

Let's start with some discussion of the structure of the APIs that this module has to contain. See my comments below.

On 24.02.22 г. 18:37 ч., Tzvetomir Stoyanov (VMware) wrote:
> Example python program for using trace-cruncher to collect performance
> statistics of a given process.
> 
> Signed-off-by: Tzvetomir Stoyanov (VMware) <tz.stoyanov@gmail.com>
> ---
>   examples/perf_sampling.py | 51 +++++++++++++++++++++++++++++++++++++++
>   1 file changed, 51 insertions(+)
>   create mode 100755 examples/perf_sampling.py
> 
> diff --git a/examples/perf_sampling.py b/examples/perf_sampling.py
> new file mode 100755
> index 0000000..1b57f39
> --- /dev/null
> +++ b/examples/perf_sampling.py
> @@ -0,0 +1,51 @@
> +#!/usr/bin/env python3
> +
> +"""
> +SPDX-License-Identifier: CC-BY-4.0
> +
> +Copyright 2022 VMware Inc, Tzvetomir Stoyanov (VMware) <tz.stoyanov@gmail.com>
> +"""
> +
> +import sys
> +import time
> +import signal
> +
> +import tracecruncher.perfpy as perf
> +
> +def SortKey(sample):
> +    return sample.time()
> +
> +def perf_stop(sig, frame):
> +    # Stop collection of performance traces
> +    p.stop()
> +
> +if __name__ == "__main__":
> +    if len(sys.argv) < 2:
> +        print('Usage: ', sys.argv[0], ' [PROCESS]')
> +        sys.exit(1)
> +
> +    # Create perf sample object for the given process
> +    p = perf.sample(pid=int(sys.argv[1]), freq=99)

If you initiate the sampling using the PID of the process, you are limited to tracing only processes that are already 
running. Hence, there will be no way to trace the very beginning of the process you are interested in. Let's keep the 
current way of initializing (via PID), but make it optional and have a second option that will be to provide a process 
name and arguments to be started internally (using fork->exec).

Also we need a better name for this API. Something that is more coherent with the naming of the equivalent ftracepy APIs.

> +    signal.signal(signal.SIGINT, perf_stop)

I would prefer to have the signal handling done internally inside the C code and not bother the Python user with this.

> +    print('Start collecting performance data, press ctrl+c  to stop')
> +    # Start collecting performance traces
> +    p.start()

I wonder what the reason is for having the constructor of the perf instance above and 'start()' as separate APIs. Do you 
have in mind some use case in which we have to create the instance, do something important and only then call start()?

Also in the current implementation, the only way to stop the sampling is 'ctrl+c'. You have the 'stop()' API but the 
user has no way of really calling it, since the execution is blocked inside 'start()' which will never return if the 
sampling is running.

But if the sampling runs on its own (started using fork->exec) then the stop() API will indeed be useful. Note that in 
this case you will also have to provide a 'destroy' method for the 'perf' object, because we have to guarantee that the 
sampling will stop when the execution of the user script exits.

cheers,
Yordan

> +    # wait for ctrl+c
> +    signal.pause()
> +    # Get collected samples
> +    samples = p.get_samples()
> +    # Sort the list based on the timestamp
> +    samples.sort(key=SortKey)
> +    time = 0
> +    ip_count = 0
> +    for s in samples:
> +        # Print PID, TID, time and trace depth of each sample
> +        if time == 0:
> +            time = s.time()
> +        print("{0} {1} ({2}), +{3}:".format(s.ip(), s.tid(), s.tid_comm(), s.time() - time))
> +        ips = s.stack()
> +        ip_count += len(ips)
> +        for ip in reversed(ips):
> +            # Print stack trace of the sample
> +            print("\t{0}".format(ip))
> +    print("\nCollected {0} samples, {1} ip traces".format(len(samples), ip_count))
> \ No newline at end of file

^ permalink raw reply	[flat|nested] 8+ messages in thread

end of thread, other threads:[~2022-03-18  9:52 UTC | newest]

Thread overview: 8+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-02-24 16:37 [RFC PATCH 0/3] trace-cruncher: Initial support for perf Tzvetomir Stoyanov (VMware)
2022-02-24 16:37 ` [RFC PATCH 1/3] trace-cruncher: Logic for resolving address to function name Tzvetomir Stoyanov (VMware)
2022-02-24 16:37 ` [RFC PATCH 2/3] trace-cruncher: Support for perf Tzvetomir Stoyanov (VMware)
2022-02-25 14:51   ` Arnaldo Carvalho de Melo
2022-02-25 15:38     ` Tzvetomir Stoyanov
2022-02-24 16:37 ` [RFC PATCH 3/3] trace-cruncher: perf example Tzvetomir Stoyanov (VMware)
2022-03-18  9:52   ` Yordan Karadzhov
2022-02-24 16:52 ` [RFC PATCH 0/3] trace-cruncher: Initial support for perf Ian Rogers

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).