LinuxPPC-Dev Archive on lore.kernel.org
 help / color / mirror / Atom feed
From: Tanushree Shah <tshah@linux.ibm.com>
To: acme@kernel.org, jolsa@kernel.org, adrian.hunter@intel.com,
	vmolnaro@redhat.com, mpetlan@redhat.com, tmricht@linux.ibm.com,
	maddy@linux.ibm.com, irogers@google.com, namhyung@kernel.org,
	linux-kernel@vger.kernel.org
Cc: linux-perf-users@vger.kernel.org, linuxppc-dev@lists.ozlabs.org,
	atrajeev@linux.ibm.com, hbathini@linux.ibm.com,
	Tejas.Manhas1@ibm.com, Tanushree.Shah@ibm.com,
	Shivani.Nittor@ibm.com, Tanushree Shah <tshah@linux.ibm.com>
Subject: [RFC PATCH 1/4] perf/trace-dat: Add trace.dat export infrastructure
Date: Mon,  8 Jun 2026 18:29:49 +0530	[thread overview]
Message-ID: <20260608125951.90425-3-tshah@linux.ibm.com> (raw)
In-Reply-To: <20260608125951.90425-2-tshah@linux.ibm.com>

Add new utility files util/trace-dat.c and util/trace-dat.h that
implement the infrastructure for exporting perf.data tracepoints
to the trace.dat format, which is compatible with trace-cmd and KernelShark.

trace-dat.c defines all globals and functions needed for:
- Per-cpu raw event buffer management (init_cpu_buffers,
  collect_cpu_event, free_cpu_buffers)
- ftrace ring buffer page construction (write_page, write_cpu_dat)
- trace.dat section writers (write_strings_section,
  write_options_section1, write_options_section2,
  write_flyrecord_section)

trace-dat.h declares all globals and function prototypes to be
used by data-convert-trace.c and trace-event-read.c.

Signed-off-by: Tanushree Shah <tshah@linux.ibm.com>
---
 tools/perf/util/Build       |   1 +
 tools/perf/util/trace-dat.c | 705 ++++++++++++++++++++++++++++++++++++
 tools/perf/util/trace-dat.h |  79 ++++
 3 files changed, 785 insertions(+)
 create mode 100644 tools/perf/util/trace-dat.c
 create mode 100644 tools/perf/util/trace-dat.h

diff --git a/tools/perf/util/Build b/tools/perf/util/Build
index 70cc91d00804..c000d8032d25 100644
--- a/tools/perf/util/Build
+++ b/tools/perf/util/Build
@@ -98,6 +98,7 @@ perf-util-y += trace-event-scripting.o
 perf-util-$(CONFIG_LIBTRACEEVENT) += trace-event.o
 perf-util-$(CONFIG_LIBTRACEEVENT) += trace-event-parse.o
 perf-util-$(CONFIG_LIBTRACEEVENT) += trace-event-read.o
+perf-util-$(CONFIG_LIBTRACEEVENT) += trace-dat.o
 perf-util-y += sort.o
 perf-util-y += hist.o
 perf-util-y += util.o
diff --git a/tools/perf/util/trace-dat.c b/tools/perf/util/trace-dat.c
new file mode 100644
index 000000000000..aa34a7b89b7a
--- /dev/null
+++ b/tools/perf/util/trace-dat.c
@@ -0,0 +1,705 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright 2026, IBM Corporation
+ * Author: Tanushree Shah <tshah@linux.ibm.com>
+ *
+ * trace-dat.c
+ *
+ * This file implements the trace.dat format writer for perf tool.
+ * It collects trace events from multiple CPUs and writes them in
+ * the trace-cmd compatible format.
+ */
+#include <errno.h>
+#include <limits.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include "api/fs/tracing_path.h"
+#include "trace-dat.h"
+#include "trace-event.h"
+#include "session.h"
+#include "header.h"
+#include "../perf.h"
+#include "debug.h"
+
+/* ftrace ring buffer constants for trace.dat flyrecord section
+ *
+ * Each page has a 16-byte header (timestamp + commit size), followed by
+ * variable-length records. Each record has a 4-byte header word encoding:
+ *   Bits 0-4:	 Type/Length field (5 bits, masked by TYPE_LEN_MASK)
+ *   Bits 5-31:  Time delta (27 bits, masked by TIME_MASK)
+ *
+ * Despite its name, TRACE_DAT_RECORD_HEADER_SIZE is the size of the per-page
+ * header; the per-record header word is TRACE_DAT_WORD_SIZE bytes.
+ *
+ * NOTE(review): the ftrace ring buffer normally stores each record's delta
+ * relative to the previous record on the page - confirm that the writer in
+ * this file produces deltas with that meaning.
+ */
+#define TRACE_DAT_RECORD_HEADER_SIZE 16		/* Page header: 8-byte ts + 8-byte commit */
+#define TRACE_DAT_RECORD_TYPE_LEN_MASK 0x1F		/* Keeps the lower 5 bits (type/length) */
+#define TRACE_DAT_RECORD_TIME_SHIFT	5		/* Left shift placing the delta above type/length */
+#define TRACE_DAT_RECORD_TIME_MASK 0x07FFFFFF	/* Mask for 27-bit time delta */
+#define TRACE_DAT_WORD_SIZE	4		/* Records aligned to 4-byte boundaries */
+#define TRACE_DAT_WORD_ALIGN_MASK 3		/* WORD_SIZE - 1, used to round lengths up */
+
+/* Initial capacity for per-CPU event buffer (grows by doubling) */
+#define INITIAL_EVENT_CAPACITY 1024
+/* Initial capacity for page record array (grows by doubling) */
+#define INITIAL_PAGE_RECORD_CAPACITY 64
+/* Buffer size for reading trace_clock string from debugfs/tracefs */
+#define CLOCK_BUFFER_SIZE 256
+
+FILE *trace_dat_fp;			/* output stream used by all section writers */
+int trace_dat_page_size;		/* size in bytes of one flyrecord page */
+int trace_dat_nr_cpus;			/* number of per-CPU buffers */
+long trace_dat_options_offset;		/* position patched with the first options section start */
+long trace_dat_header_info_offset;	/* written as the HEADER option payload */
+long trace_dat_events_format_offset;	/* written as the EVENT option payload */
+long trace_dat_ftrace_format_offset;	/* written as the FTRACE option payload */
+long trace_dat_kallsyms_offset;		/* written as the KALLSYMS option payload */
+long trace_dat_cmdline_offset;		/* written as the CMDLINE option payload */
+long trace_dat_next_options_offset;	/* position of the first options section's DONE option */
+
+
+/**
+ * struct cpu_event - Single trace event from a CPU
+ * @ts: Timestamp of the event, as passed to trace_dat__collect_cpu_event()
+ * @raw: Heap copy of the raw event data; allocated in
+ *       trace_dat__collect_cpu_event() and released in
+ *       trace_dat__free_cpu_buffers()
+ * @raw_size: Size of raw event data in bytes (never 0 for a stored event)
+ */
+struct cpu_event {
+	unsigned long long ts;
+	void *raw;
+	unsigned int raw_size;
+};
+
+/**
+ * struct cpu_events - Collection of trace events for a single CPU
+ * @events: Array of events, grown with realloc() as events are collected
+ * @count: Number of events currently stored
+ * @capacity: Number of entries currently allocated in @events
+ */
+struct cpu_events {
+	struct cpu_event  *events;
+	int count;
+	int capacity;
+};
+
+static struct cpu_events *trace_cpu_data;	/* array with one entry per CPU */
+static long *buffer_opt_cpu_offsets_pos;	/* per-CPU position of the BUFFER offset/size slot */
+static long opt_payload_start;			/* position of the BUFFER option payload */
+
+/**
+ * trace_dat__init_cpu_buffers - Allocate per-cpu event buffers
+ * @nr_cpus: Number of CPUs to collect tracepoint data for; must be positive
+ *
+ * Allocates the per-CPU event array and the array that remembers where each
+ * CPU's offset/size slot lives in the BUFFER option. Buffers left over from
+ * an earlier successful call are released first, so calling this twice does
+ * not leak. On failure the previous state is left untouched.
+ *
+ * Return: 0 on success, -EINVAL if @nr_cpus is not positive, -ENOMEM if an
+ * allocation fails.
+ */
+int trace_dat__init_cpu_buffers(int nr_cpus)
+{
+	struct cpu_events *cpu_data;
+	long *offsets_pos;
+
+	if (nr_cpus <= 0)
+		return -EINVAL;
+
+	cpu_data = calloc(nr_cpus, sizeof(*cpu_data));
+	offsets_pos = calloc(nr_cpus, sizeof(*offsets_pos));
+	if (!cpu_data || !offsets_pos) {
+		free(cpu_data);
+		free(offsets_pos);
+		return -ENOMEM;
+	}
+
+	/* Drop whatever an earlier initialisation left behind. */
+	trace_dat__free_cpu_buffers();
+
+	trace_cpu_data = cpu_data;
+	buffer_opt_cpu_offsets_pos = offsets_pos;
+	trace_dat_nr_cpus = nr_cpus;
+	return 0;
+}
+
+/**
+ * trace_dat__collect_cpu_event - Store one raw tracepoint event for a CPU
+ * @cpu: CPU the event was recorded on; must be in [0, trace_dat_nr_cpus)
+ * @ts: Timestamp of the event
+ * @raw: Raw event data; copied, the caller keeps ownership of its buffer
+ * @raw_size: Size of @raw in bytes; must be non-zero
+ *
+ * The copy is appended to the per-CPU buffer that later feeds the trace.dat
+ * flyrecord section. The buffer grows by doubling. When growing or copying
+ * fails, the buffer keeps its previous, still valid contents.
+ *
+ * Return: 0 on success, -EINVAL on bad arguments or when the buffers have not
+ * been initialised, -ENOMEM on allocation failure.
+ */
+int trace_dat__collect_cpu_event(int cpu, unsigned long long ts,
+				 void *raw, unsigned int raw_size)
+{
+	struct cpu_events *cpu_events;
+	struct cpu_event *event;
+	void *copy;
+
+	if (!trace_cpu_data || cpu < 0 || cpu >= trace_dat_nr_cpus)
+		return -EINVAL;
+
+	if (!raw || raw_size == 0)
+		return -EINVAL;
+
+	cpu_events = &trace_cpu_data[cpu];
+
+	if (cpu_events->count >= cpu_events->capacity) {
+		struct cpu_event *grown;
+		int new_capacity;
+
+		/* Doubling past INT_MAX would overflow the int capacity. */
+		if (cpu_events->capacity > INT_MAX / 2)
+			return -ENOMEM;
+
+		new_capacity = cpu_events->capacity ?
+			       cpu_events->capacity * 2 : INITIAL_EVENT_CAPACITY;
+
+		/*
+		 * Keep the old array reachable until realloc() succeeded,
+		 * otherwise a failure would leak it and leave count pointing
+		 * into a NULL array.
+		 */
+		grown = realloc(cpu_events->events,
+				(size_t)new_capacity * sizeof(*grown));
+		if (!grown)
+			return -ENOMEM;
+
+		cpu_events->events = grown;
+		cpu_events->capacity = new_capacity;
+	}
+
+	copy = malloc(raw_size);
+	if (!copy)
+		return -ENOMEM;
+	memcpy(copy, raw, raw_size);
+
+	event = &cpu_events->events[cpu_events->count++];
+	event->ts = ts;
+	event->raw = copy;
+	event->raw_size = raw_size;
+
+	return 0;
+}
+
+/**
+ * trace_dat__write_page - Write a single page of trace records
+ * @fp: Output stream
+ * @base_ts: Timestamp stored in the first 8 bytes of the page header
+ * @records: Array of @nr_recs already encoded records
+ * @rec_sizes: Size in bytes of each entry of @records
+ * @nr_recs: Number of records; 0 produces a page with an empty payload
+ *
+ * Builds one page of trace_dat_page_size bytes: the header (@base_ts followed
+ * by the commit size, i.e. the sum of all record sizes), the records back to
+ * back, and zero padding up to the page size.
+ *
+ * Return: 0 on success, -EINVAL if the page size cannot hold the header or
+ * the records do not fit into the page, -ENOMEM on allocation failure, -EIO
+ * if the page could not be written completely.
+ */
+static int trace_dat__write_page(FILE *fp, unsigned long long base_ts,
+			char **records, int *rec_sizes, int nr_recs)
+{
+	unsigned long long commit = 0;
+	size_t offset = TRACE_DAT_RECORD_HEADER_SIZE;
+	size_t page_size;
+	int ret = 0;
+	int i;
+	char *page;
+
+	if (trace_dat_page_size < TRACE_DAT_RECORD_HEADER_SIZE)
+		return -EINVAL;
+	page_size = trace_dat_page_size;
+
+	page = calloc(1, page_size);
+	if (!page)
+		return -ENOMEM;
+
+	for (i = 0; i < nr_recs; i++) {
+		/* Never copy past the end of the page buffer. */
+		if (rec_sizes[i] < 0 || (size_t)rec_sizes[i] > page_size - offset) {
+			ret = -EINVAL;
+			goto out;
+		}
+		memcpy(page + offset, records[i], rec_sizes[i]);
+		offset += rec_sizes[i];
+		commit += rec_sizes[i];
+	}
+
+	memcpy(page, &base_ts, sizeof(base_ts));
+	memcpy(page + sizeof(base_ts), &commit, sizeof(commit));
+
+	/* fwrite() returns the number of bytes here; a short write is an error. */
+	if (fwrite(page, 1, page_size, fp) != page_size)
+		ret = -EIO;
+out:
+	free(page);
+	return ret;
+}
+
+/* Largest payload whose length still fits the 5-bit type/length field */
+#define TRACE_DAT_RECORD_MAX_SMALL_DATA		(28 * TRACE_DAT_WORD_SIZE)
+/* Type/length value that marks a time-extend record */
+#define TRACE_DAT_RECORD_TYPE_TIME_EXTEND	30
+/* Delta bits kept in the header word; higher bits go to the extend word */
+#define TRACE_DAT_RECORD_TIME_BITS		27
+/* A time-extend record holds TIME_BITS + 32 bits of delta */
+#define TRACE_DAT_RECORD_MAX_DELTA_BITS		(TRACE_DAT_RECORD_TIME_BITS + 32)
+
+/* Round @len up to the 4-byte record alignment */
+static size_t trace_dat__word_align(size_t len)
+{
+	return (len + TRACE_DAT_WORD_ALIGN_MASK) & ~(size_t)TRACE_DAT_WORD_ALIGN_MASK;
+}
+
+/* Bytes one event occupies on a page, including a time-extend record if needed */
+static size_t trace_dat__record_size(unsigned long long delta, unsigned int raw_size)
+{
+	size_t size = TRACE_DAT_WORD_SIZE + trace_dat__word_align(raw_size);
+
+	if (raw_size > TRACE_DAT_RECORD_MAX_SMALL_DATA)
+		size += TRACE_DAT_WORD_SIZE;		/* explicit length word */
+	if (delta > TRACE_DAT_RECORD_TIME_MASK)
+		size += 2 * TRACE_DAT_WORD_SIZE;	/* time-extend record */
+
+	return size;
+}
+
+/* Store one 32-bit word at *@pos in host byte order and advance *@pos */
+static void trace_dat__put_word(char **pos, unsigned int word)
+{
+	memcpy(*pos, &word, TRACE_DAT_WORD_SIZE);
+	*pos += TRACE_DAT_WORD_SIZE;
+}
+
+/*
+ * Encode @event at @dst, which must be zero-filled and have room for
+ * trace_dat__record_size(@delta, @event->raw_size) bytes. Returns the number
+ * of bytes used.
+ *
+ * Layout follows the ftrace ring buffer: a delta that does not fit the 27-bit
+ * field is emitted as a separate time-extend record, and a payload longer than
+ * TRACE_DAT_RECORD_MAX_SMALL_DATA uses type/length 0 with the length (payload
+ * rounded up, plus the length word itself) in the word after the header.
+ *
+ * NOTE(review): the header word keeps type/length in the low bits, as the
+ * original code did. Readers presumably expect the bit-field order of the
+ * file's byte order, so a big-endian host may need the fields swapped -
+ * confirm against the reader.
+ */
+static size_t trace_dat__encode_record(char *dst, unsigned long long delta,
+				       const struct cpu_event *event)
+{
+	size_t aligned = trace_dat__word_align(event->raw_size);
+	char *pos = dst;
+
+	if (delta > TRACE_DAT_RECORD_TIME_MASK) {
+		unsigned int low = delta & TRACE_DAT_RECORD_TIME_MASK;
+
+		trace_dat__put_word(&pos, (low << TRACE_DAT_RECORD_TIME_SHIFT) |
+					  TRACE_DAT_RECORD_TYPE_TIME_EXTEND);
+		trace_dat__put_word(&pos, delta >> TRACE_DAT_RECORD_TIME_BITS);
+		/* The extend record carried the whole delta. */
+		delta = 0;
+	}
+
+	if (event->raw_size > TRACE_DAT_RECORD_MAX_SMALL_DATA) {
+		trace_dat__put_word(&pos, (unsigned int)delta << TRACE_DAT_RECORD_TIME_SHIFT);
+		trace_dat__put_word(&pos, aligned + TRACE_DAT_WORD_SIZE);
+	} else {
+		unsigned int type_len = (aligned / TRACE_DAT_WORD_SIZE) &
+					TRACE_DAT_RECORD_TYPE_LEN_MASK;
+
+		trace_dat__put_word(&pos, ((unsigned int)delta << TRACE_DAT_RECORD_TIME_SHIFT) |
+					  type_len);
+	}
+
+	memcpy(pos, event->raw, event->raw_size);
+	pos += aligned;
+
+	return pos - dst;
+}
+
+/* Emit the first @used bytes of @payload as one page stamped with @base_ts */
+static int trace_dat__flush_page(FILE *fp, unsigned long long base_ts,
+				 char *payload, size_t used)
+{
+	int size = used;
+
+	return trace_dat__write_page(fp, base_ts, &payload, &size, 1);
+}
+
+/**
+ * trace_dat__write_cpu_dat - Write one CPU's events as trace.dat flyrecord pages
+ * @fp: Output stream
+ * @cpu: Index into the per-CPU event buffers
+ * @file_offset_out: Receives the file position at which this CPU's data starts
+ *
+ * Events are packed into pages in the order they were collected. Every page
+ * is stamped with the timestamp of its first event, that event has delta 0,
+ * and each following event stores the delta to the event before it, which is
+ * how ring buffer readers reconstruct timestamps. A new page is started when
+ * the next record does not fit, when the timestamp goes backwards, or when
+ * the delta is too large even for a time-extend record. A CPU without events
+ * still gets a single zero-filled page.
+ *
+ * Events are assumed to have been collected in timestamp order per CPU.
+ *
+ * Return: 0 on success, -EINVAL if the page size cannot hold any payload,
+ * -E2BIG if an event does not fit into a page, -ENOMEM on allocation failure,
+ * or the error returned by trace_dat__write_page().
+ */
+static int trace_dat__write_cpu_dat(FILE *fp, int cpu, unsigned long long *file_offset_out)
+{
+	struct cpu_events *cpu_events = &trace_cpu_data[cpu];
+	unsigned long long base_ts = 0;
+	unsigned long long prev_ts = 0;
+	size_t max_payload;
+	size_t used = 0;
+	char *payload;
+	long start;
+	int ret = 0;
+	int i;
+
+	if (trace_dat_page_size <= TRACE_DAT_RECORD_HEADER_SIZE)
+		return -EINVAL;
+	max_payload = trace_dat_page_size - TRACE_DAT_RECORD_HEADER_SIZE;
+
+	start = ftell(fp);
+	if (start < 0)
+		return -EIO;
+	*file_offset_out = start;
+
+	/* No events: one page with timestamp 0 and commit 0. */
+	if (cpu_events->count == 0)
+		return trace_dat__write_page(fp, 0, NULL, NULL, 0);
+
+	/* Zero-filled so that alignment padding inside records is zero. */
+	payload = calloc(1, max_payload);
+	if (!payload)
+		return -ENOMEM;
+
+	for (i = 0; i < cpu_events->count; i++) {
+		const struct cpu_event *event = &cpu_events->events[i];
+		unsigned long long delta = 0;
+		int new_page = 0;
+
+		/* Checked first so the size arithmetic below cannot wrap. */
+		if (event->raw_size > max_payload ||
+		    trace_dat__record_size(0, event->raw_size) > max_payload) {
+			ret = -E2BIG;
+			goto out_free;
+		}
+
+		if (used) {
+			if (event->ts < prev_ts) {
+				new_page = 1;
+			} else {
+				delta = event->ts - prev_ts;
+				if ((delta >> TRACE_DAT_RECORD_MAX_DELTA_BITS) ||
+				    used + trace_dat__record_size(delta, event->raw_size) >
+				    max_payload)
+					new_page = 1;
+			}
+		}
+
+		if (new_page) {
+			ret = trace_dat__flush_page(fp, base_ts, payload, used);
+			if (ret < 0)
+				goto out_free;
+			memset(payload, 0, used);
+			used = 0;
+		}
+
+		/* The first record of a page defines the page timestamp. */
+		if (!used) {
+			base_ts = event->ts;
+			delta = 0;
+		}
+
+		used += trace_dat__encode_record(payload + used, delta, event);
+		prev_ts = event->ts;
+	}
+
+	if (used)
+		ret = trace_dat__flush_page(fp, base_ts, payload, used);
+out_free:
+	free(payload);
+	return ret;
+}
+
+/**
+ * trace_dat__write_strings_section - Write the section name lookup table
+ *
+ * Writes the section header (id, flags, string id and payload size) followed
+ * by every section name as a NUL-terminated string. The STRID_* constants are
+ * byte offsets into this table, so the order and spelling of the names must
+ * stay in sync with them.
+ *
+ * Return: 0 on success, -EBADF if no output stream is set, -EIO if a write
+ * fails or is incomplete.
+ */
+int trace_dat__write_strings_section(void)
+{
+	static const char * const section_names[] = {
+		"headers",		/* offset 0 - strid for section 16  */
+		"ftrace event formats", /* offset 8 - strid for section 17  */
+		"events format",	/* offset 29 - strid for section 18  */
+		"kallsyms",		/* offset 43 - strid for section 19  */
+		"cmdlines",		/* offset 52 - strid for section 21  */
+		"strings",		/* offset 61 - strid for section 15  */
+		"options",		/* offset 69 - strid for options 1   */
+		"options",		/* offset 77 - strid for options 2   */
+		"buffer-flyrecord",	/* offset 85 - strid for flyrecord 3 */
+		NULL
+	};
+	unsigned short section_id = TRACE_DAT_SECTION_STRINGS;
+	unsigned short flags = 0;
+	/* string_id points to "strings" string itself */
+	unsigned int string_id = STRID_STRINGS;
+	unsigned long long section_size = 0;
+	int i;
+
+	if (!trace_dat_fp)
+		return -EBADF;
+
+	for (i = 0; section_names[i]; i++)
+		section_size += strlen(section_names[i]) + 1;
+
+	/* write section header */
+	if (!fwrite(&section_id, sizeof(section_id), 1, trace_dat_fp) ||
+	    !fwrite(&flags, sizeof(flags), 1, trace_dat_fp) ||
+	    !fwrite(&string_id, sizeof(string_id), 1, trace_dat_fp) ||
+	    !fwrite(&section_size, sizeof(section_size), 1, trace_dat_fp))
+		return -EIO;
+
+	/* write strings, terminator included; a short write is an error */
+	for (i = 0; section_names[i]; i++) {
+		size_t len = strlen(section_names[i]) + 1;
+
+		if (fwrite(section_names[i], 1, len, trace_dat_fp) != len)
+			return -EIO;
+	}
+
+	return 0;
+}
+
+/* Write one option record: 2-byte id, 4-byte payload size, then the payload */
+static int trace_dat__write_option(unsigned short opt_id, const void *payload,
+				   unsigned int opt_size)
+{
+	if (!fwrite(&opt_id, sizeof(opt_id), 1, trace_dat_fp) ||
+	    !fwrite(&opt_size, sizeof(opt_size), 1, trace_dat_fp) ||
+	    fwrite(payload, 1, opt_size, trace_dat_fp) != opt_size)
+		return -EIO;
+
+	return 0;
+}
+
+/* Write an option whose payload is one 64-bit file offset */
+static int trace_dat__write_offset_option(unsigned short opt_id, long offset)
+{
+	/* Widen explicitly: long may be narrower than the 8 bytes written. */
+	unsigned long long value = offset;
+
+	return trace_dat__write_option(opt_id, &value, sizeof(value));
+}
+
+/**
+ * trace_dat__write_options_section1 - Write the first options section
+ *
+ * Writes options section containing CPUCOUNT, TRACECLOCK, EVENT_FORMAT,
+ * HEADER_INFO, FTRACE_EVENTS, KALLSYMS, CMDLINES options, ending with DONE
+ * option pointing to next section. Before the section is written, its start
+ * offset is stored at trace_dat_options_offset. The position of the DONE
+ * option is remembered in trace_dat_next_options_offset so that
+ * trace_dat__write_options_section2() can fill in its payload later.
+ *
+ * The TRACECLOCK payload is the content of the tracing "trace_clock" file,
+ * or "local\n" when that file cannot be opened.
+ *
+ * Return: 0 on success, -EBADF if no output stream is set, -EIO on any
+ * positioning or write failure.
+ */
+int trace_dat__write_options_section1(void)
+{
+	const struct {
+		unsigned short id;
+		long offset;
+	} offset_opts[] = {
+		{ TRACE_DAT_OPTION_EVENT,    trace_dat_events_format_offset },
+		{ TRACE_DAT_OPTION_HEADER,   trace_dat_header_info_offset },
+		{ TRACE_DAT_OPTION_FTRACE,   trace_dat_ftrace_format_offset },
+		{ TRACE_DAT_OPTION_KALLSYMS, trace_dat_kallsyms_offset },
+		{ TRACE_DAT_OPTION_CMDLINE,  trace_dat_cmdline_offset },
+	};
+	unsigned short section_id = TRACE_DAT_SECTION_OPTIONS;
+	unsigned short flags = 0;
+	unsigned int string_id = STRID_OPTIONS_1;
+	unsigned long long section_size = 0;
+	unsigned long long section_start;
+	unsigned int nr_cpus = trace_dat_nr_cpus;
+	char clock_buf[CLOCK_BUFFER_SIZE];
+	FILE *clock_file = NULL;
+	size_t clock_len;
+	char *path;
+	long section_size_pos;
+	long payload_start;
+	long end_pos;
+	long pos;
+	unsigned int i;
+
+	if (!trace_dat_fp)
+		return -EBADF;
+
+	/* fill options_offset in initial format */
+	pos = ftell(trace_dat_fp);
+	if (pos < 0)
+		return -EIO;
+	section_start = pos;
+
+	if (fseek(trace_dat_fp, trace_dat_options_offset, SEEK_SET) < 0 ||
+	    !fwrite(&section_start, sizeof(section_start), 1, trace_dat_fp) ||
+	    fseek(trace_dat_fp, 0, SEEK_END) < 0)
+		return -EIO;
+
+	/* write section header; the size is a placeholder filled in at the end */
+	if (!fwrite(&section_id, sizeof(section_id), 1, trace_dat_fp) ||
+	    !fwrite(&flags, sizeof(flags), 1, trace_dat_fp) ||
+	    !fwrite(&string_id, sizeof(string_id), 1, trace_dat_fp))
+		return -EIO;
+
+	section_size_pos = ftell(trace_dat_fp);
+	if (section_size_pos < 0 ||
+	    !fwrite(&section_size, sizeof(section_size), 1, trace_dat_fp))
+		return -EIO;
+
+	payload_start = ftell(trace_dat_fp);
+	if (payload_start < 0)
+		return -EIO;
+
+	/* CPUCOUNT option */
+	if (trace_dat__write_option(TRACE_DAT_OPTION_CPUCOUNT, &nr_cpus, sizeof(nr_cpus)))
+		return -EIO;
+
+	/* TRACECLOCK option */
+	path = get_tracing_file("trace_clock");
+	if (path) {
+		clock_file = fopen(path, "r");
+		put_tracing_file(path);
+	}
+	if (clock_file) {
+		clock_len = fread(clock_buf, 1, sizeof(clock_buf) - 1, clock_file);
+		fclose(clock_file);
+	} else {
+		strcpy(clock_buf, "local\n");
+		clock_len = strlen(clock_buf);
+	}
+	clock_buf[clock_len] = '\0';
+
+	/* The payload includes the terminating NUL. */
+	if (trace_dat__write_option(TRACE_DAT_OPTION_TRACECLOCK, clock_buf, clock_len + 1))
+		return -EIO;
+
+	/* EVENT, HEADER, FTRACE, KALLSYMS and CMDLINE options, in that order */
+	for (i = 0; i < sizeof(offset_opts) / sizeof(offset_opts[0]); i++) {
+		if (trace_dat__write_offset_option(offset_opts[i].id, offset_opts[i].offset))
+			return -EIO;
+	}
+
+	/* DONE option id - next_options_offset filled later */
+	pos = ftell(trace_dat_fp);
+	if (pos < 0)
+		return -EIO;
+	trace_dat_next_options_offset = pos;
+
+	if (trace_dat__write_offset_option(TRACE_DAT_OPTION_DONE, 0))
+		return -EIO;
+
+	/* fill section size */
+	end_pos = ftell(trace_dat_fp);
+	if (end_pos < 0)
+		return -EIO;
+
+	section_size = end_pos - payload_start;
+	if (fseek(trace_dat_fp, section_size_pos, SEEK_SET) < 0 ||
+	    !fwrite(&section_size, sizeof(section_size), 1, trace_dat_fp) ||
+	    fseek(trace_dat_fp, end_pos, SEEK_SET) < 0)
+		return -EIO;
+
+	return 0;
+}
+
+/**
+ * trace_dat__write_options_section2 - Write the options section describing the buffer
+ *
+ * Writes options section containing BUFFER option with flyrecord section
+ * (flyrecord section offset, clock type, page size, CPU count,
+ * per-CPU offsets/sizes) and DONE option.
+ *
+ * The payload of the DONE option written by
+ * trace_dat__write_options_section1() is first patched to point at this
+ * section. The flyrecord offset and the per-CPU offset/size pairs are written
+ * as zero placeholders; their file positions are remembered so that
+ * trace_dat__write_flyrecord_section() can fill them in.
+ *
+ * Return: 0 on success, -EINVAL if no output stream is set or the per-CPU
+ * buffers were not initialised, -EIO on any positioning or write failure.
+ */
+int trace_dat__write_options_section2(void)
+{
+	unsigned short section_id  = TRACE_DAT_SECTION_OPTIONS;
+	unsigned short flags = 0;
+	unsigned int string_id = STRID_OPTIONS_2;
+	unsigned long long section_size = 0;
+	unsigned short opt_id = TRACE_DAT_OPTION_BUFFER;
+	unsigned int opt_size = 0;
+	unsigned long long data_offset = 0;
+	unsigned int page_size = (unsigned int)trace_dat_page_size;
+	unsigned int nr_cpus = trace_dat_nr_cpus;
+	const char *clock = "local";
+	size_t clock_len = strlen(clock) + 1;
+	unsigned short done_id = TRACE_DAT_OPTION_DONE;
+	unsigned int done_size = sizeof(unsigned long long);
+	unsigned long long next;
+	long section_size_pos;
+	long payload_start;
+	long opt_size_pos;
+	long end_pos;
+	long pos;
+	int cpu;
+
+	if (!trace_dat_fp)
+		return -EINVAL;
+	if (trace_dat_nr_cpus > 0 && !buffer_opt_cpu_offsets_pos)
+		return -EINVAL;
+
+	/* fill done1 next offset - points to this section */
+	pos = ftell(trace_dat_fp);
+	if (pos < 0)
+		return -EIO;
+	next = pos;
+
+	/* The DONE payload follows its 2-byte id and 4-byte size. */
+	pos = trace_dat_next_options_offset + sizeof(unsigned short) + sizeof(unsigned int);
+	if (fseek(trace_dat_fp, pos, SEEK_SET) < 0 ||
+	    !fwrite(&next, sizeof(next), 1, trace_dat_fp) ||
+	    fseek(trace_dat_fp, 0, SEEK_END) < 0)
+		return -EIO;
+
+	/* write section header; the size is a placeholder filled in at the end */
+	if (!fwrite(&section_id, sizeof(section_id), 1, trace_dat_fp) ||
+	    !fwrite(&flags, sizeof(flags), 1, trace_dat_fp) ||
+	    !fwrite(&string_id, sizeof(string_id), 1, trace_dat_fp))
+		return -EIO;
+
+	section_size_pos = ftell(trace_dat_fp);
+	if (section_size_pos < 0 ||
+	    !fwrite(&section_size, sizeof(section_size), 1, trace_dat_fp))
+		return -EIO;
+
+	payload_start = ftell(trace_dat_fp);
+	if (payload_start < 0)
+		return -EIO;
+
+	/* BUFFER option; its size is a placeholder as well */
+	if (!fwrite(&opt_id, sizeof(opt_id), 1, trace_dat_fp))
+		return -EIO;
+
+	opt_size_pos = ftell(trace_dat_fp);
+	if (opt_size_pos < 0 ||
+	    !fwrite(&opt_size, sizeof(opt_size), 1, trace_dat_fp))
+		return -EIO;
+
+	pos = ftell(trace_dat_fp);
+	if (pos < 0)
+		return -EIO;
+	opt_payload_start = pos;
+
+	/* data_offset placeholder, empty name, clock, page size, CPU count */
+	if (!fwrite(&data_offset, sizeof(data_offset), 1, trace_dat_fp) ||
+	    !fwrite("\0", 1, 1, trace_dat_fp) ||
+	    fwrite(clock, 1, clock_len, trace_dat_fp) != clock_len ||
+	    !fwrite(&page_size, sizeof(page_size), 1, trace_dat_fp) ||
+	    !fwrite(&nr_cpus, sizeof(nr_cpus), 1, trace_dat_fp))
+		return -EIO;
+
+	/* per cpu: cpu_id + offset placeholder + size */
+	for (cpu = 0; cpu < trace_dat_nr_cpus; cpu++) {
+		unsigned int cpu_id = cpu;
+		unsigned long long cpu_offset = 0;  /* filled in write_flyrecord */
+		unsigned long long cpu_size = 0;    /* filled in write_flyrecord */
+
+		if (!fwrite(&cpu_id, sizeof(cpu_id), 1, trace_dat_fp))
+			return -EIO;
+
+		pos = ftell(trace_dat_fp);
+		if (pos < 0)
+			return -EIO;
+		buffer_opt_cpu_offsets_pos[cpu] = pos;
+
+		if (!fwrite(&cpu_offset, sizeof(cpu_offset), 1, trace_dat_fp) ||
+		    !fwrite(&cpu_size, sizeof(cpu_size), 1, trace_dat_fp))
+			return -EIO;
+	}
+
+	/* fill opt_size */
+	end_pos = ftell(trace_dat_fp);
+	if (end_pos < 0)
+		return -EIO;
+
+	opt_size = end_pos - opt_payload_start;
+	if (fseek(trace_dat_fp, opt_size_pos, SEEK_SET) < 0 ||
+	    !fwrite(&opt_size, sizeof(opt_size), 1, trace_dat_fp) ||
+	    fseek(trace_dat_fp, end_pos, SEEK_SET) < 0)
+		return -EIO;
+
+	/* DONE id=0; no additional options sections follow this one */
+	next = 0;
+
+	if (!fwrite(&done_id, sizeof(done_id), 1, trace_dat_fp) ||
+	    !fwrite(&done_size, sizeof(done_size), 1, trace_dat_fp) ||
+	    !fwrite(&next, sizeof(next), 1, trace_dat_fp))
+		return -EIO;
+
+	/* fill section size */
+	end_pos = ftell(trace_dat_fp);
+	if (end_pos < 0)
+		return -EIO;
+
+	section_size = end_pos - payload_start;
+	if (fseek(trace_dat_fp, section_size_pos, SEEK_SET) < 0 ||
+	    !fwrite(&section_size, sizeof(section_size), 1, trace_dat_fp) ||
+	    fseek(trace_dat_fp, end_pos, SEEK_SET) < 0)
+		return -EIO;
+
+	return 0;
+}
+
+/**
+ * trace_dat__write_flyrecord_section - Write the per-CPU trace data section
+ *
+ * Writes the section header, pads the file up to the next page boundary and
+ * then writes the pages of every CPU in CPU order. Afterwards the
+ * placeholders left by trace_dat__write_options_section2() are filled in: the
+ * per-CPU offset/size pairs and the offset of this section. The stream is
+ * left positioned at the end of the file.
+ *
+ * Return: 0 on success, -EINVAL if no output stream is set, the page size is
+ * not positive or the per-CPU buffers were not initialised, -ENOMEM on
+ * allocation failure, -EIO on any positioning or write failure, or the error
+ * returned while writing a CPU's data.
+ */
+int trace_dat__write_flyrecord_section(void)
+{
+	unsigned short section_id = TRACE_DAT_SECTION_FLYRECORD;
+	unsigned short flags = 0;
+	unsigned int string_id = STRID_BUFFER_FLYRECORD;
+	unsigned long long section_size = 0;
+	unsigned long long data_offset;
+	unsigned long long *cpu_offsets;
+	unsigned long long *cpu_sizes;
+	long section_size_pos;
+	long flyrecord_start;
+	long after_header;
+	long padding_needed;
+	long end_pos;
+	int cpu;
+	/* Most failures below are I/O failures; other paths set ret themselves. */
+	int ret = -EIO;
+
+	/* The page size is used as a divisor below, so it must be positive. */
+	if (!trace_dat_fp || trace_dat_page_size <= 0)
+		return -EINVAL;
+	if (trace_dat_nr_cpus > 0 && !buffer_opt_cpu_offsets_pos)
+		return -EINVAL;
+
+	cpu_offsets = calloc(trace_dat_nr_cpus, sizeof(*cpu_offsets));
+	cpu_sizes   = calloc(trace_dat_nr_cpus, sizeof(*cpu_sizes));
+	if (!cpu_offsets || !cpu_sizes) {
+		ret = -ENOMEM;
+		goto cleanup;
+	}
+
+	flyrecord_start = ftell(trace_dat_fp);
+	if (flyrecord_start < 0)
+		goto cleanup;
+
+	/* section header; the size is a placeholder filled in below */
+	if (!fwrite(&section_id, sizeof(section_id), 1, trace_dat_fp) ||
+	    !fwrite(&flags, sizeof(flags), 1, trace_dat_fp) ||
+	    !fwrite(&string_id, sizeof(string_id), 1, trace_dat_fp))
+		goto cleanup;
+
+	section_size_pos = ftell(trace_dat_fp);
+	if (section_size_pos < 0 ||
+	    !fwrite(&section_size, sizeof(section_size), 1, trace_dat_fp))
+		goto cleanup;
+
+	/* Align to page boundary */
+	after_header = ftell(trace_dat_fp);
+	if (after_header < 0)
+		goto cleanup;
+
+	padding_needed = (trace_dat_page_size -
+			 (after_header % trace_dat_page_size)) % trace_dat_page_size;
+
+	if (padding_needed > 0) {
+		char *pad = calloc(1, padding_needed);
+		size_t written;
+
+		if (!pad) {
+			ret = -ENOMEM;
+			goto cleanup;
+		}
+		written = fwrite(pad, 1, padding_needed, trace_dat_fp);
+		free(pad);
+		if (written != (size_t)padding_needed)
+			goto cleanup;
+	}
+
+	/* write per-cpu trace data */
+	for (cpu = 0; cpu < trace_dat_nr_cpus; cpu++) {
+		long start = ftell(trace_dat_fp);
+		int err;
+
+		if (start < 0)
+			goto cleanup;
+
+		err = trace_dat__write_cpu_dat(trace_dat_fp, cpu, &cpu_offsets[cpu]);
+		if (err < 0) {
+			pr_err("Failed to write CPU %d data\n", cpu);
+			ret = err;
+			goto cleanup;
+		}
+
+		end_pos = ftell(trace_dat_fp);
+		if (end_pos < 0)
+			goto cleanup;
+		cpu_sizes[cpu] = end_pos - start;
+	}
+
+	/*
+	 * fill section size
+	 *
+	 * NOTE(review): this size is measured from the start of the section
+	 * header, while the options sections measure from the end of their
+	 * header - confirm which one the trace.dat readers expect.
+	 */
+	end_pos = ftell(trace_dat_fp);
+	if (end_pos < 0)
+		goto cleanup;
+
+	section_size = end_pos - flyrecord_start;
+	if (fseek(trace_dat_fp, section_size_pos, SEEK_SET) < 0 ||
+	    !fwrite(&section_size, sizeof(section_size), 1, trace_dat_fp) ||
+	    fseek(trace_dat_fp, end_pos, SEEK_SET) < 0)
+		goto cleanup;
+
+	/* fill cpu offsets and sizes in BUFFER option */
+	for (cpu = 0; cpu < trace_dat_nr_cpus; cpu++) {
+		if (fseek(trace_dat_fp, buffer_opt_cpu_offsets_pos[cpu], SEEK_SET) < 0 ||
+		    !fwrite(&cpu_offsets[cpu], sizeof(cpu_offsets[cpu]), 1, trace_dat_fp) ||
+		    !fwrite(&cpu_sizes[cpu], sizeof(cpu_sizes[cpu]), 1, trace_dat_fp))
+			goto cleanup;
+	}
+
+	/*
+	 * fill data offset in buffer option; widened first because long may
+	 * be narrower than the 8 bytes written
+	 */
+	data_offset = flyrecord_start;
+	if (fseek(trace_dat_fp, opt_payload_start, SEEK_SET) < 0 ||
+	    !fwrite(&data_offset, sizeof(data_offset), 1, trace_dat_fp))
+		goto cleanup;
+
+	if (fseek(trace_dat_fp, 0, SEEK_END) < 0)
+		goto cleanup;
+
+	ret = 0;
+cleanup:
+	free(cpu_offsets);
+	free(cpu_sizes);
+	return ret;
+}
+
+/*
+ * Release every stored event copy, the per-CPU event arrays, the per-CPU
+ * array itself and the BUFFER option position table, then reset the CPU
+ * count. Does nothing when the per-CPU array was never allocated.
+ */
+void trace_dat__free_cpu_buffers(void)
+{
+	struct cpu_events *per_cpu;
+	struct cpu_events *last;
+
+	if (trace_cpu_data == NULL)
+		return;
+
+	last = trace_cpu_data + trace_dat_nr_cpus;
+	for (per_cpu = trace_cpu_data; per_cpu < last; per_cpu++) {
+		int idx = 0;
+
+		while (idx < per_cpu->count) {
+			free(per_cpu->events[idx].raw);
+			idx++;
+		}
+		free(per_cpu->events);
+	}
+
+	free(buffer_opt_cpu_offsets_pos);
+	buffer_opt_cpu_offsets_pos = NULL;
+	free(trace_cpu_data);
+	trace_cpu_data = NULL;
+	trace_dat_nr_cpus = 0;
+}
diff --git a/tools/perf/util/trace-dat.h b/tools/perf/util/trace-dat.h
new file mode 100644
index 000000000000..7667a440330c
--- /dev/null
+++ b/tools/perf/util/trace-dat.h
@@ -0,0 +1,79 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright 2026, IBM Corporation
+ * Author: Tanushree Shah <tshah@linux.ibm.com>
+ */
+
+#ifndef __PERF_TRACE_DAT_H
+#define __PERF_TRACE_DAT_H
+
+#include <stdio.h>
+
+/* trace.dat file format version */
+#define TRACE_DAT_VERSION '7'
+
+/*
+ * Section IDs for trace.dat format
+ *
+ * Written as the 16-bit id that starts a section header. The writers in
+ * trace-dat.c use OPTIONS, FLYRECORD and STRINGS; the remaining ids are not
+ * referenced there.
+ */
+#define TRACE_DAT_SECTION_OPTIONS   0
+#define TRACE_DAT_SECTION_FLYRECORD 3
+#define TRACE_DAT_SECTION_STRINGS   15
+#define TRACE_DAT_SECTION_HEADER    16
+#define TRACE_DAT_SECTION_FTRACE    17
+#define TRACE_DAT_SECTION_EVENTS    18
+#define TRACE_DAT_SECTION_KALLSYMS  19
+#define TRACE_DAT_SECTION_CMDLINE   21
+
+/*
+ * Option IDs for trace.dat options sections
+ *
+ * Written as the 16-bit id that starts each option record; the id is followed
+ * by a 32-bit payload size and the payload itself.
+ */
+#define TRACE_DAT_OPTION_DONE       0
+#define TRACE_DAT_OPTION_BUFFER     3
+#define TRACE_DAT_OPTION_TRACECLOCK 4
+#define TRACE_DAT_OPTION_CPUCOUNT   8
+#define TRACE_DAT_OPTION_HEADER     16
+#define TRACE_DAT_OPTION_FTRACE     17
+#define TRACE_DAT_OPTION_EVENT      18
+#define TRACE_DAT_OPTION_KALLSYMS   19
+#define TRACE_DAT_OPTION_CMDLINE    21
+
+/*
+ * String offsets in the strings section
+ * These point to null-terminated strings used as section names
+ *
+ * Each value is the byte offset of the corresponding name inside the table
+ * written by trace_dat__write_strings_section(); the table and these offsets
+ * must be changed together.
+ */
+#define STRID_HEADERS          0
+#define STRID_FTRACE_FORMATS   8
+#define STRID_EVENT_FORMATS    29
+#define STRID_KALLSYMS         43
+#define STRID_CMDLINES         52
+#define STRID_STRINGS          61
+#define STRID_OPTIONS_1        69
+#define STRID_OPTIONS_2        77
+#define STRID_BUFFER_FLYRECORD 85
+
+struct perf_session;
+
+extern FILE *trace_dat_fp;			/* output stream of the section writers */
+extern int trace_dat_page_size;			/* size in bytes of one flyrecord page */
+extern int trace_dat_nr_cpus;			/* number of per-CPU buffers */
+extern long trace_dat_options_offset;		/* where the options section start is stored */
+extern long trace_dat_header_info_offset;	/* payload of the HEADER option */
+extern long trace_dat_events_format_offset;	/* payload of the EVENT option */
+extern long trace_dat_ftrace_format_offset;	/* payload of the FTRACE option */
+extern long trace_dat_kallsyms_offset;		/* payload of the KALLSYMS option */
+extern long trace_dat_cmdline_offset;		/* payload of the CMDLINE option */
+extern long trace_dat_next_options_offset;	/* position of the first DONE option */
+
+/*
+ * Collect and manage per-cpu tracepoint event buffers.
+ *
+ * trace_dat__init_cpu_buffers() has to succeed before events can be
+ * collected. trace_dat__collect_cpu_event() stores a copy of @raw, so the
+ * caller keeps ownership of its own buffer. Both return 0 on success or a
+ * negative errno value. trace_dat__free_cpu_buffers() releases everything
+ * that was collected.
+ */
+int trace_dat__init_cpu_buffers(int nr_cpus);
+int trace_dat__collect_cpu_event(int cpu, unsigned long long ts,
+		       void *raw, unsigned int raw_size);
+void trace_dat__free_cpu_buffers(void);
+
+/*
+ * Write trace.dat file sections to trace_dat_fp.
+ * Each function returns 0 on success or a negative errno value.
+ */
+int trace_dat__write_options_section1(void);
+int trace_dat__write_options_section2(void);
+int trace_dat__write_flyrecord_section(void);
+int trace_dat__write_strings_section(void);
+
+#endif /* __PERF_TRACE_DAT_H */
-- 
2.53.0



  reply	other threads:[~2026-06-08 13:01 UTC|newest]

Thread overview: 6+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2026-06-08 12:59 [RFC PATCH 0/4] perf: Add perf.data tracepoint events to trace.dat conversion Tanushree Shah
2026-06-08 12:59 ` Tanushree Shah [this message]
2026-06-08 12:59 ` [RFC PATCH 2/4] perf/trace-event: Write trace.dat metadata sections during parsing Tanushree Shah
2026-06-08 12:59 ` [RFC PATCH 3/4] perf data-convert: Add perf.data to trace.dat conversion backend Tanushree Shah
2026-06-08 12:59 ` [RFC PATCH 4/4] perf data: Add --to-trace-dat option for converting perf.data tracepoint events into trace.dat format Tanushree Shah
2026-06-08 15:18 ` [RFC PATCH 0/4] perf: Add perf.data tracepoint events to trace.dat conversion Ian Rogers

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20260608125951.90425-3-tshah@linux.ibm.com \
    --to=tshah@linux.ibm.com \
    --cc=Shivani.Nittor@ibm.com \
    --cc=Tanushree.Shah@ibm.com \
    --cc=Tejas.Manhas1@ibm.com \
    --cc=acme@kernel.org \
    --cc=adrian.hunter@intel.com \
    --cc=atrajeev@linux.ibm.com \
    --cc=hbathini@linux.ibm.com \
    --cc=irogers@google.com \
    --cc=jolsa@kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-perf-users@vger.kernel.org \
    --cc=linuxppc-dev@lists.ozlabs.org \
    --cc=maddy@linux.ibm.com \
    --cc=mpetlan@redhat.com \
    --cc=namhyung@kernel.org \
    --cc=tmricht@linux.ibm.com \
    --cc=vmolnaro@redhat.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox