* [PATCH v2] perf data: Clean up use_stdio and structures
@ 2026-04-08 0:14 Ian Rogers
2026-04-08 0:37 ` sashiko-bot
0 siblings, 1 reply; 2+ messages in thread
From: Ian Rogers @ 2026-04-08 0:14 UTC (permalink / raw)
To: Peter Zijlstra, Ingo Molnar, Arnaldo Carvalho de Melo,
Namhyung Kim, Alexander Shishkin, Jiri Olsa, Ian Rogers,
Adrian Hunter, James Clark, Chun-Tse Shao, Swapnil Sapkal,
linux-perf-users, linux-kernel
use_stdio was associated with struct perf_data and not perf_data_file
meaning there was implicit use of fd rather than fptr that may not be
safe. For example, in perf_data_file__write. Reorganize perf_data_file
to better abstract use_stdio, add kernel-doc and more consistently use
perf_data__ accessors so that use_stdio is better respected.
Signed-off-by: Ian Rogers <irogers@google.com>
---
v2: Sashiko pointed out that fseek differs in result to lseek, fix.
---
tools/perf/builtin-inject.c | 7 ++--
tools/perf/builtin-record.c | 10 +++--
tools/perf/tests/topology.c | 3 +-
tools/perf/util/data.c | 83 ++++++++++++++++++++++++-------------
tools/perf/util/data.h | 52 +++++++++++++++++++----
tools/perf/util/session.c | 2 +-
6 files changed, 111 insertions(+), 46 deletions(-)
diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c
index 5b29f4296861..14ced122c9a9 100644
--- a/tools/perf/builtin-inject.c
+++ b/tools/perf/builtin-inject.c
@@ -270,9 +270,8 @@ static s64 perf_event__repipe_auxtrace(const struct perf_tool *tool,
inject->have_auxtrace = true;
if (!inject->output.is_pipe) {
- off_t offset;
+ off_t offset = perf_data__seek(&inject->output, 0, SEEK_CUR);
- offset = lseek(inject->output.file.fd, 0, SEEK_CUR);
if (offset == -1)
return -errno;
ret = auxtrace_index__auxtrace_event(&session->auxtrace_index,
@@ -2479,12 +2478,12 @@ int cmd_inject(int argc, const char **argv)
.output = {
.path = "-",
.mode = PERF_DATA_MODE_WRITE,
- .use_stdio = true,
+ .file.use_stdio = true,
},
};
struct perf_data data = {
.mode = PERF_DATA_MODE_READ,
- .use_stdio = true,
+ .file.use_stdio = true,
};
int ret;
const char *known_build_ids = NULL;
diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index e919d1f021c3..764afd93d8bd 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -453,7 +453,7 @@ static int record__aio_pushfn(struct mmap *map, void *to, void *buf, size_t size
static int record__aio_push(struct record *rec, struct mmap *map, off_t *off)
{
int ret, idx;
- int trace_fd = rec->session->data->file.fd;
+ int trace_fd = perf_data__fd(rec->session->data);
struct record_aio aio = { .rec = rec, .size = 0 };
/*
@@ -1640,7 +1640,7 @@ static int record__mmap_read_evlist(struct record *rec, struct evlist *evlist,
int rc = 0;
int nr_mmaps;
struct mmap **maps;
- int trace_fd = rec->data.file.fd;
+ int trace_fd = perf_data__fd(&rec->data);
off_t off = 0;
if (!evlist)
@@ -1847,8 +1847,10 @@ record__finish_output(struct record *rec)
rec->session->header.data_size += rec->bytes_written;
data->file.size = lseek(perf_data__fd(data), 0, SEEK_CUR);
if (record__threads_enabled(rec)) {
- for (i = 0; i < data->dir.nr; i++)
- data->dir.files[i].size = lseek(data->dir.files[i].fd, 0, SEEK_CUR);
+ for (i = 0; i < data->dir.nr; i++) {
+ data->dir.files[i].size =
+ perf_data_file__seek(&data->dir.files[i], 0, SEEK_CUR);
+ }
}
/* Buildid scanning disabled or build ID in kernel and synthesized map events. */
diff --git a/tools/perf/tests/topology.c b/tools/perf/tests/topology.c
index 75b748ddf824..f54502ebef4b 100644
--- a/tools/perf/tests/topology.c
+++ b/tools/perf/tests/topology.c
@@ -54,7 +54,8 @@ static int session_write_header(char *path)
session->header.data_size += DATA_SIZE;
TEST_ASSERT_VAL("failed to write header",
- !perf_session__write_header(session, session->evlist, data.file.fd, true));
+ !perf_session__write_header(session, session->evlist,
+ perf_data__fd(&data), true));
evlist__delete(session->evlist);
perf_session__delete(session);
diff --git a/tools/perf/util/data.c b/tools/perf/util/data.c
index 90df41da1a32..41993dfd6c2b 100644
--- a/tools/perf/util/data.c
+++ b/tools/perf/util/data.c
@@ -20,18 +20,30 @@
#include "rlimit.h"
#include <internal/lib.h>
-static void close_dir(struct perf_data_file *files, int nr)
+static void perf_data_file__close(struct perf_data_file *file)
{
- while (--nr >= 0) {
- close(files[nr].fd);
- zfree(&files[nr].path);
+ if (file->use_stdio) {
+ fclose(file->fptr);
+ file->fptr = NULL;
+ } else {
+ close(file->fd);
+ file->fd = -1;
}
+ zfree(&file->path);
+}
+
+static void close_dir(struct perf_data_file *files, int nr)
+{
+ while (--nr >= 0)
+ perf_data_file__close(&files[nr]);
+
free(files);
}
void perf_data__close_dir(struct perf_data *data)
{
close_dir(data->dir.files, data->dir.nr);
+ data->dir.files = NULL;
}
int perf_data__create_dir(struct perf_data *data, int nr)
@@ -174,7 +186,7 @@ static bool check_pipe(struct perf_data *data)
}
if (is_pipe) {
- if (data->use_stdio) {
+ if (data->file.use_stdio) {
const char *mode;
mode = perf_data__is_read(data) ? "r" : "w";
@@ -182,7 +194,7 @@ static bool check_pipe(struct perf_data *data)
if (data->file.fptr == NULL) {
data->file.fd = fd;
- data->use_stdio = false;
+ data->file.use_stdio = false;
}
/*
@@ -344,7 +356,7 @@ int perf_data__open(struct perf_data *data)
return 0;
/* currently it allows stdio for pipe only */
- data->use_stdio = false;
+ data->file.use_stdio = false;
if (!data->path)
data->path = "perf.data";
@@ -364,41 +376,57 @@ void perf_data__close(struct perf_data *data)
if (perf_data__is_dir(data))
perf_data__close_dir(data);
- zfree(&data->file.path);
-
- if (data->use_stdio)
- fclose(data->file.fptr);
- else
- close(data->file.fd);
+ perf_data_file__close(&data->file);
}
-ssize_t perf_data__read(struct perf_data *data, void *buf, size_t size)
+static ssize_t perf_data_file__read(struct perf_data_file *file, void *buf, size_t size)
{
- if (data->use_stdio) {
- if (fread(buf, size, 1, data->file.fptr) == 1)
+ if (file->use_stdio) {
+ if (fread(buf, size, 1, file->fptr) == 1)
return size;
- return feof(data->file.fptr) ? 0 : -1;
+ return feof(file->fptr) ? 0 : -1;
}
- return readn(data->file.fd, buf, size);
+ return readn(file->fd, buf, size);
+}
+
+ssize_t perf_data__read(struct perf_data *data, void *buf, size_t size)
+{
+ return perf_data_file__read(&data->file, buf, size);
}
ssize_t perf_data_file__write(struct perf_data_file *file,
void *buf, size_t size)
{
+ if (file->use_stdio) {
+ if (fwrite(buf, size, /*nmemb=*/1, file->fptr) == 1)
+ return size;
+ return -1;
+ }
return writen(file->fd, buf, size);
}
ssize_t perf_data__write(struct perf_data *data,
void *buf, size_t size)
{
- if (data->use_stdio) {
- if (fwrite(buf, size, 1, data->file.fptr) == 1)
- return size;
- return -1;
- }
return perf_data_file__write(&data->file, buf, size);
}
+off_t perf_data_file__seek(struct perf_data_file *file, off_t offset, int whence)
+{
+ if (file->use_stdio) {
+ off_t res = fseek(file->fptr, offset, whence);
+
+ return res < 0 ? -1 : ftell(file->fptr);
+ }
+ return lseek(file->fd, offset, whence);
+}
+
+off_t perf_data__seek(struct perf_data *data, off_t offset, int whence)
+{
+ assert(!data->is_pipe);
+ return perf_data_file__seek(&data->file, offset, whence);
+}
+
int perf_data__switch(struct perf_data *data,
const char *postfix,
size_t pos, bool at_exit,
@@ -420,19 +448,18 @@ int perf_data__switch(struct perf_data *data,
pr_warning("Failed to rename %s to %s\n", data->path, *new_filepath);
if (!at_exit) {
- close(data->file.fd);
+ perf_data_file__close(&data->file);
ret = perf_data__open(data);
if (ret < 0)
goto out;
- if (lseek(data->file.fd, pos, SEEK_SET) == (off_t)-1) {
+ if (perf_data__seek(data, pos, SEEK_SET) == (off_t)-1) {
ret = -errno;
- pr_debug("Failed to lseek to %zu: %m\n",
- pos);
+ pr_debug("Failed to seek to %zu: %m", pos);
goto out;
}
}
- ret = data->file.fd;
+ ret = perf_data__fd(data);
out:
return ret;
}
diff --git a/tools/perf/util/data.h b/tools/perf/util/data.h
index 1438e32e0451..8299fb5fa7da 100644
--- a/tools/perf/util/data.h
+++ b/tools/perf/util/data.h
@@ -17,32 +17,70 @@ enum perf_dir_version {
PERF_DIR_VERSION = 1,
};
+/**
+ * struct perf_data_file: A wrapper around a file used for perf.data reading or writing. Generally
+ * part of struct perf_data.
+ */
struct perf_data_file {
+ /**
+ * @path: Path of file. Generally a copy of perf_data.path but for a
+ * directory it is the file within the directory.
+ */
char *path;
union {
+ /** @fd: File descriptor for read/writes. Valid if use_stdio is false. */
int fd;
+ /**
+ * @fptr: Stdio FILE. Valid if use_stdio is true, currently just
+ * pipes in perf inject.
+ */
FILE *fptr;
};
+ /** @size: Size of file when opened. */
unsigned long size;
+ /** @use_stdio: Use buffered stdio operations. */
+ bool use_stdio;
};
+/**
+ * struct perf_data: A wrapper around a file used for perf.data reading or writing.
+ */
struct perf_data {
+ /** @path: Path to open and of the file. NULL implies 'perf.data' will be used. */
const char *path;
+ /** @file: Underlying file to be used. */
struct perf_data_file file;
+ /** @is_pipe: Underlying file is a pipe. */
bool is_pipe;
+ /** @is_dir: Underlying file is a directory. */
bool is_dir;
+ /** @force: Ignore opening a file creating created by a different user. */
bool force;
- bool use_stdio;
+ /** @in_place_update: A file opened for reading but will be written to. */
bool in_place_update;
+ /** @mode: Read or write mode. */
enum perf_data_mode mode;
struct {
+ /** @version: perf_dir_version. */
u64 version;
+ /** @files: perf data files for the directory. */
struct perf_data_file *files;
+ /** @nr: Number of perf data files for the directory. */
int nr;
} dir;
};
+static inline int perf_data_file__fd(struct perf_data_file *file)
+{
+ return file->use_stdio ? fileno(file->fptr) : file->fd;
+}
+
+ssize_t perf_data_file__write(struct perf_data_file *file,
+ void *buf, size_t size);
+off_t perf_data_file__seek(struct perf_data_file *file, off_t offset, int whence);
+
+
static inline bool perf_data__is_read(struct perf_data *data)
{
return data->mode == PERF_DATA_MODE_READ;
@@ -70,10 +108,7 @@ static inline bool perf_data__is_single_file(struct perf_data *data)
static inline int perf_data__fd(struct perf_data *data)
{
- if (data->use_stdio)
- return fileno(data->file.fptr);
-
- return data->file.fd;
+ return perf_data_file__fd(&data->file);
}
int perf_data__open(struct perf_data *data);
@@ -81,8 +116,7 @@ void perf_data__close(struct perf_data *data);
ssize_t perf_data__read(struct perf_data *data, void *buf, size_t size);
ssize_t perf_data__write(struct perf_data *data,
void *buf, size_t size);
-ssize_t perf_data_file__write(struct perf_data_file *file,
- void *buf, size_t size);
+off_t perf_data__seek(struct perf_data *data, off_t offset, int whence);
/*
* If at_exit is set, only rename current perf.data to
* perf.data.<postfix>, continue write on original data.
@@ -99,8 +133,10 @@ int perf_data__open_dir(struct perf_data *data);
void perf_data__close_dir(struct perf_data *data);
unsigned long perf_data__size(struct perf_data *data);
int perf_data__make_kcore_dir(struct perf_data *data, char *buf, size_t buf_sz);
-bool has_kcore_dir(const char *path);
char *perf_data__kallsyms_name(struct perf_data *data);
char *perf_data__guest_kallsyms_name(struct perf_data *data, pid_t machine_pid);
+
+bool has_kcore_dir(const char *path);
bool is_perf_data(const char *path);
+
#endif /* __PERF_DATA_H */
diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
index 3a911c70cd0e..f8f358bac5d3 100644
--- a/tools/perf/util/session.c
+++ b/tools/perf/util/session.c
@@ -2569,7 +2569,7 @@ static int __perf_session__process_dir_events(struct perf_session *session)
if (!data->dir.files[i].size)
continue;
rd[readers] = (struct reader) {
- .fd = data->dir.files[i].fd,
+ .fd = perf_data_file__fd(&data->dir.files[i]),
.path = data->dir.files[i].path,
.data_size = data->dir.files[i].size,
.data_offset = 0,
--
2.53.0.1213.gd9a14994de-goog
^ permalink raw reply related [flat|nested] 2+ messages in thread* Re: [PATCH v2] perf data: Clean up use_stdio and structures
2026-04-08 0:14 [PATCH v2] perf data: Clean up use_stdio and structures Ian Rogers
@ 2026-04-08 0:37 ` sashiko-bot
0 siblings, 0 replies; 2+ messages in thread
From: sashiko-bot @ 2026-04-08 0:37 UTC (permalink / raw)
To: Ian Rogers; +Cc: linux-perf-users
Sashiko AI review found 3 potential issue(s):
- [High] Uninitialized `use_stdio` flag in `perf_data__open_dir` leads to undefined behavior and potential segmentation faults.
- [High] `perf_data__seek` triggers an assertion failure when attempting to switch output on a pipe.
- [High] Unchecked `fptr` in `perf_data_file__close` leads to `fclose(NULL)` (segmentation fault) on error paths.
--
commit ec97569bb8fdfc232fc8bacde6d26424b25c9ef5
Author: Ian Rogers <irogers@google.com>
perf data: Clean up use_stdio and structures
use_stdio was associated with struct perf_data and not perf_data_file
meaning there was implicit use of fd rather than fptr that may not be
safe. For example, in perf_data_file__write. Reorganize perf_data_file
to better abstract use_stdio, add kernel-doc and more consistently use
perf_data__ accessors so that use_stdio is better respected.
> diff --git a/tools/perf/util/data.c b/tools/perf/util/data.c
> index 90df41da1a32b..41993dfd6c2bc 100644
> --- a/tools/perf/util/data.c
> +++ b/tools/perf/util/data.c
> @@ -20,18 +20,30 @@
> #include "rlimit.h"
> #include <internal/lib.h>
>
> -static void close_dir(struct perf_data_file *files, int nr)
> +static void perf_data_file__close(struct perf_data_file *file)
> {
> - while (--nr >= 0) {
> - close(files[nr].fd);
> - zfree(&files[nr].path);
> + if (file->use_stdio) {
> + fclose(file->fptr);
Does this need a check for file->fptr != NULL before calling fclose()?
If a tool initializes its output with .file.use_stdio = true and
.file.fptr = NULL (such as inject.output in cmd_inject() when using
--in-place), and an error occurs before the file is fully opened, the error
path jumps to out_close_output and calls perf_data__close(). This blindly
executes fclose(NULL), which will cause a segmentation fault.
Also, since use_stdio is read here, can it contain uninitialized memory?
In perf_data__open_dir(), the files array is dynamically grown using
realloc(files, (nr + 1) * sizeof(*files)). When a new struct perf_data_file
is appended, its path, fd, and size fields are set, but the newly added
use_stdio boolean is never explicitly initialized. Because realloc() does
not zero memory, use_stdio will contain uninitialized heap garbage.
When perf_data__close_dir() iterates over the files to close them via
this function, it reads this garbage flag. If it evaluates to true, the code
incorrectly assumes the file uses stdio and calls fclose(file->fptr). Since
fptr is part of a union with fd (or also garbage), this results in an
arbitrary pointer dereference and a segmentation fault.
[ ... ]
> +off_t perf_data__seek(struct perf_data *data, off_t offset, int whence)
> +{
> + assert(!data->is_pipe);
> + return perf_data_file__seek(&data->file, offset, whence);
> +}
> +
> int perf_data__switch(struct perf_data *data,
> const char *postfix,
> size_t pos, bool at_exit,
> @@ -420,19 +448,18 @@ int perf_data__switch(struct perf_data *data,
> pr_warning("Failed to rename %s to %s\n", data->path, *new_filepath);
>
> if (!at_exit) {
> - close(data->file.fd);
> + perf_data_file__close(&data->file);
> ret = perf_data__open(data);
> if (ret < 0)
> goto out;
>
> - if (lseek(data->file.fd, pos, SEEK_SET) == (off_t)-1) {
> + if (perf_data__seek(data, pos, SEEK_SET) == (off_t)-1) {
Will this assertion crash the tool if a user specifies a pipe for output and
triggers an output switch (e.g., perf record -o - --switch-output)?
When the user triggers an output switch, perf_data__switch() re-opens the
pipe (correctly setting data->is_pipe = true) and then unconditionally calls
perf_data__seek() to restore the offset.
Before this patch, calling lseek() on a pipe simply returned -1 with
errno == ESPIPE and the tool handled the error gracefully. The new assertion
turns this previously handled error into a hard crash that can be reached
directly via user arguments.
--
Sashiko AI review · https://sashiko.dev/#/patchset/20260408001416.101493-1-irogers@google.com?part=1
^ permalink raw reply [flat|nested] 2+ messages in thread
end of thread, other threads:[~2026-04-08 0:37 UTC | newest]
Thread overview: 2+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2026-04-08 0:14 [PATCH v2] perf data: Clean up use_stdio and structures Ian Rogers
2026-04-08 0:37 ` sashiko-bot
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox