From: Namhyung Kim <namhyung@kernel.org>
To: Alejandro Colomar <alx@kernel.org>
Cc: linux-man@vger.kernel.org, Peter Zijlstra <peterz@infradead.org>,
Arnaldo Carvalho de Melo <acme@kernel.org>,
Jiri Olsa <jolsa@kernel.org>, Alexei Starovoitov <ast@kernel.org>,
Marco Elver <elver@google.com>,
Alexander Shishkin <alexander.shishkin@linux.intel.com>,
Kan Liang <kan.liang@linux.intel.com>,
Stephane Eranian <eranian@google.com>,
linux-perf-users@vger.kernel.org
Subject: [PATCH v2] perf_event_open.2: Update recent changes
Date: Tue, 3 Jan 2023 22:17:15 -0800 [thread overview]
Message-ID: <20230104061715.1412340-1-namhyung@kernel.org> (raw)
Add missing perf_event_attr fields, new event codes and sample type.
Also add descriptions for PERF_FORMAT_LOST.
Signed-off-by: Namhyung Kim <namhyung@kernel.org>
---
man2/perf_event_open.2 | 200 ++++++++++++++++++++++++++++++++++++++---
1 file changed, 190 insertions(+), 10 deletions(-)
diff --git a/man2/perf_event_open.2 b/man2/perf_event_open.2
index 8e94fb4ac..2cf673f3d 100644
--- a/man2/perf_event_open.2
+++ b/man2/perf_event_open.2
@@ -245,8 +245,15 @@ struct perf_event_attr {
instead of events */
cgroup : 1, /* include cgroup events */
text_poke : 1, /* include text poke events */
+ build_id : 1, /* use build id in mmap2 events */
+ inherit_thread : 1, /* children only inherit */
+ /* if cloned with CLONE_THREAD */
+ remove_on_exec : 1, /* event is removed from task
+ on exec */
+ sigtrap : 1, /* send synchronous SIGTRAP
+ on event */
- __reserved_1 : 30;
+ __reserved_1 : 26;
union {
__u32 wakeup_events; /* wakeup every n events */
@@ -277,6 +284,9 @@ struct perf_event_attr {
__u32 aux_watermark; /* aux bytes before wakeup */
__u16 sample_max_stack; /* max frames in callchain */
__u16 __reserved_2; /* align to u64 */
+ __u32 aux_sample_size; /* max aux sample size */
+ __u32 __reserved_3; /* align to u64 */
+ __u64 sig_data; /* user data for sigtrap */
};
.EE
@@ -538,6 +548,19 @@ Informational sample record types such as mmap or comm
must be associated with an active event.
This dummy event allows gathering such records without requiring
a counting event.
+.TP
+.BR PERF_COUNT_SW_BPF_OUTPUT " (since Linux 4.4)"
+.\" commit a43eec304259a6c637f4014a6d4767159b6a3aa3
+This is used to generate raw sample data from BPF.
+BPF programs can write to this event using
+.B bpf_perf_event_output
+helper.
+.TP
+.BR PERF_COUNT_SW_CGROUP_SWITCHES " (since Linux 5.13)"
+.\" commit d0d1dd628527c77db2391ce0293c1ed344b2365f
+This counts context switches to a task in a different cgroup.
+In other words, if the next task is in the same cgroup,
+it won't count the switch.
.RE
.PP
.RS
@@ -879,6 +902,41 @@ This corresponds to the
field in the
.B PERF_RECORD_CGROUP
event.
+.TP
+.BR PERF_SAMPLE_DATA_PAGE_SIZE " (since Linux 5.11)"
+.\" commit 8d97e71811aaafe4abf611dc24822fd6e73df1a1
+Records page size of data like in
+.BR PERF_SAMPLE_ADDR .
+.TP
+.BR PERF_SAMPLE_CODE_PAGE_SIZE " (since Linux 5.11)"
+.\" commit 995f088efebe1eba0282a6ffa12411b37f8990c2
+Records page size of ip like in
+.BR PERF_SAMPLE_IP .
+.TP
+.BR PERF_SAMPLE_WEIGHT_STRUCT " (since Linux 5.12)"
+.\" commit 2a6c6b7d7ad346f0679d0963cb19b3f0ea7ef32c
+Records hardware provided weight values like in
+.BR PERF_SAMPLE_WEIGHT ,
+but it can represent multiple values in a struct.
+This shares the same space as the
+.BR PERF_SAMPLE_WEIGHT ,
+so users can apply either of those, not the both.
+When used, it would have the following format and
+the meaning of each fields is dependent to the
+hardware implementation.
+.PP
+.in +4n
+.EX
+union perf_sample_weight {
+ u64 full; /* PERF_SAMPLE_WEIGHT */
+ struct { /* PERF_SAMPLE_WEIGHT_STRUCT */
+ u32 var1_dw;
+ u16 var2_w;
+ u16 var3_w;
+ };
+};
+.EE
+.in
.RE
.TP
.I read_format
@@ -908,6 +966,15 @@ Adds a 64-bit unique value that corresponds to the event group.
.TP
.B PERF_FORMAT_GROUP
Allows all counter values in an event group to be read with one read.
+.TP
+.B PERF_FORMAT_LOST " (since Linux 6.0)"
+.\" commit 119a784c81270eb88e573174ed2209225d646656
+Adds a 64-bit value that is the number of lost samples for this event.
+This would be only meaningful when
+.I sample_period
+or
+.I sample_freq
+is set.
.RE
.TP
.I disabled
@@ -1243,7 +1310,7 @@ This enables the generation of
.B PERF_RECORD_BPF_EVENT
records when an eBPF program is loaded or unloaded.
.TP
-.IR auxevent " (since Linux 5.4)"
+.IR aux_output " (since Linux 5.4)"
.\" commit ab43762ef010967e4ccd53627f70a2eecbeafefb
This allows normal (non-AUX) events to generate data for AUX events
if the hardware supports it.
@@ -1261,6 +1328,31 @@ This enables the generation of
records when there's a change to the kernel text
(i.e., self-modifying code).
.TP
+.IR build_id " (since Linux 5.12)"
+.\" commit 88a16a1309333e43d328621ece3e9fa37027e8eb
+This changes the contents in the
+.B PERF_RECORD_MMAP2
+to have a build-id instead of device and inode numbers.
+.TP
+.IR inherit_thread " (since Linux 5.13)"
+.\" commit 2b26f0aa004995f49f7b6f4100dd0e4c39a9ed5f
+This disables the inheritance of the event to a child process.
+Only new threads in the same process (which is cloned with
+.BR CLONE_THREAD )
+will inherit the event.
+.TP
+.IR remove_on_exec " (since Linux 5.13)"
+.\" commit 2e498d0a74e5b88a6689ae1b811f247f91ff188e
+This closes the event when it starts a new process image by
+.BR execve (2)
+system call.
+.TP
+.IR sigtrap " (since Linux 5.13)"
+.\" commit 97ba62b278674293762c3d91f724f1bb922f04e0
+This enables synchronous signal delivery of
+.B SIGTRAP
+on event overflow.
+.TP
.IR wakeup_events ", " wakeup_watermark
This union sets how many samples
.RI ( wakeup_events )
@@ -1471,6 +1563,21 @@ includes
.BR PERF_SAMPLE_CALLCHAIN ,
this field specifies how many stack frames to report when
generating the callchain.
+.TP
+.IR aux_sample_size " (since Linux 5.5)"
+.\" commit a4faf00d994c40e64f656805ac375c65e324eefb
+When
+.B PERF_SAMPLE_AUX
+flag is set, specify the desired size of AUX data.
+Note that it can get smaller data than the specified size.
+.TP
+.IR sig_data " (since Linux 5.13)"
+.\" commit 97ba62b278674293762c3d91f724f1bb922f04e0
+This data will be copied to user's signal handler (through
+.I si_perf
+in the
+.IR siginfo_t )
+to disambiguate which event triggered the signal.
.SS Reading results
Once a
.BR perf_event_open ()
@@ -1502,6 +1609,7 @@ struct read_format {
struct {
u64 value; /* The value of the event */
u64 id; /* if PERF_FORMAT_ID */
+ u64 lost; /* if PERF_FORMAT_LOST */
} values[nr];
};
.EE
@@ -1520,6 +1628,7 @@ struct read_format {
u64 time_enabled; /* if PERF_FORMAT_TOTAL_TIME_ENABLED */
u64 time_running; /* if PERF_FORMAT_TOTAL_TIME_RUNNING */
u64 id; /* if PERF_FORMAT_ID */
+ u64 lost; /* if PERF_FORMAT_LOST */
};
.EE
.in
@@ -1551,6 +1660,12 @@ A globally unique value for this particular event; only present if
.B PERF_FORMAT_ID
was specified in
.IR read_format .
+.TP
+.I lost
+The number of lost samples of this event; only present if
+.B PERF_FORMAT_LOST
+was specified in
+.IR read_format .
.SS MMAP layout
When using
.BR perf_event_open ()
@@ -2002,6 +2117,22 @@ to the actual instruction that triggered the event.
See also
.IR perf_event_attr.precise_ip .
.TP
+.BR PERF_RECORD_MISC_SWITCH_OUT_PREEMPT " (since Linux 4.17)"
+.\" commit 101592b4904ecf6b8ed2a4784d41d180319d95a1
+When a
+.B PERF_RECORD_SWITCH
+or
+.B PERF_RECORD_SWITCH_CPU_WIDE
+record is generated, this indicates the context switch
+was a preemption.
+.TP
+.BR PERF_RECORD_MISC_MMAP_BUILD_ID " (since Linux 5.12)"
+.\" commit 88a16a1309333e43d328621ece3e9fa37027e8eb
+This indicates that the content of
+.B PERF_SAMPLE_MMAP2
+contains build-ID data instead of device major and minor numbers
+as well as the inode number.
+.TP
.BR PERF_RECORD_MISC_EXT_RESERVED " (since Linux 2.6.35)"
.\" commit 1676b8a077c352085d52578fb4f29350b58b6e74
This indicates there is extended data available (currently not used).
@@ -2201,7 +2332,9 @@ struct {
char data[size]; /* if PERF_SAMPLE_STACK_USER */
u64 dyn_size; /* if PERF_SAMPLE_STACK_USER &&
size != 0 */
- u64 weight; /* if PERF_SAMPLE_WEIGHT */
+ union perf_sample_weight;
+ /* if PERF_SAMPLE_WEIGHT */
+ /* || PERF_SAMPLE_WEIGHT_STRUCT */
u64 data_src; /* if PERF_SAMPLE_DATA_SRC */
u64 transaction; /* if PERF_SAMPLE_TRANSACTION */
u64 abi; /* if PERF_SAMPLE_REGS_INTR */
@@ -2209,6 +2342,12 @@ struct {
/* if PERF_SAMPLE_REGS_INTR */
u64 phys_addr; /* if PERF_SAMPLE_PHYS_ADDR */
u64 cgroup; /* if PERF_SAMPLE_CGROUP */
+ u64 data_page_size;
+ /* if PERF_SAMPLE_DATA_PAGE_SIZE */
+ u64 code_page_size;
+ /* if PERF_SAMPLE_CODE_PAGE_SIZE */
+ u64 size; /* if PERF_SAMPLE_AUX */
+ char data[size]; /* if PERF_SAMPLE_AUX */
};
.EE
.in
@@ -2410,7 +2549,7 @@ is 0.
.TP
.I weight
If
-.B PERF_SAMPLE_WEIGHT
+.B PERF_SAMPLE_WEIGHT "or" PERF_SAMPLE_WEIGHT_STRUCT
is enabled, then a 64-bit value provided by the hardware
is recorded that indicates how costly the event was.
This allows expensive events to stand out more clearly
@@ -2643,7 +2782,28 @@ If the
flag is set,
then the 64-bit cgroup ID (for the perf_event subsystem) is recorded.
To get the pathname of the cgroup, the ID should match to one in a
-.B PERF_RECORD_CGROUP .
+.BR PERF_RECORD_CGROUP .
+.TP
+.I data_page_size
+If the
+.B PERF_SAMPLE_DATA_PAGE_SIZE
+flag is set,
+then the 64-bit page size value of the
+.B data
+address is recorded.
+.TP
+.I code_page_size
+If the
+.B PERF_SAMPLE_CODE_PAGE_SIZE
+flag is set,
+then the 64-bit page size value of the
+.B ip
+address is recorded.
+.TP
+.IR size ", " data[size]
+If
+.B PERF_SAMPLE_AUX
+is enabled, then a snapshot of the aux buffer is recorded.
.RE
.TP
.B PERF_RECORD_MMAP2
@@ -2653,7 +2813,9 @@ calls returning executable mappings.
The format is similar to that of the
.B PERF_RECORD_MMAP
record, but includes extra values that allow uniquely identifying
-shared mappings.
+shared mappings. Depending on the
+.B PERF_RECORD_MISC_MMAP_BUILD_ID
+bit in the header, the extra values have different layout and meanings.
.IP
.in +4n
.EX
@@ -2664,10 +2826,20 @@ struct {
u64 addr;
u64 len;
u64 pgoff;
- u32 maj;
- u32 min;
- u64 ino;
- u64 ino_generation;
+ union {
+ struct {
+ u32 maj;
+ u32 min;
+ u64 ino;
+ u64 ino_generation;
+ };
+ struct { /* if PERF_RECORD_MISC_MMAP_BUILD_ID */
+ u8 build_id_size;
+ u8 __reserved_1;
+ u16 __reserved_2;
+ u8 build_id[20];
+ };
+ };
u32 prot;
u32 flags;
char filename[];
@@ -2704,6 +2876,14 @@ is the inode number.
.I ino_generation
is the inode generation.
.TP
+.I build_id_size
+is the actual size of
+.I build_id
+field (up to 20).
+.TP
+.I build_id
+is a raw data to identify a binary.
+.TP
.I prot
is the protection information.
.TP
--
2.39.0.314.g84b9a713c41-goog
next reply other threads:[~2023-01-04 6:17 UTC|newest]
Thread overview: 3+ messages / expand[flat|nested] mbox.gz Atom feed top
2023-01-04 6:17 Namhyung Kim [this message]
2023-01-07 22:53 ` [PATCH v2] perf_event_open.2: Update recent changes Alejandro Colomar
2023-01-17 8:05 ` Namhyung Kim
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20230104061715.1412340-1-namhyung@kernel.org \
--to=namhyung@kernel.org \
--cc=acme@kernel.org \
--cc=alexander.shishkin@linux.intel.com \
--cc=alx@kernel.org \
--cc=ast@kernel.org \
--cc=elver@google.com \
--cc=eranian@google.com \
--cc=jolsa@kernel.org \
--cc=kan.liang@linux.intel.com \
--cc=linux-man@vger.kernel.org \
--cc=linux-perf-users@vger.kernel.org \
--cc=peterz@infradead.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).