From: Ian Rogers <irogers@google.com>
To: Peter Zijlstra <peterz@infradead.org>,
Ingo Molnar <mingo@redhat.com>,
Arnaldo Carvalho de Melo <acme@kernel.org>,
Mark Rutland <mark.rutland@arm.com>,
Alexander Shishkin <alexander.shishkin@linux.intel.com>,
Jiri Olsa <jolsa@kernel.org>, Namhyung Kim <namhyung@kernel.org>,
Ian Rogers <irogers@google.com>,
Adrian Hunter <adrian.hunter@intel.com>,
Nick Terrell <terrelln@fb.com>,
Kan Liang <kan.liang@linux.intel.com>,
Andi Kleen <ak@linux.intel.com>, Leo Yan <leo.yan@linaro.org>,
Song Liu <song@kernel.org>, Sandipan Das <sandipan.das@amd.com>,
James Clark <james.clark@arm.com>,
Anshuman Khandual <anshuman.khandual@arm.com>,
Miguel Ojeda <ojeda@kernel.org>,
Liam Howlett <liam.howlett@oracle.com>,
Yang Jihong <yangjihong1@huawei.com>,
Athira Rajeev <atrajeev@linux.vnet.ibm.com>,
Kajol Jain <kjain@linux.ibm.com>,
K Prateek Nayak <kprateek.nayak@amd.com>,
Sean Christopherson <seanjc@google.com>,
Yanteng Si <siyanteng@loongson.cn>,
Ravi Bangoria <ravi.bangoria@amd.com>,
German Gomez <german.gomez@arm.com>,
Changbin Du <changbin.du@huawei.com>,
Paolo Bonzini <pbonzini@redhat.com>,
Masami Hiramatsu <mhiramat@kernel.org>,
liuwenyu <liuwenyu7@huawei.com>,
linux-kernel@vger.kernel.org, linux-perf-users@vger.kernel.org
Subject: [PATCH v3 44/50] perf dso: Reorder variables to save space in struct dso
Date: Tue, 24 Oct 2023 15:23:47 -0700 [thread overview]
Message-ID: <20231024222353.3024098-45-irogers@google.com> (raw)
In-Reply-To: <20231024222353.3024098-1-irogers@google.com>
Shrink struct dso from 464 to 424 bytes, saving 40 bytes and moving from
8 to 7 cache lines. Only declare the dwfl member when building for
powerpc. Squeeze int/enum members into narrower bitfields when
appropriate. Remove holes and padding by reordering members.
Before:
```
struct dso {
struct mutex lock; /* 0 40 */
struct list_head node; /* 40 16 */
struct rb_node rb_node __attribute__((__aligned__(8))); /* 56 24 */
/* --- cacheline 1 boundary (64 bytes) was 16 bytes ago --- */
struct rb_root * root; /* 80 8 */
struct rb_root_cached symbols; /* 88 16 */
struct symbol * * symbol_names; /* 104 8 */
size_t symbol_names_len; /* 112 8 */
struct rb_root_cached inlined_nodes; /* 120 16 */
/* --- cacheline 2 boundary (128 bytes) was 8 bytes ago --- */
struct rb_root_cached srclines; /* 136 16 */
struct {
u64 addr; /* 152 8 */
struct symbol * symbol; /* 160 8 */
} last_find_result; /* 152 16 */
void * a2l; /* 168 8 */
char * symsrc_filename; /* 176 8 */
unsigned int a2l_fails; /* 184 4 */
enum dso_space_type kernel; /* 188 4 */
/* --- cacheline 3 boundary (192 bytes) --- */
_Bool is_kmod; /* 192 1 */
/* XXX 3 bytes hole, try to pack */
enum dso_swap_type needs_swap; /* 196 4 */
enum dso_binary_type symtab_type; /* 200 4 */
enum dso_binary_type binary_type; /* 204 4 */
enum dso_load_errno load_errno; /* 208 4 */
u8 adjust_symbols:1; /* 212: 0 1 */
u8 has_build_id:1; /* 212: 1 1 */
u8 header_build_id:1; /* 212: 2 1 */
u8 has_srcline:1; /* 212: 3 1 */
u8 hit:1; /* 212: 4 1 */
u8 annotate_warned:1; /* 212: 5 1 */
u8 auxtrace_warned:1; /* 212: 6 1 */
u8 short_name_allocated:1; /* 212: 7 1 */
u8 long_name_allocated:1; /* 213: 0 1 */
u8 is_64_bit:1; /* 213: 1 1 */
/* XXX 6 bits hole, try to pack */
_Bool sorted_by_name; /* 214 1 */
_Bool loaded; /* 215 1 */
u8 rel; /* 216 1 */
/* XXX 7 bytes hole, try to pack */
struct build_id bid; /* 224 32 */
/* --- cacheline 4 boundary (256 bytes) --- */
u64 text_offset; /* 256 8 */
u64 text_end; /* 264 8 */
const char * short_name; /* 272 8 */
const char * long_name; /* 280 8 */
u16 long_name_len; /* 288 2 */
u16 short_name_len; /* 290 2 */
/* XXX 4 bytes hole, try to pack */
void * dwfl; /* 296 8 */
struct auxtrace_cache * auxtrace_cache; /* 304 8 */
int comp; /* 312 4 */
/* XXX 4 bytes hole, try to pack */
/* --- cacheline 5 boundary (320 bytes) --- */
struct {
struct rb_root cache; /* 320 8 */
int fd; /* 328 4 */
int status; /* 332 4 */
u32 status_seen; /* 336 4 */
/* XXX 4 bytes hole, try to pack */
u64 file_size; /* 344 8 */
struct list_head open_entry; /* 352 16 */
u64 elf_base_addr; /* 368 8 */
u64 debug_frame_offset; /* 376 8 */
/* --- cacheline 6 boundary (384 bytes) --- */
u64 eh_frame_hdr_addr; /* 384 8 */
u64 eh_frame_hdr_offset; /* 392 8 */
} data; /* 320 80 */
struct {
u32 id; /* 400 4 */
u32 sub_id; /* 404 4 */
struct perf_env * env; /* 408 8 */
} bpf_prog; /* 400 16 */
union {
void * priv; /* 416 8 */
u64 db_id; /* 416 8 */
}; /* 416 8 */
struct nsinfo * nsinfo; /* 424 8 */
struct dso_id id; /* 432 24 */
/* --- cacheline 7 boundary (448 bytes) was 8 bytes ago --- */
refcount_t refcnt; /* 456 4 */
char name[]; /* 460 0 */
/* size: 464, cachelines: 8, members: 49 */
/* sum members: 440, holes: 4, sum holes: 18 */
/* sum bitfield members: 10 bits, bit holes: 1, sum bit holes: 6 bits */
/* padding: 4 */
/* forced alignments: 1 */
/* last cacheline: 16 bytes */
} __attribute__((__aligned__(8)));
```
After:
```
struct dso {
struct mutex lock; /* 0 40 */
struct list_head node; /* 40 16 */
struct rb_node rb_node __attribute__((__aligned__(8))); /* 56 24 */
/* --- cacheline 1 boundary (64 bytes) was 16 bytes ago --- */
struct rb_root * root; /* 80 8 */
struct rb_root_cached symbols; /* 88 16 */
struct symbol * * symbol_names; /* 104 8 */
size_t symbol_names_len; /* 112 8 */
struct rb_root_cached inlined_nodes; /* 120 16 */
/* --- cacheline 2 boundary (128 bytes) was 8 bytes ago --- */
struct rb_root_cached srclines; /* 136 16 */
struct {
u64 addr; /* 152 8 */
struct symbol * symbol; /* 160 8 */
} last_find_result; /* 152 16 */
struct build_id bid; /* 168 32 */
/* --- cacheline 3 boundary (192 bytes) was 8 bytes ago --- */
u64 text_offset; /* 200 8 */
u64 text_end; /* 208 8 */
const char * short_name; /* 216 8 */
const char * long_name; /* 224 8 */
void * a2l; /* 232 8 */
char * symsrc_filename; /* 240 8 */
struct nsinfo * nsinfo; /* 248 8 */
/* --- cacheline 4 boundary (256 bytes) --- */
struct auxtrace_cache * auxtrace_cache; /* 256 8 */
union {
void * priv; /* 264 8 */
u64 db_id; /* 264 8 */
}; /* 264 8 */
struct {
struct perf_env * env; /* 272 8 */
u32 id; /* 280 4 */
u32 sub_id; /* 284 4 */
} bpf_prog; /* 272 16 */
struct {
struct rb_root cache; /* 288 8 */
struct list_head open_entry; /* 296 16 */
u64 file_size; /* 312 8 */
/* --- cacheline 5 boundary (320 bytes) --- */
u64 elf_base_addr; /* 320 8 */
u64 debug_frame_offset; /* 328 8 */
u64 eh_frame_hdr_addr; /* 336 8 */
u64 eh_frame_hdr_offset; /* 344 8 */
int fd; /* 352 4 */
int status; /* 356 4 */
u32 status_seen; /* 360 4 */
} data; /* 288 80 */
/* XXX last struct has 4 bytes of padding */
struct dso_id id; /* 368 24 */
/* --- cacheline 6 boundary (384 bytes) was 8 bytes ago --- */
unsigned int a2l_fails; /* 392 4 */
int comp; /* 396 4 */
refcount_t refcnt; /* 400 4 */
enum dso_load_errno load_errno; /* 404 4 */
u16 long_name_len; /* 408 2 */
u16 short_name_len; /* 410 2 */
enum dso_binary_type symtab_type:8; /* 412: 0 4 */
enum dso_binary_type binary_type:8; /* 412: 8 4 */
enum dso_space_type kernel:2; /* 412:16 4 */
enum dso_swap_type needs_swap:2; /* 412:18 4 */
/* Bitfield combined with next fields */
_Bool is_kmod:1; /* 414: 4 1 */
u8 adjust_symbols:1; /* 414: 5 1 */
u8 has_build_id:1; /* 414: 6 1 */
u8 header_build_id:1; /* 414: 7 1 */
u8 has_srcline:1; /* 415: 0 1 */
u8 hit:1; /* 415: 1 1 */
u8 annotate_warned:1; /* 415: 2 1 */
u8 auxtrace_warned:1; /* 415: 3 1 */
u8 short_name_allocated:1; /* 415: 4 1 */
u8 long_name_allocated:1; /* 415: 5 1 */
u8 is_64_bit:1; /* 415: 6 1 */
/* XXX 1 bit hole, try to pack */
_Bool sorted_by_name; /* 416 1 */
_Bool loaded; /* 417 1 */
u8 rel; /* 418 1 */
char name[]; /* 419 0 */
/* size: 424, cachelines: 7, members: 48 */
/* sum members: 415 */
/* sum bitfield members: 31 bits, bit holes: 1, sum bit holes: 1 bits */
/* padding: 5 */
/* paddings: 1, sum paddings: 4 */
/* forced alignments: 1 */
/* last cacheline: 40 bytes */
} __attribute__((__aligned__(8)));
```
Signed-off-by: Ian Rogers <irogers@google.com>
---
tools/perf/util/dso.h | 84 +++++++++++++++++++++----------------------
1 file changed, 42 insertions(+), 42 deletions(-)
diff --git a/tools/perf/util/dso.h b/tools/perf/util/dso.h
index 3759de8c2267..8bdc17d78b02 100644
--- a/tools/perf/util/dso.h
+++ b/tools/perf/util/dso.h
@@ -158,66 +158,66 @@ struct dso {
u64 addr;
struct symbol *symbol;
} last_find_result;
- void *a2l;
- char *symsrc_filename;
- unsigned int a2l_fails;
- enum dso_space_type kernel;
- bool is_kmod;
- enum dso_swap_type needs_swap;
- enum dso_binary_type symtab_type;
- enum dso_binary_type binary_type;
- enum dso_load_errno load_errno;
- u8 adjust_symbols:1;
- u8 has_build_id:1;
- u8 header_build_id:1;
- u8 has_srcline:1;
- u8 hit:1;
- u8 annotate_warned:1;
- u8 auxtrace_warned:1;
- u8 short_name_allocated:1;
- u8 long_name_allocated:1;
- u8 is_64_bit:1;
- bool sorted_by_name;
- bool loaded;
- u8 rel;
struct build_id bid;
u64 text_offset;
u64 text_end;
const char *short_name;
const char *long_name;
- u16 long_name_len;
- u16 short_name_len;
+ void *a2l;
+ char *symsrc_filename;
+#if defined(__powerpc__)
void *dwfl; /* DWARF debug info */
+#endif
+ struct nsinfo *nsinfo;
struct auxtrace_cache *auxtrace_cache;
- int comp;
-
+ union { /* Tool specific area */
+ void *priv;
+ u64 db_id;
+ };
+ /* bpf prog information */
+ struct {
+ struct perf_env *env;
+ u32 id;
+ u32 sub_id;
+ } bpf_prog;
/* dso data file */
struct {
struct rb_root cache;
- int fd;
- int status;
- u32 status_seen;
- u64 file_size;
struct list_head open_entry;
+ u64 file_size;
u64 elf_base_addr;
u64 debug_frame_offset;
u64 eh_frame_hdr_addr;
u64 eh_frame_hdr_offset;
+ int fd;
+ int status;
+ u32 status_seen;
} data;
- /* bpf prog information */
- struct {
- u32 id;
- u32 sub_id;
- struct perf_env *env;
- } bpf_prog;
-
- union { /* Tool specific area */
- void *priv;
- u64 db_id;
- };
- struct nsinfo *nsinfo;
struct dso_id id;
+ unsigned int a2l_fails;
+ int comp;
refcount_t refcnt;
+ enum dso_load_errno load_errno;
+ u16 long_name_len;
+ u16 short_name_len;
+ enum dso_binary_type symtab_type:8;
+ enum dso_binary_type binary_type:8;
+ enum dso_space_type kernel:2;
+ enum dso_swap_type needs_swap:2;
+ bool is_kmod:1;
+ u8 adjust_symbols:1;
+ u8 has_build_id:1;
+ u8 header_build_id:1;
+ u8 has_srcline:1;
+ u8 hit:1;
+ u8 annotate_warned:1;
+ u8 auxtrace_warned:1;
+ u8 short_name_allocated:1;
+ u8 long_name_allocated:1;
+ u8 is_64_bit:1;
+ bool sorted_by_name;
+ bool loaded;
+ u8 rel;
char name[];
};
--
2.42.0.758.gaed0368e0e-goog
next prev parent reply other threads:[~2023-10-24 22:37 UTC|newest]
Thread overview: 62+ messages / expand[flat|nested] mbox.gz Atom feed top
2023-10-24 22:23 [PATCH v3 00/50] Improvements to memory use Ian Rogers
2023-10-24 22:23 ` [PATCH v3 01/50] perf rwsem: Add debug mode that uses a mutex Ian Rogers
2023-10-24 22:23 ` [PATCH v3 02/50] perf machine: Avoid out of bounds LBR memory read Ian Rogers
2023-10-24 22:23 ` [PATCH v3 03/50] libperf rc_check: Make implicit enabling work for GCC Ian Rogers
2023-10-24 22:23 ` [PATCH v3 04/50] libperf rc_check: Add RC_CHK_EQUAL Ian Rogers
2023-10-24 22:23 ` [PATCH v3 05/50] perf hist: Add missing puts to hist__account_cycles Ian Rogers
2023-10-24 22:23 ` [PATCH v3 06/50] perf threads: Remove unused dead thread list Ian Rogers
2023-10-24 22:23 ` [PATCH v3 07/50] perf offcpu: Add missed btf_free Ian Rogers
2023-10-24 22:23 ` [PATCH v3 08/50] perf callchain: Make display use of branch_type_stat const Ian Rogers
2023-10-24 22:23 ` [PATCH v3 09/50] perf callchain: Make brtype_stat in callchain_list optional Ian Rogers
2023-10-24 22:23 ` [PATCH v3 10/50] perf callchain: Minor layout changes to callchain_list Ian Rogers
2023-10-24 22:23 ` [PATCH v3 11/50] perf mem_info: Add and use map_symbol__exit and addr_map_symbol__exit Ian Rogers
2023-10-24 22:23 ` [PATCH v3 12/50] perf record: Lazy load kernel symbols Ian Rogers
2023-10-25 18:25 ` Namhyung Kim
2023-10-25 18:35 ` Adrian Hunter
2023-10-24 22:23 ` [PATCH v3 13/50] libperf: Lazily allocate mmap event copy Ian Rogers
2023-10-25 2:38 ` Yang Jihong
2023-10-25 3:28 ` Ian Rogers
2023-10-24 22:23 ` [PATCH v3 14/50] perf mmap: Lazily initialize zstd streams Ian Rogers
2023-10-24 22:23 ` [PATCH v3 15/50] perf machine thread: Remove exited threads by default Ian Rogers
2023-10-24 22:23 ` [PATCH v3 16/50] tools api fs: Switch filename__read_str to use io.h Ian Rogers
2023-10-24 22:23 ` [PATCH v3 17/50] tools api fs: Avoid reading whole file for a 1 byte bool Ian Rogers
2023-10-24 22:23 ` [PATCH v3 18/50] tools lib api: Add io_dir an allocation free readdir alternative Ian Rogers
2023-10-25 18:43 ` Namhyung Kim
2023-10-25 22:15 ` Ian Rogers
2023-10-24 22:23 ` [PATCH v3 19/50] perf maps: Switch modules tree walk to io_dir__readdir Ian Rogers
2023-10-24 22:23 ` [PATCH v3 20/50] perf record: Be lazier in allocating lost samples buffer Ian Rogers
2023-10-25 3:44 ` Yang Jihong
2023-10-25 17:00 ` Ian Rogers
2023-10-25 19:04 ` Namhyung Kim
2023-10-25 19:00 ` Namhyung Kim
2023-10-24 22:23 ` [PATCH v3 21/50] perf pmu: Switch to io_dir__readdir Ian Rogers
2023-10-24 22:23 ` [PATCH v3 22/50] perf bpf: Don't synthesize BPF events when disabled Ian Rogers
2023-10-24 22:23 ` [PATCH v3 23/50] perf header: Switch mem topology to io_dir__readdir Ian Rogers
2023-10-24 22:23 ` [PATCH v3 24/50] perf events: Remove scandir in thread synthesis Ian Rogers
2023-10-24 22:23 ` [PATCH v3 25/50] perf map: Simplify map_ip/unmap_ip and make map size smaller Ian Rogers
2023-10-24 22:23 ` [PATCH v3 26/50] perf maps: Move symbol maps functions to maps.c Ian Rogers
2023-10-24 22:23 ` [PATCH v3 27/50] perf thread: Add missing RC_CHK_ACCESS Ian Rogers
2023-10-24 22:23 ` [PATCH v3 28/50] perf maps: Add maps__for_each_map to call a function on each entry Ian Rogers
2023-10-24 22:23 ` [PATCH v3 29/50] perf maps: Add remove maps function to remove a map based on callback Ian Rogers
2023-10-24 22:23 ` [PATCH v3 30/50] perf debug: Expose debug file Ian Rogers
2023-10-24 22:23 ` [PATCH v3 31/50] perf maps: Refactor maps__fixup_overlappings Ian Rogers
2023-10-24 22:23 ` [PATCH v3 32/50] perf maps: Do simple merge if given map doesn't overlap Ian Rogers
2023-10-24 22:23 ` [PATCH v3 33/50] perf maps: Rename clone to copy from Ian Rogers
2023-10-24 22:23 ` [PATCH v3 34/50] perf maps: Add maps__load_first Ian Rogers
2023-10-24 22:23 ` [PATCH v3 35/50] perf maps: Add find next entry to give entry after the given map Ian Rogers
2023-10-24 22:23 ` [PATCH v3 36/50] perf maps: Reduce scope of map_rb_node and maps internals Ian Rogers
2023-10-24 22:23 ` [PATCH v3 37/50] perf maps: Fix up overlaps during fixup_end Ian Rogers
2023-10-24 22:23 ` [PATCH v3 38/50] perf maps: Switch from rbtree to lazily sorted array for addresses Ian Rogers
2023-10-24 22:23 ` [PATCH v3 39/50] perf maps: Get map before returning in maps__find Ian Rogers
2023-10-24 22:23 ` [PATCH v3 40/50] perf maps: Get map before returning in maps__find_by_name Ian Rogers
2023-10-24 22:23 ` [PATCH v3 41/50] perf maps: Get map before returning in maps__find_next_entry Ian Rogers
2023-10-24 22:23 ` [PATCH v3 42/50] perf maps: Hide maps internals Ian Rogers
2023-10-24 22:23 ` [PATCH v3 43/50] perf maps: Locking tidy up of nr_maps Ian Rogers
2023-10-24 22:23 ` Ian Rogers [this message]
2023-10-24 22:23 ` [PATCH v3 45/50] perf report: Sort child tasks by tid Ian Rogers
2023-10-24 22:23 ` [PATCH v3 46/50] perf trace: Ignore thread hashing in summary Ian Rogers
2023-10-24 22:23 ` [PATCH v3 47/50] perf machine: Move fprintf to for_each loop and a callback Ian Rogers
2023-10-24 22:23 ` [PATCH v3 48/50] perf threads: Move threads to its own files Ian Rogers
2023-10-24 22:23 ` [PATCH v3 49/50] perf threads: Switch from rbtree to hashmap Ian Rogers
2023-10-24 22:23 ` [PATCH v3 50/50] perf threads: Reduce table size from 256 to 8 Ian Rogers
2023-10-26 17:11 ` (subset) [PATCH v3 00/50] Improvements to memory use Namhyung Kim
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20231024222353.3024098-45-irogers@google.com \
--to=irogers@google.com \
--cc=acme@kernel.org \
--cc=adrian.hunter@intel.com \
--cc=ak@linux.intel.com \
--cc=alexander.shishkin@linux.intel.com \
--cc=anshuman.khandual@arm.com \
--cc=atrajeev@linux.vnet.ibm.com \
--cc=changbin.du@huawei.com \
--cc=german.gomez@arm.com \
--cc=james.clark@arm.com \
--cc=jolsa@kernel.org \
--cc=kan.liang@linux.intel.com \
--cc=kjain@linux.ibm.com \
--cc=kprateek.nayak@amd.com \
--cc=leo.yan@linaro.org \
--cc=liam.howlett@oracle.com \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-perf-users@vger.kernel.org \
--cc=liuwenyu7@huawei.com \
--cc=mark.rutland@arm.com \
--cc=mhiramat@kernel.org \
--cc=mingo@redhat.com \
--cc=namhyung@kernel.org \
--cc=ojeda@kernel.org \
--cc=pbonzini@redhat.com \
--cc=peterz@infradead.org \
--cc=ravi.bangoria@amd.com \
--cc=sandipan.das@amd.com \
--cc=seanjc@google.com \
--cc=siyanteng@loongson.cn \
--cc=song@kernel.org \
--cc=terrelln@fb.com \
--cc=yangjihong1@huawei.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox