From: Alan Maguire <alan.maguire@oracle.com>
To: dwarves@vger.kernel.org
Cc: andrii@kernel.org, eddyz87@gmail.com, ast@kernel.org,
daniel@iogearbox.net, martin.lau@linux.dev, acme@kernel.org,
ttreyer@meta.com, yonghong.song@linux.dev, song@kernel.org,
john.fastabend@gmail.com, kpsingh@kernel.org, sdf@fomichev.me,
haoluo@google.com, jolsa@kernel.org, qmo@kernel.org,
ihor.solodrai@linux.dev, david.faust@oracle.com,
jose.marchesi@oracle.com, bpf@vger.kernel.org,
Alan Maguire <alan.maguire@oracle.com>
Subject: [RFC dwarves 3/5] dwarf_loader: Collect inline expansion location information
Date: Fri, 24 Oct 2025 08:33:26 +0100 [thread overview]
Message-ID: <20251024073328.370457-4-alan.maguire@oracle.com> (raw)
In-Reply-To: <20251024073328.370457-1-alan.maguire@oracle.com>
Collect location information for parameters, inline expansions and ensure it
does not rely on aspects of the CU that go away when it is freed.
(This is a slightly differerent approach from Thierry's but it was helped
greatly by his series; would happily add a Co-developed by here or
whatever suits)
Signed-off-by: Alan Maguire <alan.maguire>
---
dwarf_loader.c | 277 +++++++++++++++++++++++++++++++++++++++----------
dwarves.h | 48 ++++++++-
2 files changed, 266 insertions(+), 59 deletions(-)
diff --git a/dwarf_loader.c b/dwarf_loader.c
index 4656575..a7ae497 100644
--- a/dwarf_loader.c
+++ b/dwarf_loader.c
@@ -1185,29 +1185,54 @@ static ptrdiff_t __dwarf_getlocations(Dwarf_Attribute *attr,
return ret;
}
-/* For DW_AT_location 'attr':
- * - if first location is DW_OP_regXX with expected number, return the register;
- * otherwise save the register for later return
- * - if location DW_OP_entry_value(DW_OP_regXX) with expected number is in the
- * list, return the register; otherwise save register for later return
- * - otherwise if no register was found for locations, return -1.
+/* Retrieve location information for parameter; focus on simple locations
+ * like constants and register values. Support multiple registers as
+ * it is possible for a value (struct) to be passed via multiple registers.
+ * Handle edge cases like multiple instances of same location value, but
+ * avoid cases with large (>1 size) expressions to keep things simple.
+ * This covers the vast majority of cases. The only unhandled atom is
+ * DW_OP_GNU_parameter_ref; future work could add that and improve
+ * location handling. In practice the below supports the majority
+ * of parameter locations.
*/
-static int parameter__reg(Dwarf_Attribute *attr, int expected_reg)
+static int parameter__locs(Dwarf_Die *die, Dwarf_Attribute *attr, struct parameter *parm)
{
- Dwarf_Addr base, start, end;
- Dwarf_Op *expr, *entry_ops;
- Dwarf_Attribute entry_attr;
- size_t exprlen, entry_len;
+ Dwarf_Addr base, start, end, first = -1;
+ Dwarf_Attribute next_attr;
ptrdiff_t offset = 0;
- int loc_num = -1;
+ Dwarf_Op *expr;
+ size_t exprlen;
int ret = -1;
+ /* parameter__locs() can be called recursively, but at toplevel
+ * die is non-NULL signalling we need to look up loc/const attrs.
+ */
+ if (die) {
+ if (dwarf_attr(die, DW_AT_const_value, attr) != NULL) {
+ parm->has_loc = 1;
+ parm->optimized = 1;
+ parm->locs[0].is_const = 1;
+ parm->nlocs = 1;
+ parm->locs[0].size = 8;
+ parm->locs[0].value = attr_numeric(die, DW_AT_const_value);
+ return 0;
+ }
+ if (dwarf_attr(die, DW_AT_location, attr) == NULL)
+ return 0;
+ }
+
/* use libdw__lock as dwarf_getlocation(s) has concurrency issues
* when libdw is not compiled with experimental --enable-thread-safety
*/
pthread_mutex_lock(&libdw__lock);
while ((offset = __dwarf_getlocations(attr, offset, &base, &start, &end, &expr, &exprlen)) > 0) {
- loc_num++;
+ /* We only want location info referring to start of function;
+ * assumes we get location info in address order; empirically
+ * this is the case. Only exception is DW_OP_*entry_value
+ * location info which always refers to the value on entry.
+ */
+ if (first == -1)
+ first = start;
/* Convert expression list (XX DW_OP_stack_value) -> (XX).
* DW_OP_stack_value instructs interpreter to pop current value from
@@ -1216,33 +1241,154 @@ static int parameter__reg(Dwarf_Attribute *attr, int expected_reg)
if (exprlen > 1 && expr[exprlen - 1].atom == DW_OP_stack_value)
exprlen--;
- if (exprlen != 1)
- continue;
+ if (exprlen > 1) {
+ /* ignore complex exprs not at start of function,
+ * but bail if we hit a complex loc expr at the start.
+ */
+ if (start != first)
+ continue;
+ ret = -1;
+ goto out;
+ }
switch (expr->atom) {
- /* match DW_OP_regXX at first location */
+ case DW_OP_deref:
+ if (parm->nlocs > 0)
+ parm->locs[parm->nlocs - 1].is_deref = 1;
+ else
+ ret = -1;
+ break;
case DW_OP_reg0 ... DW_OP_reg31:
- if (loc_num != 0)
+ if (start != first || parm->nlocs > 1)
+ break;
+ /* avoid duplicate location value */
+ if (parm->nlocs > 0 && parm->locs[parm->nlocs - 1].reg ==
+ (expr->atom - DW_OP_reg0))
+ break;
+ parm->locs[parm->nlocs].reg = expr->atom - DW_OP_reg0;
+ parm->locs[parm->nlocs].is_deref = 0;
+ parm->locs[parm->nlocs].size = 8;
+ parm->locs[parm->nlocs++].offset = 0;
+ ret = 0;
+ break;
+ case DW_OP_fbreg:
+ case DW_OP_breg0 ... DW_OP_breg31:
+ if (start != first || parm->nlocs > 1)
break;
- ret = expr->atom;
- if (ret == expected_reg)
- goto out;
+ /* avoid duplicate location value */
+ if (parm->nlocs > 0 && parm->locs[parm->nlocs - 1].reg ==
+ (expr->atom - DW_OP_breg0)) {
+ if (parm->locs[parm->nlocs - 1].offset != expr->offset)
+ ret = -1;
+ break;
+ }
+ parm->locs[parm->nlocs].reg = expr->atom - DW_OP_breg0;
+ parm->locs[parm->nlocs].is_deref = 1;
+ parm->locs[parm->nlocs].size = 8;
+ parm->locs[parm->nlocs++].offset = expr->offset;
+ ret = 0;
+ break;
+ case DW_OP_lit0 ... DW_OP_lit31:
+ if (start != first)
+ break;
+
+ if (parm->nlocs > 0 && (expr->atom - DW_OP_lit0) ==
+ parm->locs[parm->nlocs - 1].value)
+ break;
+ parm->locs[parm->nlocs].is_const = 1;
+ parm->locs[parm->nlocs].size = 1;
+ parm->locs[parm->nlocs++].value = expr->atom - DW_OP_lit0;
+ ret = 0;
+ break;
+ case DW_OP_const1u ... DW_OP_consts:
+ if (start != first)
+ break;
+ if (parm->nlocs > 0 && (parm->locs[parm->nlocs - 1].is_const &&
+ expr->number == parm->locs[parm->nlocs - 1].value))
+ break;
+ parm->locs[parm->nlocs].is_const = 1;
+ parm->locs[parm->nlocs].value = expr->number;
+ switch (expr->atom) {
+ case DW_OP_const1u:
+ parm->locs[parm->nlocs].size = 1;
+ break;
+ case DW_OP_const1s:
+ parm->locs[parm->nlocs].size = -1;
+ break;
+ case DW_OP_const2u:
+ parm->locs[parm->nlocs].size = 2;
+ break;
+ case DW_OP_const2s:
+ parm->locs[parm->nlocs].size = -2;
+ break;
+ case DW_OP_const4u:
+ parm->locs[parm->nlocs].size = 4;
+ break;
+ case DW_OP_const4s:
+ parm->locs[parm->nlocs].size = -4;
+ break;
+ case DW_OP_const8u:
+ case DW_OP_constu:
+ parm->locs[parm->nlocs].size = 8;
+ break;
+ case DW_OP_const8s:
+ case DW_OP_consts:
+ parm->locs[parm->nlocs].size = -8;
+ break;
+ }
+ parm->nlocs++;
+ ret = 0;
+ break;
+ case DW_OP_addr:
+ if (start != first || parm->nlocs > 0)
+ break;
+ parm->locs[parm->nlocs].is_const = 1;
+ parm->locs[parm->nlocs].is_addr = 1;
+ parm->locs[parm->nlocs].size = 8;
+ parm->locs[parm->nlocs++].value = expr->number;
+ ret = 0;
break;
- /* match DW_OP_entry_value(DW_OP_regXX) at any location */
case DW_OP_entry_value:
case DW_OP_GNU_entry_value:
- if (dwarf_getlocation_attr(attr, expr, &entry_attr) == 0 &&
- dwarf_getlocation(&entry_attr, &entry_ops, &entry_len) == 0 &&
- entry_len == 1) {
- ret = entry_ops->atom;
- if (ret == expected_reg)
- goto out;
+ /* Match DW_OP_entry_value(DW_OP_regXX) at any offset
+ * in function since it always describes value on entry.
+ */
+ if (dwarf_getlocation_attr(attr, expr, &next_attr) == 0) {
+ pthread_mutex_unlock(&libdw__lock);
+ return parameter__locs(NULL, &next_attr, parm);
}
+ ret = -1;
+ break;
+ case DW_OP_implicit_pointer:
+ if (start != first)
+ break;
+ if (dwarf_getlocation_implicit_pointer(attr, expr, &next_attr) == 0) {
+ pthread_mutex_unlock(&libdw__lock);
+ return parameter__locs(NULL, &next_attr, parm);
+ }
+ ret = -1;
+ break;
+ case DW_OP_implicit_value:
+ if (start != first)
+ break;
+ if (dwarf_getlocation_attr(attr, expr, &next_attr) == 0) {
+ pthread_mutex_unlock(&libdw__lock);
+ return parameter__locs(NULL, &next_attr, parm);
+ }
+ ret = -1;
+ break;
+ default:
+ /* unhandled op */
+ ret = -1;
break;
}
+ if (ret == -1)
+ break;
}
out:
pthread_mutex_unlock(&libdw__lock);
+ if (ret == 0)
+ parm->has_loc = 1;
return ret;
}
@@ -1250,10 +1396,11 @@ static struct parameter *parameter__new(Dwarf_Die *die, struct cu *cu,
struct conf_load *conf, int param_idx)
{
struct parameter *parm = tag__alloc(cu, sizeof(*parm));
+ int ret;
if (parm != NULL) {
- bool has_const_value;
Dwarf_Attribute attr;
+ int expected_reg;
tag__init(&parm->tag, cu, die);
parm->name = attr_string(die, DW_AT_name, conf);
@@ -1293,28 +1440,31 @@ static struct parameter *parameter__new(Dwarf_Die *die, struct cu *cu,
* between these parameter representations. See
* ftype__recode_dwarf_types() below for how this is handled.
*/
- has_const_value = dwarf_attr(die, DW_AT_const_value, &attr) != NULL;
- parm->has_loc = dwarf_attr(die, DW_AT_location, &attr) != NULL;
-
- if (parm->has_loc) {
- struct location location;
- attr_location(die, &location.expr, &location.exprlen);
-
- int expected_reg = cu->register_params[param_idx];
- int actual_reg = parameter__reg(&attr, expected_reg);
-
- if (actual_reg < 0)
- parm->optimized = 1;
- else if (expected_reg >= 0 && expected_reg != actual_reg)
- /* mark parameters that use an unexpected
- * register to hold a parameter; these will
- * be problematic for users of BTF as they
- * violate expectations about register
- * contents.
- */
- parm->unexpected_reg = 1;
- } else if (has_const_value) {
+ expected_reg = cu->register_params[param_idx];
+ if (expected_reg >= DW_OP_reg0)
+ expected_reg -= DW_OP_reg0;
+
+ ret = parameter__locs(die, &attr, parm);
+
+ if (!parm->has_loc)
+ return parm;
+
+ if (ret < 0) {
+ /* undecipherable location */
parm->optimized = 1;
+ return parm;
+ }
+ if (parm->locs[0].is_const) {
+ parm->optimized = 1;
+ } else if (expected_reg >= 0 &&
+ expected_reg != parm->locs[0].reg) {
+ /* mark parameters that use an unexpected
+ * register to hold a parameter; these will
+ * be problematic for users of BTF as they
+ * violate expectations about register
+ * contents.
+ */
+ parm->unexpected_reg = 1;
}
}
@@ -1377,10 +1527,14 @@ static struct inline_expansion *inline_expansion__new(Dwarf_Die *die, struct cu
dwarf_tag__set_attr_type(dtag, type, die, DW_AT_abstract_origin);
Dwarf_Attribute attr_orig;
+ exp->name = 0;
if (dwarf_attr(die, DW_AT_abstract_origin, &attr_orig)) {
Dwarf_Die die_orig;
- if (dwarf_formref_die(&attr_orig, &die_orig)) {
- exp->name = attr_string(&die_orig, DW_AT_name, conf);
+
+ if (dwarf_formref_die(&attr_orig, &die_orig) &&
+ dwarf_tag(&die_orig) == DW_TAG_subprogram) {
+ if (dwarf_hasattr(&die_orig, DW_AT_name))
+ exp->name = attr_string(&die_orig, DW_AT_name, conf);
}
}
@@ -2686,12 +2840,12 @@ static void inline_expansion__recode_dwarf_types(struct tag *tag, struct cu *cu)
* in fact an abtract origin, i.e. must be looked up in the tags_table,
* not in the types_table.
*/
- struct dwarf_tag *ftype = NULL;
+ struct dwarf_tag *function = NULL;
if (dtag->type != 0)
- ftype = dwarf_cu__find_tag_by_ref(dcu, dtag, type);
+ function = dwarf_cu__find_tag_by_ref(dcu, dtag, type);
else
- ftype = dwarf_cu__find_tag_by_ref(dcu, dtag, abstract_origin);
- if (ftype == NULL) {
+ function = dwarf_cu__find_tag_by_ref(dcu, dtag, abstract_origin);
+ if (function == NULL) {
if (dtag->type != 0)
tag__print_type_not_found(tag);
else
@@ -2699,13 +2853,14 @@ static void inline_expansion__recode_dwarf_types(struct tag *tag, struct cu *cu)
return;
}
- ftype__recode_dwarf_types(dtag__tag(ftype), cu);
+ ftype__recode_dwarf_types(dtag__tag(function), cu);
struct tag *pos;
struct inline_expansion *exp = tag__inline_expansion(tag);
list_for_each_entry(pos, &exp->parms, node)
parameter__recode_dwarf_type(tag__parameter(pos), cu);
- exp->ip.tag.type = ftype->small_id;
+ exp->ip.tag.type = function->small_id;
+ exp->function = tag__function(dtag__tag(function));
}
static void lexblock__recode_dwarf_types(struct lexblock *tag, struct cu *cu)
@@ -2983,6 +3138,14 @@ static int cu__resolve_func_ret_types_optimized(struct cu *cu)
struct function *fn = tag__function(tag);
bool has_unexpected_reg = false, has_struct_param = false;
+ /* Inlined function representations likely have parameters
+ * in wrong locations; ensure they do not contribute to
+ * classification of unexpected regs for a function that
+ * is partially inlined.
+ */
+ if (fn->inlined)
+ continue;
+
/* mark function as optimized if parameter is, or
* if parameter does not have a location; at this
* point location presence has been marked in
diff --git a/dwarves.h b/dwarves.h
index 284bc02..d6efdd0 100644
--- a/dwarves.h
+++ b/dwarves.h
@@ -481,6 +481,19 @@ bool languages__cu_filtered(struct languages *languages, struct cu *cu, bool ver
continue; \
else
+/**
+ * cu__for_each_inline_expansion - iterate thru all inline expansions
+ * @cu: struct cu instance to iterate
+ * @pos: struct tag iterator
+ * @id: uint32_t tag id
+ */
+#define cu__for_each_inline_expansion(cu, id, pos) \
+ for (id = 0; id < cu->tags_table.nr_entries; ++id) \
+ if (!tag__is_inline_expansion(cu->tags_table.entries[id]) || \
+ !(pos = tag__inline_expansion(cu->tags_table.entries[id])))\
+ continue; \
+ else
+
int cu__add_tag(struct cu *cu, struct tag *tag, uint32_t *id);
int cu__add_tag_with_id(struct cu *cu, struct tag *tag, uint32_t id);
int cu__table_add_tag(struct cu *cu, struct tag *tag, uint32_t *id);
@@ -615,6 +628,11 @@ static inline bool tag__is_restrict(const struct tag *tag)
return tag->tag == DW_TAG_restrict_type;
}
+static inline bool tag__is_inline_expansion(const struct tag *tag)
+{
+ return tag->tag == DW_TAG_inlined_subroutine;
+}
+
static inline int tag__is_modifier(const struct tag *tag)
{
return tag__is_const(tag) ||
@@ -821,13 +839,22 @@ struct ip_tag {
struct inline_expansion {
struct ip_tag ip;
- const char *name;
+ const char *name;
size_t size;
uint64_t high_pc;
struct list_head parms;
uint16_t nr_parms;
+ struct function *function;
};
+/**
+ * inline_expansion__for_each_parameter - iterate thru all the parameters
+ * @ie: struct inline_expansion instance to iterate
+ * @pos: struct parameter iterator
+ */
+#define inline_expansion__for_each_parameter(ie, pos) \
+ list_for_each_entry(pos, &(ie)->parms, tag.node)
+
static inline struct inline_expansion *
tag__inline_expansion(const struct tag *tag)
{
@@ -944,13 +971,30 @@ size_t lexblock__fprintf(const struct lexblock *lexblock, const struct cu *cu,
struct function *function, uint16_t indent,
const struct conf_fprintf *conf, FILE *fp);
+
+struct parameter_loc {
+ uint8_t is_const:1;
+ uint8_t is_deref:1;
+ uint8_t is_addr:1;
+ int8_t size;
+ union {
+ struct {
+ uint16_t reg;
+ uint16_t flags;
+ uint32_t offset;
+ };
+ uint64_t value;
+ };
+};
+
struct parameter {
struct tag tag;
const char *name;
- struct location location;
uint8_t optimized:1;
uint8_t unexpected_reg:1;
uint8_t has_loc:1;
+ struct parameter_loc locs[2]; /* multiple locs may be used */
+ uint8_t nlocs;
};
static inline struct parameter *tag__parameter(const struct tag *tag)
--
2.39.3
next prev parent reply other threads:[~2025-10-24 7:34 UTC|newest]
Thread overview: 11+ messages / expand[flat|nested] mbox.gz Atom feed top
2025-10-24 7:33 [RFC dwarves 0/5] pahole: support BTF inline encoding Alan Maguire
2025-10-24 7:33 ` [RFC dwarves 1/5] dwarf_loader: Add parameters list to inlined expansion Alan Maguire
2025-10-24 7:33 ` [RFC dwarves 2/5] dwarf_loader: Add name to inline expansion Alan Maguire
2025-10-24 7:33 ` Alan Maguire [this message]
2025-10-24 17:55 ` [RFC dwarves 3/5] dwarf_loader: Collect inline expansion location information Eduard Zingerman
2025-10-29 17:40 ` Alan Maguire
2025-10-29 18:32 ` Eduard Zingerman
2025-10-29 18:46 ` Alan Maguire
2025-10-24 7:33 ` [RFC dwarves 4/5] btf_encoder: Support encoding of inline " Alan Maguire
2025-10-24 18:04 ` Eduard Zingerman
2025-10-24 7:33 ` [RFC dwarves 5/5] pahole: Support inline encoding with inline[.extra] BTF feature Alan Maguire
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20251024073328.370457-4-alan.maguire@oracle.com \
--to=alan.maguire@oracle.com \
--cc=acme@kernel.org \
--cc=andrii@kernel.org \
--cc=ast@kernel.org \
--cc=bpf@vger.kernel.org \
--cc=daniel@iogearbox.net \
--cc=david.faust@oracle.com \
--cc=dwarves@vger.kernel.org \
--cc=eddyz87@gmail.com \
--cc=haoluo@google.com \
--cc=ihor.solodrai@linux.dev \
--cc=john.fastabend@gmail.com \
--cc=jolsa@kernel.org \
--cc=jose.marchesi@oracle.com \
--cc=kpsingh@kernel.org \
--cc=martin.lau@linux.dev \
--cc=qmo@kernel.org \
--cc=sdf@fomichev.me \
--cc=song@kernel.org \
--cc=ttreyer@meta.com \
--cc=yonghong.song@linux.dev \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox