BPF List
 help / color / mirror / Atom feed
From: Alan Maguire <alan.maguire@oracle.com>
To: dwarves@vger.kernel.org
Cc: andrii@kernel.org, eddyz87@gmail.com, ast@kernel.org,
	daniel@iogearbox.net, martin.lau@linux.dev, acme@kernel.org,
	ttreyer@meta.com, yonghong.song@linux.dev, song@kernel.org,
	john.fastabend@gmail.com, kpsingh@kernel.org, sdf@fomichev.me,
	haoluo@google.com, jolsa@kernel.org, qmo@kernel.org,
	ihor.solodrai@linux.dev, david.faust@oracle.com,
	jose.marchesi@oracle.com, bpf@vger.kernel.org,
	Alan Maguire <alan.maguire@oracle.com>
Subject: [RFC dwarves 3/5] dwarf_loader: Collect inline expansion location information
Date: Fri, 24 Oct 2025 08:33:26 +0100	[thread overview]
Message-ID: <20251024073328.370457-4-alan.maguire@oracle.com> (raw)
In-Reply-To: <20251024073328.370457-1-alan.maguire@oracle.com>

Collect location information for parameters, inline expansions and ensure it
does not rely on aspects of the CU that go away when it is freed.

(This is a slightly differerent approach from Thierry's but it was helped
greatly by his series; would happily add a Co-developed by here or
whatever suits)

Signed-off-by: Alan Maguire <alan.maguire>
---
 dwarf_loader.c | 277 +++++++++++++++++++++++++++++++++++++++----------
 dwarves.h      |  48 ++++++++-
 2 files changed, 266 insertions(+), 59 deletions(-)

diff --git a/dwarf_loader.c b/dwarf_loader.c
index 4656575..a7ae497 100644
--- a/dwarf_loader.c
+++ b/dwarf_loader.c
@@ -1185,29 +1185,54 @@ static ptrdiff_t __dwarf_getlocations(Dwarf_Attribute *attr,
 	return ret;
 }
 
-/* For DW_AT_location 'attr':
- * - if first location is DW_OP_regXX with expected number, return the register;
- *   otherwise save the register for later return
- * - if location DW_OP_entry_value(DW_OP_regXX) with expected number is in the
- *   list, return the register; otherwise save register for later return
- * - otherwise if no register was found for locations, return -1.
+/* Retrieve location information for parameter; focus on simple locations
+ * like constants and register values.  Support multiple registers as
+ * it is possible for a value (struct) to be passed via multiple registers.
+ * Handle edge cases like multiple instances of same location value, but
+ * avoid cases with large (>1 size) expressions to keep things simple.
+ * This covers the vast majority of cases.  The only unhandled atom is
+ * DW_OP_GNU_parameter_ref; future work could add that and improve
+ * location handling.  In practice the below supports the majority
+ * of parameter locations.
  */
-static int parameter__reg(Dwarf_Attribute *attr, int expected_reg)
+static int parameter__locs(Dwarf_Die *die, Dwarf_Attribute *attr, struct parameter *parm)
 {
-	Dwarf_Addr base, start, end;
-	Dwarf_Op *expr, *entry_ops;
-	Dwarf_Attribute entry_attr;
-	size_t exprlen, entry_len;
+	Dwarf_Addr base, start, end, first = -1;
+	Dwarf_Attribute next_attr;
 	ptrdiff_t offset = 0;
-	int loc_num = -1;
+	Dwarf_Op *expr;
+	size_t exprlen;
 	int ret = -1;
 
+	/* parameter__locs() can be called recursively, but at toplevel
+	 * die is non-NULL signalling we need to look up loc/const attrs.
+	 */
+	if (die) {
+		if (dwarf_attr(die, DW_AT_const_value, attr) != NULL) {
+			parm->has_loc = 1;
+			parm->optimized = 1;
+			parm->locs[0].is_const = 1;
+			parm->nlocs = 1;
+			parm->locs[0].size = 8;
+			parm->locs[0].value = attr_numeric(die, DW_AT_const_value);
+			return 0;
+		}
+		if (dwarf_attr(die, DW_AT_location, attr) == NULL)
+			return 0;
+	}
+
 	/* use libdw__lock as dwarf_getlocation(s) has concurrency issues
 	 * when libdw is not compiled with experimental --enable-thread-safety
 	 */
 	pthread_mutex_lock(&libdw__lock);
 	while ((offset = __dwarf_getlocations(attr, offset, &base, &start, &end, &expr, &exprlen)) > 0) {
-		loc_num++;
+		/* We only want location info referring to start of function;
+		 * assumes we get location info in address order; empirically
+		 * this is the case.  Only exception is DW_OP_*entry_value
+		 * location info which always refers to the value on entry.
+		 */
+		if (first == -1)
+			first = start;
 
 		/* Convert expression list (XX DW_OP_stack_value) -> (XX).
 		 * DW_OP_stack_value instructs interpreter to pop current value from
@@ -1216,33 +1241,154 @@ static int parameter__reg(Dwarf_Attribute *attr, int expected_reg)
 		if (exprlen > 1 && expr[exprlen - 1].atom == DW_OP_stack_value)
 			exprlen--;
 
-		if (exprlen != 1)
-			continue;
+		if (exprlen > 1) {
+			/* ignore complex exprs not at start of function,
+			 * but bail if we hit a complex loc expr at the start.
+			 */
+			if (start != first)
+				continue;
+			ret = -1;
+			goto out;
+		}
 
 		switch (expr->atom) {
-		/* match DW_OP_regXX at first location */
+		case DW_OP_deref:
+			if (parm->nlocs > 0)
+				parm->locs[parm->nlocs - 1].is_deref = 1;
+			else
+				ret = -1;
+			break;
 		case DW_OP_reg0 ... DW_OP_reg31:
-			if (loc_num != 0)
+			if (start != first || parm->nlocs > 1)
+				break;
+			/* avoid duplicate location value */
+			if (parm->nlocs > 0 && parm->locs[parm->nlocs - 1].reg ==
+					       (expr->atom - DW_OP_reg0))
+				break;
+			parm->locs[parm->nlocs].reg = expr->atom - DW_OP_reg0;
+			parm->locs[parm->nlocs].is_deref = 0;
+			parm->locs[parm->nlocs].size = 8;
+			parm->locs[parm->nlocs++].offset = 0;
+			ret = 0;
+			break;
+		case DW_OP_fbreg:
+		case DW_OP_breg0 ... DW_OP_breg31:
+			if (start != first || parm->nlocs > 1)
 				break;
-			ret = expr->atom;
-			if (ret == expected_reg)
-				goto out;
+			/* avoid duplicate location value */
+			if (parm->nlocs > 0 && parm->locs[parm->nlocs - 1].reg ==
+					       (expr->atom - DW_OP_breg0)) {
+				if (parm->locs[parm->nlocs - 1].offset != expr->offset)
+					ret = -1;
+				break;
+			}
+			parm->locs[parm->nlocs].reg = expr->atom - DW_OP_breg0;
+			parm->locs[parm->nlocs].is_deref = 1;
+			parm->locs[parm->nlocs].size = 8;
+			parm->locs[parm->nlocs++].offset = expr->offset;
+			ret = 0;
+			break;
+		case DW_OP_lit0 ... DW_OP_lit31:
+			if (start != first)
+				break;
+
+			if (parm->nlocs > 0 && (expr->atom - DW_OP_lit0) ==
+					       parm->locs[parm->nlocs - 1].value)
+				break;
+			parm->locs[parm->nlocs].is_const = 1;
+			parm->locs[parm->nlocs].size = 1;
+			parm->locs[parm->nlocs++].value = expr->atom - DW_OP_lit0;
+			ret = 0;
+			break;
+		case DW_OP_const1u ... DW_OP_consts:
+			if (start != first)
+				break;
+			if (parm->nlocs > 0 && (parm->locs[parm->nlocs - 1].is_const &&
+			    expr->number == parm->locs[parm->nlocs - 1].value))
+				break;
+			parm->locs[parm->nlocs].is_const = 1;
+			parm->locs[parm->nlocs].value = expr->number;
+			switch (expr->atom) {
+			case DW_OP_const1u:
+				parm->locs[parm->nlocs].size = 1;
+				break;
+			case DW_OP_const1s:
+				parm->locs[parm->nlocs].size = -1;
+				break;
+			case DW_OP_const2u:
+				parm->locs[parm->nlocs].size = 2;
+				break;
+			case DW_OP_const2s:
+				parm->locs[parm->nlocs].size = -2;
+				break;
+			case DW_OP_const4u:
+				parm->locs[parm->nlocs].size = 4;
+				break;
+			case DW_OP_const4s:
+				parm->locs[parm->nlocs].size = -4;
+				break;
+			case DW_OP_const8u:
+			case DW_OP_constu:
+				parm->locs[parm->nlocs].size = 8;
+				break;
+			case DW_OP_const8s:
+			case DW_OP_consts:
+				parm->locs[parm->nlocs].size = -8;
+				break;
+			}
+			parm->nlocs++;
+			ret = 0;
+			break;
+		case DW_OP_addr:
+			if (start != first || parm->nlocs > 0)
+				break;
+			parm->locs[parm->nlocs].is_const = 1;
+			parm->locs[parm->nlocs].is_addr = 1;
+			parm->locs[parm->nlocs].size = 8;
+			parm->locs[parm->nlocs++].value = expr->number;
+			ret = 0;
 			break;
-		/* match DW_OP_entry_value(DW_OP_regXX) at any location */
 		case DW_OP_entry_value:
 		case DW_OP_GNU_entry_value:
-			if (dwarf_getlocation_attr(attr, expr, &entry_attr) == 0 &&
-			    dwarf_getlocation(&entry_attr, &entry_ops, &entry_len) == 0 &&
-			    entry_len == 1) {
-				ret = entry_ops->atom;
-				if (ret == expected_reg)
-					goto out;
+			/* Match DW_OP_entry_value(DW_OP_regXX) at any offset
+			 * in function since it always describes value on entry.
+			 */
+			if (dwarf_getlocation_attr(attr, expr, &next_attr) == 0) {
+				pthread_mutex_unlock(&libdw__lock);
+				return parameter__locs(NULL, &next_attr, parm);
 			}
+			ret = -1;
+			break;
+		case DW_OP_implicit_pointer:
+			if (start != first)
+				break;
+			if (dwarf_getlocation_implicit_pointer(attr, expr, &next_attr) == 0) {
+				pthread_mutex_unlock(&libdw__lock);
+				return parameter__locs(NULL, &next_attr, parm);
+			}
+			ret = -1;
+			break;
+		case DW_OP_implicit_value:
+			if (start != first)
+				break;
+			if (dwarf_getlocation_attr(attr, expr, &next_attr) == 0) {
+				pthread_mutex_unlock(&libdw__lock);
+				return parameter__locs(NULL, &next_attr, parm);
+			}
+			ret = -1;
+			break;
+		default:
+			/* unhandled op */
+			ret = -1;
 			break;
 		}
+		if (ret == -1)
+			break;
 	}
 out:
 	pthread_mutex_unlock(&libdw__lock);
+	if (ret == 0)
+		parm->has_loc = 1;
 	return ret;
 }
 
@@ -1250,10 +1396,11 @@ static struct parameter *parameter__new(Dwarf_Die *die, struct cu *cu,
 					struct conf_load *conf, int param_idx)
 {
 	struct parameter *parm = tag__alloc(cu, sizeof(*parm));
+	int ret;
 
 	if (parm != NULL) {
-		bool has_const_value;
 		Dwarf_Attribute attr;
+		int expected_reg;
 
 		tag__init(&parm->tag, cu, die);
 		parm->name = attr_string(die, DW_AT_name, conf);
@@ -1293,28 +1440,31 @@ static struct parameter *parameter__new(Dwarf_Die *die, struct cu *cu,
 		 * between these parameter representations.  See
 		 * ftype__recode_dwarf_types() below for how this is handled.
 		 */
-		has_const_value = dwarf_attr(die, DW_AT_const_value, &attr) != NULL;
-		parm->has_loc = dwarf_attr(die, DW_AT_location, &attr) != NULL;
-
-		if (parm->has_loc) {
-			struct location location;
-			attr_location(die, &location.expr, &location.exprlen);
-
-			int expected_reg = cu->register_params[param_idx];
-			int actual_reg = parameter__reg(&attr, expected_reg);
-
-			if (actual_reg < 0)
-				parm->optimized = 1;
-			else if (expected_reg >= 0 && expected_reg != actual_reg)
-				/* mark parameters that use an unexpected
-				 * register to hold a parameter; these will
-				 * be problematic for users of BTF as they
-				 * violate expectations about register
-				 * contents.
-				 */
-				parm->unexpected_reg = 1;
-		} else if (has_const_value) {
+		expected_reg = cu->register_params[param_idx];
+		if (expected_reg >= DW_OP_reg0)
+			expected_reg -= DW_OP_reg0;
+
+		ret = parameter__locs(die, &attr, parm);
+
+		if (!parm->has_loc)
+			return parm;
+
+		if (ret < 0) {
+			/* undecipherable location */
 			parm->optimized = 1;
+			return parm;
+		}
+		if (parm->locs[0].is_const) {
+			parm->optimized = 1;
+		} else if (expected_reg >= 0 &&
+			   expected_reg != parm->locs[0].reg) {
+			/* mark parameters that use an unexpected
+			 * register to hold a parameter; these will
+			 * be problematic for users of BTF as they
+			 * violate expectations about register
+			 * contents.
+			 */
+			parm->unexpected_reg = 1;
 		}
 	}
 
@@ -1377,10 +1527,14 @@ static struct inline_expansion *inline_expansion__new(Dwarf_Die *die, struct cu
 		dwarf_tag__set_attr_type(dtag, type, die, DW_AT_abstract_origin);
 
 		Dwarf_Attribute attr_orig;
+		exp->name = 0;
 		if (dwarf_attr(die, DW_AT_abstract_origin, &attr_orig)) {
 			Dwarf_Die die_orig;
-			if (dwarf_formref_die(&attr_orig, &die_orig)) {
-				exp->name = attr_string(&die_orig, DW_AT_name, conf);
+
+			if (dwarf_formref_die(&attr_orig, &die_orig) &&
+			    dwarf_tag(&die_orig) == DW_TAG_subprogram) {
+				if (dwarf_hasattr(&die_orig, DW_AT_name))
+					exp->name = attr_string(&die_orig, DW_AT_name, conf);
 			}
 		}
 
@@ -2686,12 +2840,12 @@ static void inline_expansion__recode_dwarf_types(struct tag *tag, struct cu *cu)
 	 * in fact an abtract origin, i.e. must be looked up in the tags_table,
 	 * not in the types_table.
 	 */
-	struct dwarf_tag *ftype = NULL;
+	struct dwarf_tag *function = NULL;
 	if (dtag->type != 0)
-		ftype = dwarf_cu__find_tag_by_ref(dcu, dtag, type);
+		function = dwarf_cu__find_tag_by_ref(dcu, dtag, type);
 	else
-		ftype = dwarf_cu__find_tag_by_ref(dcu, dtag, abstract_origin);
-	if (ftype == NULL) {
+		function = dwarf_cu__find_tag_by_ref(dcu, dtag, abstract_origin);
+	if (function == NULL) {
 		if (dtag->type != 0)
 			tag__print_type_not_found(tag);
 		else
@@ -2699,13 +2853,14 @@ static void inline_expansion__recode_dwarf_types(struct tag *tag, struct cu *cu)
 		return;
 	}
 
-	ftype__recode_dwarf_types(dtag__tag(ftype), cu);
+	ftype__recode_dwarf_types(dtag__tag(function), cu);
 
 	struct tag *pos;
 	struct inline_expansion *exp = tag__inline_expansion(tag);
 	list_for_each_entry(pos, &exp->parms, node)
 		parameter__recode_dwarf_type(tag__parameter(pos), cu);
-	exp->ip.tag.type = ftype->small_id;
+	exp->ip.tag.type = function->small_id;
+	exp->function = tag__function(dtag__tag(function));
 }
 
 static void lexblock__recode_dwarf_types(struct lexblock *tag, struct cu *cu)
@@ -2983,6 +3138,14 @@ static int cu__resolve_func_ret_types_optimized(struct cu *cu)
 		struct function *fn = tag__function(tag);
 		bool has_unexpected_reg = false, has_struct_param = false;
 
+		/* Inlined function representations likely have parameters
+		 * in wrong locations; ensure they do not contribute to
+		 * classification of unexpected regs for a function that
+		 * is partially inlined.
+		 */
+		if (fn->inlined)
+			continue;
+
 		/* mark function as optimized if parameter is, or
 		 * if parameter does not have a location; at this
 		 * point location presence has been marked in
diff --git a/dwarves.h b/dwarves.h
index 284bc02..d6efdd0 100644
--- a/dwarves.h
+++ b/dwarves.h
@@ -481,6 +481,19 @@ bool languages__cu_filtered(struct languages *languages, struct cu *cu, bool ver
 			continue;			\
 		else
 
+/**
+ * cu__for_each_inline_expansion - iterate thru all inline expansions
+ * @cu: struct cu instance to iterate
+ * @pos: struct tag iterator
+ * @id: uint32_t tag id
+ */
+#define cu__for_each_inline_expansion(cu, id, pos)	\
+	for (id = 0; id < cu->tags_table.nr_entries; ++id) \
+		if (!tag__is_inline_expansion(cu->tags_table.entries[id]) || \
+		    !(pos = tag__inline_expansion(cu->tags_table.entries[id])))\
+			continue;			\
+		else
+
 int cu__add_tag(struct cu *cu, struct tag *tag, uint32_t *id);
 int cu__add_tag_with_id(struct cu *cu, struct tag *tag, uint32_t id);
 int cu__table_add_tag(struct cu *cu, struct tag *tag, uint32_t *id);
@@ -615,6 +628,11 @@ static inline bool tag__is_restrict(const struct tag *tag)
 	return tag->tag == DW_TAG_restrict_type;
 }
 
+static inline bool tag__is_inline_expansion(const struct tag *tag)
+{
+	return tag->tag == DW_TAG_inlined_subroutine;
+}
+
 static inline int tag__is_modifier(const struct tag *tag)
 {
 	return tag__is_const(tag) ||
@@ -821,13 +839,22 @@ struct ip_tag {
 
 struct inline_expansion {
 	struct ip_tag	 ip;
-	const char	 *name;
+	const char	*name;
 	size_t		 size;
 	uint64_t	 high_pc;
 	struct list_head parms;
 	uint16_t   nr_parms;
+	struct function	*function;
 };
 
+/**
+ * inline_expansion__for_each_parameter - iterate thru all the parameters
+ * @ie: struct inline_expansion instance to iterate
+ * @pos: struct parameter iterator
+ */
+#define inline_expansion__for_each_parameter(ie, pos) \
+	list_for_each_entry(pos, &(ie)->parms, tag.node)
+
 static inline struct inline_expansion *
 				tag__inline_expansion(const struct tag *tag)
 {
@@ -944,13 +971,30 @@ size_t lexblock__fprintf(const struct lexblock *lexblock, const struct cu *cu,
 			 struct function *function, uint16_t indent,
 			 const struct conf_fprintf *conf, FILE *fp);
 
+
+struct parameter_loc {
+	uint8_t is_const:1;
+	uint8_t is_deref:1;
+	uint8_t is_addr:1;
+	int8_t size;
+	union {
+		struct {
+			uint16_t reg;
+			uint16_t flags;
+			uint32_t offset;
+		};
+		uint64_t value;
+	};
+};
+
 struct parameter {
 	struct tag tag;
 	const char *name;
-	struct location location;
 	uint8_t optimized:1;
 	uint8_t unexpected_reg:1;
 	uint8_t has_loc:1;
+	struct parameter_loc locs[2];	/* multiple locs may be used */
+	uint8_t nlocs;
 };
 
 static inline struct parameter *tag__parameter(const struct tag *tag)
-- 
2.39.3


  parent reply	other threads:[~2025-10-24  7:34 UTC|newest]

Thread overview: 11+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2025-10-24  7:33 [RFC dwarves 0/5] pahole: support BTF inline encoding Alan Maguire
2025-10-24  7:33 ` [RFC dwarves 1/5] dwarf_loader: Add parameters list to inlined expansion Alan Maguire
2025-10-24  7:33 ` [RFC dwarves 2/5] dwarf_loader: Add name to inline expansion Alan Maguire
2025-10-24  7:33 ` Alan Maguire [this message]
2025-10-24 17:55   ` [RFC dwarves 3/5] dwarf_loader: Collect inline expansion location information Eduard Zingerman
2025-10-29 17:40     ` Alan Maguire
2025-10-29 18:32       ` Eduard Zingerman
2025-10-29 18:46         ` Alan Maguire
2025-10-24  7:33 ` [RFC dwarves 4/5] btf_encoder: Support encoding of inline " Alan Maguire
2025-10-24 18:04   ` Eduard Zingerman
2025-10-24  7:33 ` [RFC dwarves 5/5] pahole: Support inline encoding with inline[.extra] BTF feature Alan Maguire

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20251024073328.370457-4-alan.maguire@oracle.com \
    --to=alan.maguire@oracle.com \
    --cc=acme@kernel.org \
    --cc=andrii@kernel.org \
    --cc=ast@kernel.org \
    --cc=bpf@vger.kernel.org \
    --cc=daniel@iogearbox.net \
    --cc=david.faust@oracle.com \
    --cc=dwarves@vger.kernel.org \
    --cc=eddyz87@gmail.com \
    --cc=haoluo@google.com \
    --cc=ihor.solodrai@linux.dev \
    --cc=john.fastabend@gmail.com \
    --cc=jolsa@kernel.org \
    --cc=jose.marchesi@oracle.com \
    --cc=kpsingh@kernel.org \
    --cc=martin.lau@linux.dev \
    --cc=qmo@kernel.org \
    --cc=sdf@fomichev.me \
    --cc=song@kernel.org \
    --cc=ttreyer@meta.com \
    --cc=yonghong.song@linux.dev \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox