* [PATCH dwarves v6 1/5] dwarf_loader: Detect aggregate ABI register usage and signature changes
2026-06-18 1:13 [PATCH dwarves v6 0/5] pahole: Encode true signatures in kernel BTF Yonghong Song
@ 2026-06-18 1:14 ` Yonghong Song
2026-06-18 1:14 ` [PATCH dwarves v6 2/5] dwarf_loader: Collect per-parameter information Yonghong Song
` (4 subsequent siblings)
5 siblings, 0 replies; 8+ messages in thread
From: Yonghong Song @ 2026-06-18 1:14 UTC (permalink / raw)
To: Alan Maguire, Arnaldo Carvalho de Melo, dwarves
Cc: Alexei Starovoitov, Andrii Nakryiko, bpf, kernel-team
Aggregate ABI register usage applies for both clang and gcc.
The signature change detection is clang only.
Signed-off-by: Yonghong Song <yonghong.song@linux.dev>
---
dwarf_loader.c | 62 ++++++++++++++++++++++++++++++++++++++++++++++----
dwarves.h | 3 +++
2 files changed, 61 insertions(+), 4 deletions(-)
diff --git a/dwarf_loader.c b/dwarf_loader.c
index 16fb7be..ad1dc94 100644
--- a/dwarf_loader.c
+++ b/dwarf_loader.c
@@ -1100,6 +1100,16 @@ static void arch__set_register_params(const GElf_Ehdr *ehdr, struct cu *cu)
}
}
+static bool arch__agg_use_two_regs(const GElf_Ehdr *ehdr)
+{
+ switch (ehdr->e_machine) {
+ case EM_S390:
+ return false;
+ default:
+ return true;
+ }
+}
+
static struct template_type_param *template_type_param__new(Dwarf_Die *die, struct cu *cu, struct conf_load *conf)
{
struct template_type_param *ttparm = tag__alloc(cu, sizeof(*ttparm));
@@ -1502,6 +1512,29 @@ static struct ftype *ftype__new(Dwarf_Die *die, struct cu *cu)
return ftype;
}
+static bool function__signature_changed(struct function *func, Dwarf_Die *die)
+{
+ /* The inlined DW_TAG_subprogram typically has the original source type for
+ * abstract origin of a concrete function with address range, inlined subroutine,
+ * or call site.
+ */
+ if (func->inlined)
+ return false;
+
+ if (!func->abstract_origin)
+ return attr_numeric(die, DW_AT_calling_convention) == DW_CC_nocall;
+
+ Dwarf_Attribute attr;
+ if (dwarf_attr(die, DW_AT_abstract_origin, &attr)) {
+ Dwarf_Die origin;
+ if (dwarf_formref_die(&attr, &origin))
+ return attr_numeric(&origin, DW_AT_calling_convention) == DW_CC_nocall;
+ }
+
+ /* This should not happen */
+ return false;
+}
+
static struct function *function__new(Dwarf_Die *die, struct cu *cu, struct conf_load *conf)
{
struct function *func = tag__alloc(cu, sizeof(*func));
@@ -2392,10 +2425,17 @@ static struct tag *die__create_new_function(Dwarf_Die *die, struct cu *cu, struc
{
struct function *function = function__new(die, cu, conf);
- if (function != NULL &&
- die__process_function(die, &function->proto, &function->lexblock, cu, conf) != 0) {
- function__delete(function, cu);
- function = NULL;
+ if (function != NULL) {
+ /* For clang, we determine if function signature changes via DW_AT_calling_convention
+ * set to DW_CC_nocall.
+ */
+ if (cu->producer_clang)
+ function->proto.signature_changed = function__signature_changed(function, die);
+
+ if (die__process_function(die, &function->proto, &function->lexblock, cu, conf) != 0) {
+ function__delete(function, cu);
+ function = NULL;
+ }
}
return function ? &function->proto.tag : NULL;
@@ -3045,6 +3085,17 @@ static unsigned long long dwarf_tag__orig_id(const struct tag *tag,
return cu->extra_dbg_info ? dtag->id : 0;
}
+static bool attr_producer_clang(Dwarf_Die *die)
+{
+ const char *producer;
+
+ producer = attr_string(die, DW_AT_producer, NULL);
+ if (!producer)
+ return false;
+
+ return !!strstr(producer, "clang");
+}
+
struct debug_fmt_ops dwarf__ops;
static int die__process(Dwarf_Die *die, struct cu *cu, struct conf_load *conf)
@@ -3082,6 +3133,7 @@ static int die__process(Dwarf_Die *die, struct cu *cu, struct conf_load *conf)
}
cu->language = attr_numeric(die, DW_AT_language);
+ cu->producer_clang = attr_producer_clang(die);
if (conf->early_cu_filter)
cu = conf->early_cu_filter(cu);
@@ -3300,6 +3352,7 @@ static int cu__set_common(struct cu *cu, struct conf_load *conf,
cu->little_endian = ehdr.e_ident[EI_DATA] == ELFDATA2LSB;
cu->nr_register_params = arch__nr_register_params(&ehdr);
+ cu->agg_use_two_regs = arch__agg_use_two_regs(&ehdr);
arch__set_register_params(&ehdr, cu);
return 0;
}
@@ -3841,6 +3894,7 @@ static int cus__merge_and_process_cu(struct cus *cus, struct conf_load *conf,
cu->priv = dcu;
cu->dfops = &dwarf__ops;
cu->language = attr_numeric(cu_die, DW_AT_language);
+ cu->producer_clang = attr_producer_clang(cu_die);
cus__add(cus, cu);
}
diff --git a/dwarves.h b/dwarves.h
index 5ec16e7..fcc3976 100644
--- a/dwarves.h
+++ b/dwarves.h
@@ -306,6 +306,8 @@ struct cu {
uint8_t has_addr_info:1;
uint8_t uses_global_strings:1;
uint8_t little_endian:1;
+ uint8_t producer_clang:1;
+ uint8_t agg_use_two_regs:1; /* An aggregate like {long a; long b;} */
uint8_t nr_register_params;
int register_params[ARCH_MAX_REGISTER_PARAMS];
int functions_saved;
@@ -1030,6 +1032,7 @@ struct ftype {
uint8_t inconsistent_proto:1;
uint8_t uncertain_parm_loc:1;
uint8_t reordered_parm:1;
+ uint8_t signature_changed:1;
struct list_head template_type_params;
struct list_head template_value_params;
struct template_parameter_pack *template_parameter_pack;
--
2.53.0-Meta
^ permalink raw reply related [flat|nested] 8+ messages in thread* [PATCH dwarves v6 2/5] dwarf_loader: Collect per-parameter information
2026-06-18 1:13 [PATCH dwarves v6 0/5] pahole: Encode true signatures in kernel BTF Yonghong Song
2026-06-18 1:14 ` [PATCH dwarves v6 1/5] dwarf_loader: Detect aggregate ABI register usage and signature changes Yonghong Song
@ 2026-06-18 1:14 ` Yonghong Song
2026-06-18 1:14 ` [PATCH dwarves v6 3/5] dwarf_loader: Analyze per-parameter information for true signatures Yonghong Song
` (3 subsequent siblings)
5 siblings, 0 replies; 8+ messages in thread
From: Yonghong Song @ 2026-06-18 1:14 UTC (permalink / raw)
To: Alan Maguire, Arnaldo Carvalho de Melo, dwarves
Cc: Alexei Starovoitov, Andrii Nakryiko, bpf, kernel-team
Scan all parameters and save the necessary information in struct
parameter; this information is used by the next patch for
analysis.
The collected per-parameter information includes:
- whether the parameter is const value or not
- whether the parameter is a DW_OP_fbreg (location stack) or not
- the location register for this parameter
- the type byte size for this parameter (from parameter type)
- whether the parameter is passed in memory
- whether the parameter needs two registers
- If the source parameter needs 2 registers but the actual
parameter (after optimization) only needs 1 register and only
one field is used, record true_sig_member name and type.
Such information is also propagated to abstract-origin parameters in
ftype__recode_dwarf_types().
parameter__new() now only decodes this location state; the optimized and
unexpected_reg decisions that parameter__reg() used to drive are made by
the function-level analysis pass added in the next commit, which consumes
the decoded fields.
Signed-off-by: Yonghong Song <yonghong.song@linux.dev>
---
dwarf_loader.c | 335 +++++++++++++++++++++++++++++++++++++++++--------
dwarves.h | 11 ++
2 files changed, 292 insertions(+), 54 deletions(-)
diff --git a/dwarf_loader.c b/dwarf_loader.c
index ad1dc94..ae4ac2f 100644
--- a/dwarf_loader.c
+++ b/dwarf_loader.c
@@ -1200,14 +1200,231 @@ static ptrdiff_t __dwarf_getlocations(Dwarf_Attribute *attr,
return ret;
}
-/* For DW_AT_location 'attr':
- * - if first location is DW_OP_regXX with expected number, return the register;
- * otherwise save the register for later return
- * - if location DW_OP_entry_value(DW_OP_regXX) with expected number is in the
- * list, return the register; otherwise save register for later return
- * - otherwise if no register was found for locations, return -1.
+#define PARAMETER_UNKNOWN_REG -1
+
+static int __get_type_byte_size(Dwarf_Die *die, struct cu *cu)
+{
+ Dwarf_Attribute attr;
+ if (dwarf_attr(die, DW_AT_type, &attr) == NULL)
+ return 0;
+
+ Dwarf_Die type_die;
+ if (dwarf_formref_die(&attr, &type_die) == NULL)
+ return 0;
+
+ /* A type does not have byte_size.
+ * 0x000dac83: DW_TAG_formal_parameter
+ DW_AT_location (indexed (0x385) loclist = 0x00016175:
+ [0xffff800080098cb0, 0xffff800080098cb4): DW_OP_breg8 W8+0
+ [0xffff800080098cb4, 0xffff800080098ff4): DW_OP_breg31 WSP+16, DW_OP_deref
+ [0xffff800080099054, 0xffff80008009908c): DW_OP_breg31 WSP+16, DW_OP_deref)
+ DW_AT_name ("ubuf")
+ DW_AT_decl_file ("/home/yhs/work/bpf-next/arch/arm64/kernel/ptrace.c")
+ DW_AT_decl_line (886)
+ DW_AT_type (0x000d467e "const void *")
+
+ * 0x000d467e: DW_TAG_pointer_type
+ DW_AT_type (0x000c4320 "const void")
+
+ * 0x000c4320: DW_TAG_const_type
+ */
+ if (dwarf_tag(&type_die) == DW_TAG_pointer_type)
+ return cu->addr_size;
+
+ uint64_t bsize = attr_numeric(&type_die, DW_AT_byte_size);
+ if (bsize == 0)
+ return __get_type_byte_size(&type_die, cu);
+
+ return bsize;
+}
+
+static int get_type_byte_size(Dwarf_Die *die, struct cu *cu)
+{
+ int byte_size = 0;
+
+ Dwarf_Attribute attr;
+ if (dwarf_attr(die, DW_AT_abstract_origin, &attr)) {
+ Dwarf_Die origin;
+ if (dwarf_formref_die(&attr, &origin))
+ byte_size = __get_type_byte_size(&origin, cu);
+ } else {
+ byte_size = __get_type_byte_size(die, cu);
+ }
+ return byte_size;
+}
+
+/* Traverse the parameter type until finding the member type which has expected
+ * struct type offset.
+ */
+static Dwarf_Die *get_member_with_offset(Dwarf_Die *die, int offset, Dwarf_Die *member_die)
+{
+ Dwarf_Attribute attr;
+ if (dwarf_attr(die, DW_AT_type, &attr) == NULL)
+ return NULL;
+
+ Dwarf_Die type_die;
+ if (dwarf_formref_die(&attr, &type_die) == NULL)
+ return NULL;
+
+ uint64_t bsize = attr_numeric(&type_die, DW_AT_byte_size);
+ if (bsize == 0)
+ return get_member_with_offset(&type_die, offset, member_die);
+
+ if (dwarf_tag(&type_die) != DW_TAG_structure_type)
+ return NULL;
+
+ if (!dwarf_haschildren(&type_die) || dwarf_child(&type_die, member_die) != 0)
+ return NULL;
+ do {
+ if (dwarf_tag(member_die) != DW_TAG_member)
+ continue;
+
+ Dwarf_Attribute attr;
+ Dwarf_Off bit_offset;
+
+ if (dwarf_attr(member_die, DW_AT_data_bit_offset, &attr) != NULL)
+ bit_offset = __attr_offset(&attr);
+ else if (dwarf_attr(member_die, DW_AT_data_member_location, &attr) != NULL)
+ bit_offset = __attr_offset(&attr) * 8;
+ else
+ continue;
+
+ if (bit_offset == offset * 8)
+ return member_die;
+ } while (dwarf_siblingof(member_die, member_die) == 0);
+
+ return NULL;
+}
+
+static bool dwarf_op__is_reg(unsigned int atom)
+{
+ return atom >= DW_OP_reg0 && atom <= DW_OP_reg31;
+}
+
+static bool dwarf_expr__has_stack_value(Dwarf_Op *expr, size_t exprlen)
+{
+ for (size_t i = 1; i < exprlen; i++) {
+ if (expr[i].atom == DW_OP_stack_value)
+ return true;
+ }
+ return false;
+}
+
+static void parameter__set_loc_reg(struct parameter *parm, int reg)
+{
+ if (parm->loc_reg == PARAMETER_UNKNOWN_REG)
+ parm->loc_reg = reg;
+}
+
+static void parameter__set_field_bit(unsigned long *fields, int byte_offset)
+{
+ if (byte_offset >= 0 && byte_offset < (int)(sizeof(*fields) * 8))
+ *fields |= 1UL << byte_offset;
+}
+
+static void parameter__record_true_sig_member(struct parameter *parm, Dwarf_Die *die,
+ int field_offset, struct conf_load *conf)
+{
+ Dwarf_Die member_die;
+
+ if (parm->true_sig_member_name)
+ return;
+ if (!parm->name)
+ return;
+ if (!get_member_with_offset(die, field_offset, &member_die))
+ return;
+
+ parm->true_sig_member_name = attr_string(&member_die, DW_AT_name, conf);
+ if (!parm->true_sig_member_name)
+ return;
+
+ parm->true_sig_type_from_types = attr_type(&member_die, DW_AT_type, &parm->true_sig_type);
+ if (parm->true_sig_type == 0)
+ parm->true_sig_member_name = NULL;
+}
+
+static void parameter__finish_piece_decode(struct parameter *parm, Dwarf_Die *die,
+ struct conf_load *conf, struct cu *cu)
+{
+ unsigned long first = parm->first_reg_fields;
+ unsigned long second = parm->second_reg_fields;
+ int field_offset;
+
+ if (!first && !second)
+ return;
+ if (first && second)
+ return;
+ if (__builtin_popcountl(first) >= 2 || __builtin_popcountl(second) >= 2)
+ return;
+
+ if (__builtin_popcountl(first) == 1)
+ field_offset = __builtin_ctzl(first);
+ else
+ field_offset = cu->addr_size + __builtin_ctzl(second);
+
+ parameter__record_true_sig_member(parm, die, field_offset, conf);
+}
+
+/* For aggregate parameters represented by pieces, first_reg_fields and
+ * second_reg_fields record the byte offsets materialized in each ABI register.
+ * The later function-level pass decides whether the source aggregate is still
+ * ABI-preserved or should be replaced by the single used member candidate.
*/
-static int parameter__reg(Dwarf_Attribute *attr, int expected_reg)
+static void parameter__multi_exprs(Dwarf_Op *expr, int loc_num, struct cu *cu,
+ size_t exprlen, struct parameter *parm)
+{
+ switch (expr[0].atom) {
+ case DW_OP_lit0 ... DW_OP_lit31:
+ case DW_OP_constu:
+ case DW_OP_consts:
+ if (loc_num == 0)
+ parm->loc_const_value = 1;
+ return;
+ }
+
+ if (parm->type_byte_size <= cu->addr_size || !cu->agg_use_two_regs) {
+ switch (expr[0].atom) {
+ case DW_OP_reg0 ... DW_OP_reg31:
+ if (loc_num == 0)
+ parameter__set_loc_reg(parm, expr[0].atom);
+ return;
+ case DW_OP_breg0 ... DW_OP_breg31:
+ if (loc_num == 0 && dwarf_expr__has_stack_value(expr, exprlen))
+ parameter__set_loc_reg(parm, expr[0].atom - DW_OP_breg0 + DW_OP_reg0);
+ return;
+ default:
+ return;
+ }
+ }
+
+ int off = 0;
+ for (size_t i = 0; i < exprlen; i++) {
+ if (expr[i].atom == DW_OP_piece) {
+ int num = expr[i].number;
+
+ if (i == 0) {
+ off = num;
+ continue;
+ }
+
+ if (off < cu->addr_size)
+ parameter__set_field_bit(&parm->first_reg_fields, off);
+ else
+ parameter__set_field_bit(&parm->second_reg_fields, off - cu->addr_size);
+ off += num;
+ } else if (dwarf_op__is_reg(expr[i].atom)) {
+ if (off < cu->addr_size || parm->loc_reg == PARAMETER_UNKNOWN_REG)
+ parameter__set_loc_reg(parm, expr[i].atom);
+ }
+ /* FIXME: not handling DW_OP_bregX pieces yet since we do not
+ * have a use case for it yet in the Linux kernel.
+ */
+ }
+}
+
+static void parameter__decode_location(Dwarf_Attribute *attr, struct conf_load *conf,
+ struct cu *cu, Dwarf_Die *die,
+ struct parameter *parm)
{
Dwarf_Addr base, start, end;
Dwarf_Op *expr, *entry_ops;
@@ -1215,50 +1432,55 @@ static int parameter__reg(Dwarf_Attribute *attr, int expected_reg)
size_t exprlen, entry_len;
ptrdiff_t offset = 0;
int loc_num = -1;
- int ret = -1;
- /* use libdw__lock as dwarf_getlocation(s) has concurrency issues
- * when libdw is not compiled with experimental --enable-thread-safety
- */
pthread_mutex_lock(&libdw__lock);
while ((offset = __dwarf_getlocations(attr, offset, &base, &start, &end, &expr, &exprlen)) > 0) {
+ bool had_stack_value;
+
loc_num++;
+ if (exprlen == 0)
+ continue;
- /* Convert expression list (XX DW_OP_stack_value) -> (XX).
- * DW_OP_stack_value instructs interpreter to pop current value from
- * DWARF expression evaluation stack, and thus is not important here.
- */
- if (exprlen > 1 && expr[exprlen - 1].atom == DW_OP_stack_value)
+ had_stack_value = expr[exprlen - 1].atom == DW_OP_stack_value;
+ if (exprlen == 2 && had_stack_value)
exprlen--;
- if (exprlen != 1)
+ if (exprlen != 1) {
+ parameter__multi_exprs(expr, loc_num, cu, exprlen, parm);
continue;
+ }
switch (expr->atom) {
- /* match DW_OP_regXX at first location */
case DW_OP_reg0 ... DW_OP_reg31:
- if (loc_num != 0)
- break;
- ret = expr->atom;
- if (ret == expected_reg)
- goto out;
+ if (loc_num == 0)
+ parameter__set_loc_reg(parm, expr->atom);
+ break;
+ case DW_OP_breg0 ... DW_OP_breg31:
+ if (loc_num == 0 && had_stack_value)
+ parameter__set_loc_reg(parm, expr->atom - DW_OP_breg0 + DW_OP_reg0);
+ break;
+ case DW_OP_fbreg:
+ if (loc_num == 0)
+ parm->loc_stack = 1;
+ break;
+ case DW_OP_lit0 ... DW_OP_lit31:
+ case DW_OP_constu:
+ case DW_OP_consts:
+ if (loc_num == 0)
+ parm->loc_const_value = 1;
break;
- /* match DW_OP_entry_value(DW_OP_regXX) at any location */
case DW_OP_entry_value:
case DW_OP_GNU_entry_value:
if (dwarf_getlocation_attr(attr, expr, &entry_attr) == 0 &&
dwarf_getlocation(&entry_attr, &entry_ops, &entry_len) == 0 &&
- entry_len == 1) {
- ret = entry_ops->atom;
- if (ret == expected_reg)
- goto out;
- }
+ entry_len == 1 && dwarf_op__is_reg(entry_ops->atom))
+ parameter__set_loc_reg(parm, entry_ops->atom);
break;
}
}
-out:
pthread_mutex_unlock(&libdw__lock);
- return ret;
+
+ parameter__finish_piece_decode(parm, die, conf, cu);
}
static struct parameter *parameter__new(Dwarf_Die *die, struct cu *cu,
@@ -1267,14 +1489,16 @@ static struct parameter *parameter__new(Dwarf_Die *die, struct cu *cu,
struct parameter *parm = tag__alloc(cu, sizeof(*parm));
if (parm != NULL) {
- bool has_const_value;
Dwarf_Attribute attr;
tag__init(&parm->tag, cu, die);
parm->name = attr_string(die, DW_AT_name, conf);
parm->idx = param_idx;
- if (param_idx >= cu->nr_register_params || param_idx < 0)
- return parm;
+ parm->loc_reg = PARAMETER_UNKNOWN_REG;
+ parm->type_byte_size = get_type_byte_size(die, cu);
+ parm->passed_in_memory = parm->type_byte_size >
+ (cu->agg_use_two_regs ? 2 * cu->addr_size : cu->addr_size);
+
/* Parameters which use DW_AT_abstract_origin to point at
* the original parameter definition (with no name in the DIE)
* are the result of later DWARF generation during compilation
@@ -1308,26 +1532,10 @@ static struct parameter *parameter__new(Dwarf_Die *die, struct cu *cu,
* between these parameter representations. See
* ftype__recode_dwarf_types() below for how this is handled.
*/
- has_const_value = dwarf_attr(die, DW_AT_const_value, &attr) != NULL;
+ parm->has_const_value = dwarf_attr(die, DW_AT_const_value, &attr) != NULL;
parm->has_loc = dwarf_attr(die, DW_AT_location, &attr) != NULL;
-
- if (parm->has_loc) {
- int expected_reg = cu->register_params[param_idx];
- int actual_reg = parameter__reg(&attr, expected_reg);
-
- if (actual_reg < 0)
- parm->optimized = 1;
- else if (expected_reg >= 0 && expected_reg != actual_reg)
- /* mark parameters that use an unexpected
- * register to hold a parameter; these will
- * be problematic for users of BTF as they
- * violate expectations about register
- * contents.
- */
- parm->unexpected_reg = 1;
- } else if (has_const_value) {
- parm->optimized = 1;
- }
+ if (parm->has_loc)
+ parameter__decode_location(&attr, conf, cu, die, parm);
}
return parm;
@@ -2151,7 +2359,7 @@ out_enomem:
}
static int die__process_function(Dwarf_Die *die, struct ftype *ftype,
- struct lexblock *lexblock, struct cu *cu, struct conf_load *conf);
+ struct lexblock *lexblock, struct cu *cu, struct conf_load *conf);
static int die__create_new_lexblock(Dwarf_Die *die,
struct cu *cu, struct lexblock *father, struct conf_load *conf)
@@ -2687,6 +2895,25 @@ static void ftype__recode_dwarf_types(struct tag *tag, struct cu *cu)
*/
if (pos->has_loc)
opos->has_loc = pos->has_loc;
+ if (pos->has_const_value)
+ opos->has_const_value = pos->has_const_value;
+ if (pos->loc_const_value)
+ opos->loc_const_value = pos->loc_const_value;
+ if (pos->loc_stack)
+ opos->loc_stack = pos->loc_stack;
+ if (pos->loc_reg != PARAMETER_UNKNOWN_REG)
+ opos->loc_reg = pos->loc_reg;
+ if (pos->type_byte_size != 0)
+ opos->type_byte_size = pos->type_byte_size;
+ if (pos->passed_in_memory)
+ opos->passed_in_memory = pos->passed_in_memory;
+ opos->first_reg_fields |= pos->first_reg_fields;
+ opos->second_reg_fields |= pos->second_reg_fields;
+ if (pos->true_sig_member_name && !opos->true_sig_member_name) {
+ opos->true_sig_member_name = pos->true_sig_member_name;
+ opos->true_sig_type = pos->true_sig_type;
+ opos->true_sig_type_from_types = pos->true_sig_type_from_types;
+ }
if (pos->optimized)
opos->optimized = pos->optimized;
diff --git a/dwarves.h b/dwarves.h
index fcc3976..104c8c6 100644
--- a/dwarves.h
+++ b/dwarves.h
@@ -948,9 +948,20 @@ size_t lexblock__fprintf(const struct lexblock *lexblock, const struct cu *cu,
struct parameter {
struct tag tag;
const char *name;
+ const char *true_sig_member_name;
+ Dwarf_Off true_sig_type;
+ unsigned long first_reg_fields;
+ unsigned long second_reg_fields;
+ int loc_reg;
+ uint16_t type_byte_size;
+ uint8_t true_sig_type_from_types:1;
+ uint8_t has_const_value:1;
+ uint8_t loc_const_value:1;
+ uint8_t loc_stack:1;
uint8_t optimized:1;
uint8_t unexpected_reg:1;
uint8_t has_loc:1;
+ uint8_t passed_in_memory:1; /* too large for the ABI argument registers */
uint8_t idx;
};
--
2.53.0-Meta
^ permalink raw reply related [flat|nested] 8+ messages in thread* [PATCH dwarves v6 3/5] dwarf_loader: Analyze per-parameter information for true signatures
2026-06-18 1:13 [PATCH dwarves v6 0/5] pahole: Encode true signatures in kernel BTF Yonghong Song
2026-06-18 1:14 ` [PATCH dwarves v6 1/5] dwarf_loader: Detect aggregate ABI register usage and signature changes Yonghong Song
2026-06-18 1:14 ` [PATCH dwarves v6 2/5] dwarf_loader: Collect per-parameter information Yonghong Song
@ 2026-06-18 1:14 ` Yonghong Song
2026-06-18 1:14 ` [PATCH dwarves v6 4/5] btf_encoder: Emit true function signatures Yonghong Song
` (2 subsequent siblings)
5 siblings, 0 replies; 8+ messages in thread
From: Yonghong Song @ 2026-06-18 1:14 UTC (permalink / raw)
To: Alan Maguire, Arnaldo Carvalho de Melo, dwarves
Cc: Alexei Starovoitov, Andrii Nakryiko, bpf, kernel-team
Add a function-level pass, function__analyze_parameter_locations(), run
from cu__resolve_func_ret_types_optimized(), which walks a function's
parameters in ABI argument-register order and consumes the location state
decoded by parameter__decode_location() in the previous commit. Each
parameter advances the expected-register index by the number of argument
registers it occupies (parameter__abi_slots(), e.g. a two-eightbyte
aggregate consumes two registers).
For every producer it keeps the existing bookkeeping, now driven by the
decoded fields:
- a parameter with no location, a constant value, or (for non-clang) no
register found is marked optimized out
- a parameter found in a register other than the expected one is marked
unexpected_reg
When true_signature is enabled for a signature-changed function it
reconstructs the real register-level signature:
- parameters that were optimized out are dropped from the signature
- a parameter whose location cannot be tied to its expected register -
wrong register, no register found, or a non-aggregate sitting on the
stack - marks the function unexpected_reg so that no untrustworthy
signature is emitted;
- an aggregate genuinely passed on the stack (passed_in_memory) is kept;
- an aggregate split across registers via DW_OP_piece is kept whole when
it is fully used or the next parameter still lands on its expected
register, otherwise it is rewritten to the single member actually passed
in a register (true_sig_*).
Together with the decoding commit this replaces the previous inline,
per-parameter register check in parameter__new().
Signed-off-by: Yonghong Song <yonghong.song@linux.dev>
---
dwarf_loader.c | 151 ++++++++++++++++++++++++++++++++++++++++++++++++-
1 file changed, 148 insertions(+), 3 deletions(-)
diff --git a/dwarf_loader.c b/dwarf_loader.c
index ae4ac2f..31498e5 100644
--- a/dwarf_loader.c
+++ b/dwarf_loader.c
@@ -2931,6 +2931,149 @@ static void ftype__recode_dwarf_types(struct tag *tag, struct cu *cu)
}
}
+static struct parameter *ftype__next_parameter(struct ftype *ftype, struct parameter *parm)
+{
+ if (parm->tag.node.next == &ftype->parms)
+ return NULL;
+ return list_entry(parm->tag.node.next, struct parameter, tag.node);
+}
+
+static int parameter__abi_slots(const struct parameter *parm, const struct cu *cu)
+{
+ int slots;
+
+ if (!cu->agg_use_two_regs || parm->type_byte_size <= cu->addr_size)
+ return 1;
+
+ slots = (parm->type_byte_size + cu->addr_size - 1) / cu->addr_size;
+ return slots > 0 ? slots : 1;
+}
+
+static bool parameter__has_piece_info(const struct parameter *parm)
+{
+ return parm->first_reg_fields || parm->second_reg_fields;
+}
+
+static bool parameter__uses_full_aggregate(const struct parameter *parm)
+{
+ return parm->first_reg_fields && parm->second_reg_fields;
+}
+
+static bool ftype__next_parameter_preserves_slots(struct ftype *ftype, struct parameter *parm,
+ int reg_idx, int slots, struct cu *cu)
+{
+ struct parameter *next = ftype__next_parameter(ftype, parm);
+ int next_reg_idx;
+
+ if (!next || next->loc_reg == PARAMETER_UNKNOWN_REG)
+ return false;
+
+ next_reg_idx = reg_idx + slots;
+ return next_reg_idx < cu->nr_register_params &&
+ next->loc_reg == cu->register_params[next_reg_idx];
+}
+
+static bool parameter__apply_true_sig_member(struct parameter *parm, struct cu *cu)
+{
+ struct dwarf_tag tmp = {};
+ struct dwarf_tag *dtype;
+
+ if (!parm->true_sig_member_name || parm->true_sig_type == 0)
+ return false;
+
+ tmp.type = parm->true_sig_type;
+ tmp.from_types_section.type = parm->true_sig_type_from_types;
+ dtype = __dwarf_cu__find_type_by_ref(cu->priv, tmp.type, tmp.from_types_section.type);
+ if (!dtype)
+ return false;
+
+ parm->tag.type = dtype->small_id;
+ return true;
+}
+
+static void function__analyze_parameter_locations(struct function *fn, struct cu *cu,
+ struct conf_load *conf)
+{
+ struct ftype *ftype = &fn->proto;
+ struct parameter *pos;
+ bool true_sig_enabled = conf->true_signature && ftype->signature_changed;
+ bool check_registers = !cu->producer_clang || true_sig_enabled;
+ int reg_idx = 0;
+
+ if (!check_registers)
+ return;
+
+ ftype__for_each_parameter(ftype, pos) {
+ bool consumes_register = true;
+ bool regs_available = reg_idx < cu->nr_register_params;
+ int slots = parameter__abi_slots(pos, cu);
+ int expected_reg = regs_available ? cu->register_params[reg_idx] : -1;
+ int reg_slots = pos->passed_in_memory ? 1 : slots;
+
+ if (pos->has_loc) {
+ if (true_sig_enabled && pos->loc_const_value) {
+ pos->optimized = 1;
+ consumes_register = false;
+ goto next;
+ }
+
+ if (!regs_available) {
+ consumes_register = false;
+ goto next;
+ }
+
+ if (true_sig_enabled && pos->loc_stack) {
+ if (pos->passed_in_memory)
+ consumes_register = false;
+ else
+ pos->unexpected_reg = 1;
+ goto next;
+ }
+
+ if (pos->loc_reg == PARAMETER_UNKNOWN_REG) {
+ if (true_sig_enabled)
+ pos->unexpected_reg = 1;
+ else
+ pos->optimized = 1;
+ goto next;
+ }
+
+ if (expected_reg >= 0 && expected_reg != pos->loc_reg) {
+ pos->unexpected_reg = 1;
+ goto next;
+ }
+
+ if (true_sig_enabled && parameter__has_piece_info(pos)) {
+ if (parameter__uses_full_aggregate(pos) ||
+ ftype__next_parameter_preserves_slots(ftype, pos, reg_idx, slots, cu)) {
+ reg_idx += slots;
+ continue;
+ }
+
+ if (parameter__apply_true_sig_member(pos, cu)) {
+ reg_idx++;
+ continue;
+ }
+ }
+ } else if (pos->has_const_value && !cu->producer_clang) {
+ pos->optimized = 1;
+ } else if (true_sig_enabled) {
+ if (regs_available &&
+ ftype__next_parameter_preserves_slots(ftype, pos, reg_idx, slots, cu)) {
+ reg_idx += slots;
+ continue;
+ }
+
+ pos->optimized = 1;
+ consumes_register = false;
+ }
+
+next:
+ if (consumes_register)
+ reg_idx += reg_slots;
+ }
+}
+
static void lexblock__recode_dwarf_types(struct lexblock *tag, struct cu *cu)
{
struct tag *pos;
@@ -3206,7 +3349,7 @@ static bool param__is_struct(struct cu *cu, struct tag *tag)
}
}
-static int cu__resolve_func_ret_types_optimized(struct cu *cu)
+static int cu__resolve_func_ret_types_optimized(struct cu *cu, struct conf_load *conf)
{
struct ptr_table *pt = &cu->functions_table;
uint32_t i;
@@ -3217,6 +3360,8 @@ static int cu__resolve_func_ret_types_optimized(struct cu *cu)
struct function *fn = tag__function(tag);
bool has_unexpected_reg = false, has_struct_param = false;
+ function__analyze_parameter_locations(fn, cu, conf);
+
/* mark function as optimized if parameter is, or
* if parameter does not have a location; at this
* point location presence has been marked in
@@ -3395,7 +3540,7 @@ static int die__process_and_recode(Dwarf_Die *die, struct cu *cu, struct conf_lo
if (ret != 0)
return ret;
- return cu__resolve_func_ret_types_optimized(cu);
+ return cu__resolve_func_ret_types_optimized(cu, conf);
}
static int class_member__cache_byte_size(struct tag *tag, struct cu *cu,
@@ -4158,7 +4303,7 @@ static int cus__merge_and_process_cu(struct cus *cus, struct conf_load *conf,
* encoded in another subprogram through abstract_origin
* tag. Let us visit all subprograms again to resolve this.
*/
- if (cu__resolve_func_ret_types_optimized(cu) != LSK__KEEPIT)
+ if (cu__resolve_func_ret_types_optimized(cu, conf) != LSK__KEEPIT)
goto out_abort;
cu__finalize(cu, cus, conf);
--
2.53.0-Meta
^ permalink raw reply related [flat|nested] 8+ messages in thread* [PATCH dwarves v6 4/5] btf_encoder: Emit true function signatures
2026-06-18 1:13 [PATCH dwarves v6 0/5] pahole: Encode true signatures in kernel BTF Yonghong Song
` (2 preceding siblings ...)
2026-06-18 1:14 ` [PATCH dwarves v6 3/5] dwarf_loader: Analyze per-parameter information for true signatures Yonghong Song
@ 2026-06-18 1:14 ` Yonghong Song
2026-06-18 1:14 ` [PATCH dwarves v6 5/5] tests: add BTF true_signature encoding tests Yonghong Song
2026-06-20 8:46 ` [PATCH dwarves v6 0/5] pahole: Encode true signatures in kernel BTF Alan Maguire
5 siblings, 0 replies; 8+ messages in thread
From: Yonghong Song @ 2026-06-18 1:14 UTC (permalink / raw)
To: Alan Maguire, Arnaldo Carvalho de Melo, dwarves
Cc: Alexei Starovoitov, Andrii Nakryiko, bpf, kernel-team
When true_signature is enabled, consume the per-parameter analysis the
DWARF loader recorded while saving a function's BTF:
- Drop parameters that were optimized out of a signature-changed function
(ftype->signature_changed && param->optimized), adjusting the saved
parameter count accordingly.
- When a parameter was reduced to a single aggregate member
(param->true_sig_member_name is set), emit it under the synthesized name
"<parameter>__<member>" so the BTF reflects the value actually passed in
the register.
Signed-off-by: Yonghong Song <yonghong.song@linux.dev>
---
btf_encoder.c | 24 +++++++++++++++++++++++-
1 file changed, 23 insertions(+), 1 deletion(-)
diff --git a/btf_encoder.c b/btf_encoder.c
index 633bc61..c8e8437 100644
--- a/btf_encoder.c
+++ b/btf_encoder.c
@@ -1297,14 +1297,36 @@ static int32_t btf_encoder__save_func(struct btf_encoder *encoder, struct functi
state->reordered_parm = ftype->reordered_parm;
ftype__for_each_parameter(ftype, param) {
const char *name;
+ char *final_name = NULL;
/* No location info/optimized + reordered means optimized out. */
if (ftype->reordered_parm && (!param->has_loc || param->optimized)) {
state->nr_parms--;
continue;
}
- name = parameter__name(param) ?: "";
+ if (encoder->true_signature && ftype->signature_changed && param->optimized) {
+ state->nr_parms--;
+ continue;
+ }
+
+ name = parameter__name(param);
+ if (!name) {
+ name = "";
+ } else if (param->true_sig_member_name) {
+ /* Non-null param->true_sig_member_name indicates that the parameter
+ * name is <parameter_name>__<field_name>.
+ */
+ if (asprintf(&final_name, "%s__%s", name, param->true_sig_member_name) == -1) {
+ err = -ENOMEM;
+ goto out;
+ }
+ name = final_name;
+ }
+
str_off = btf__add_str(btf, name);
+ if (final_name)
+ free(final_name);
+
if (str_off < 0) {
err = str_off;
goto out;
--
2.53.0-Meta
^ permalink raw reply related [flat|nested] 8+ messages in thread* [PATCH dwarves v6 5/5] tests: add BTF true_signature encoding tests
2026-06-18 1:13 [PATCH dwarves v6 0/5] pahole: Encode true signatures in kernel BTF Yonghong Song
` (3 preceding siblings ...)
2026-06-18 1:14 ` [PATCH dwarves v6 4/5] btf_encoder: Emit true function signatures Yonghong Song
@ 2026-06-18 1:14 ` Yonghong Song
2026-06-20 8:46 ` [PATCH dwarves v6 0/5] pahole: Encode true signatures in kernel BTF Alan Maguire
5 siblings, 0 replies; 8+ messages in thread
From: Yonghong Song @ 2026-06-18 1:14 UTC (permalink / raw)
To: Alan Maguire, Arnaldo Carvalho de Melo, dwarves
Cc: Alexei Starovoitov, Andrii Nakryiko, bpf, kernel-team
Add four tests exercising true_signature BTF encoding for clang-built,
signature-changed (DW_CC_nocall) functions:
- clang_parm_optimized: an unused scalar parameter is dropped from the
true signature.
- clang_parm_optimized_stack: with more arguments than argument registers,
optimized-out parameters (including stack-passed ones) are dropped.
- clang_parm_aggregate: a two-register aggregate that is only partially
used is rewritten to its single used member, while a fully-used
aggregate is preserved.
- clang_parm_memory: a large aggregate classified MEMORY and passed on the
stack is kept while an unused parameter is dropped. A union is used so
the struct-parameter exception in cu__resolve_func_ret_types_optimized()
does not mask a wrong unexpected_reg.
Each test compares the BTF signature against the DWARF signature. Since
clang only emits DW_CC_nocall on some architectures, the tests assert the
signatures differ on x86_64 and skip (or, on arm64, expect them equal)
elsewhere. The following is an example that dumps the BTF vs. DWARF signatures:
$ VERBOSE=1 ./clang_parm_memory.sh
Validation of BTF encoding of true_signatures.
BTF: long foo(union big u, int x); DWARF: long foo(union big u, int dead, int x);
Test ./clang_parm_memory.sh passed
Signed-off-by: Yonghong Song <yonghong.song@linux.dev>
---
tests/clang_parm_aggregate.sh | 85 +++++++++++++++++++++++++++++
tests/clang_parm_memory.sh | 77 ++++++++++++++++++++++++++
tests/clang_parm_optimized.sh | 63 +++++++++++++++++++++
tests/clang_parm_optimized_stack.sh | 63 +++++++++++++++++++++
4 files changed, 288 insertions(+)
create mode 100755 tests/clang_parm_aggregate.sh
create mode 100755 tests/clang_parm_memory.sh
create mode 100755 tests/clang_parm_optimized.sh
create mode 100755 tests/clang_parm_optimized_stack.sh
diff --git a/tests/clang_parm_aggregate.sh b/tests/clang_parm_aggregate.sh
new file mode 100755
index 0000000..9502f8b
--- /dev/null
+++ b/tests/clang_parm_aggregate.sh
@@ -0,0 +1,85 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0-only
+
+source test_lib.sh
+
+outdir=$(make_tmpdir)
+
+# Comment this out to save test data.
+trap cleanup EXIT
+
+title_log "Validation of BTF encoding of true_signatures."
+
+clang_true="${outdir}/clang_true"
+CC=$(which clang 2>/dev/null)
+
+if [[ -z "$CC" ]]; then
+ info_log "skip: clang not available"
+ test_skip
+fi
+
+cat > ${clang_true}.c << EOF
+struct t { long f1; long f2; };
+__attribute__((noinline)) static long foo(struct t a, struct t b, int i)
+{
+ return a.f1 + b.f1 + b.f2 + i;
+}
+
+struct t p1, p2;
+int i;
+int main()
+{
+ return (int)foo(p1, p2, i);
+}
+EOF
+
+CFLAGS="$CFLAGS -g -O2"
+${CC} ${CFLAGS} -o $clang_true ${clang_true}.c
+if [[ $? -ne 0 ]]; then
+ error_log "Could not compile ${clang_true}.c"
+ test_fail
+fi
+LLVM_OBJCOPY=objcopy pahole -J --btf_features=+true_signature $clang_true
+if [[ $? -ne 0 ]]; then
+ error_log "Could not encode BTF for $clang_true"
+ test_fail
+fi
+
+btf_optimized=$(pfunct --all --format_path=btf $clang_true |grep "foo")
+if [[ -z "$btf_optimized" ]]; then
+ info_log "skip: no optimizations applied."
+ test_skip
+fi
+
+btf_cmp=$btf_optimized
+dwarf=$(pfunct --all $clang_true |grep "foo")
+
+verbose_log "BTF: $btf_optimized DWARF: $dwarf"
+
+arch=$(uname -m)
+
+if [[ "$arch" == "x86_64" ]]; then
+ # On x86_64, clang emits DW_CC_nocall for optimized functions,
+ # so pahole should detect the optimization and produce a
+ # different BTF signature.
+ if [[ "$btf_cmp" == "$dwarf" ]]; then
+ error_log "BTF and DWARF signatures should be different and they are not: BTF: $btf_optimized ; DWARF $dwarf"
+ test_fail
+ fi
+elif [[ "$arch" == "aarch64" ]]; then
+ # On arm64, clang does not emit DW_CC_nocall, so pahole cannot
+ # detect the optimization. BTF and DWARF signatures are expected
+ # to be the same.
+ if [[ "$btf_cmp" != "$dwarf" ]]; then
+ error_log "On arm64, BTF and DWARF signatures should be the same but they are not: BTF: $btf_optimized ; DWARF $dwarf"
+ test_fail
+ fi
+else
+ # On other architectures, skip if we cannot determine the
+ # expected behavior.
+ if [[ "$btf_cmp" == "$dwarf" ]]; then
+ info_log "skip: no optimization detected on $arch"
+ test_skip
+ fi
+fi
+test_pass
diff --git a/tests/clang_parm_memory.sh b/tests/clang_parm_memory.sh
new file mode 100755
index 0000000..d0d798d
--- /dev/null
+++ b/tests/clang_parm_memory.sh
@@ -0,0 +1,77 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0-only
+
+source test_lib.sh
+
+outdir=$(make_tmpdir)
+
+# Comment this out to save test data.
+trap cleanup EXIT
+
+title_log "Validation of BTF encoding of true_signatures."
+
+clang_true="${outdir}/clang_true"
+CC=$(which clang 2>/dev/null)
+
+if [[ -z "$CC" ]]; then
+ info_log "skip: clang not available"
+ test_skip
+fi
+
+# the expected true signature: long foo(union big u, int x).
+cat > ${clang_true}.c << EOF
+union big { long a; char buf[24]; };
+__attribute__((noinline)) static long foo(union big u, int dead, int x)
+{
+ return u.a + x;
+}
+
+union big g;
+int dead, x;
+int main()
+{
+ return (int)foo(g, dead, x);
+}
+EOF
+
+CFLAGS="$CFLAGS -g -O2"
+${CC} ${CFLAGS} -o $clang_true ${clang_true}.c
+if [[ $? -ne 0 ]]; then
+ error_log "Could not compile ${clang_true}.c"
+ test_fail
+fi
+LLVM_OBJCOPY=objcopy pahole -J --btf_features=+true_signature $clang_true
+if [[ $? -ne 0 ]]; then
+ error_log "Could not encode BTF for $clang_true"
+ test_fail
+fi
+
+btf_optimized=$(pfunct --all --format_path=btf $clang_true |grep "foo")
+dwarf=$(pfunct --all $clang_true |grep "foo")
+
+verbose_log "BTF: $btf_optimized DWARF: $dwarf"
+
+arch=$(uname -m)
+
+if [[ "$arch" == "x86_64" ]]; then
+ # On x86_64, clang emits DW_CC_nocall for optimized functions. The
+ # stack-passed aggregate must remain present and 'dead' must be
+ # dropped, so a true signature must be produced and it must differ
+ # from the DWARF signature.
+ if [[ -z "$btf_optimized" ]]; then
+ error_log "BTF for foo missing; the stack-passed aggregate was likely rejected"
+ test_fail
+ fi
+ if [[ "$btf_optimized" == "$dwarf" ]]; then
+ error_log "BTF and DWARF signatures should be different and they are not: BTF: $btf_optimized ; DWARF $dwarf"
+ test_fail
+ fi
+else
+ # On other architectures clang may not emit DW_CC_nocall, so we
+ # cannot assert the optimization was detected.
+ if [[ -z "$btf_optimized" ]]; then
+ info_log "skip: no optimization detected on $arch"
+ test_skip
+ fi
+fi
+test_pass
diff --git a/tests/clang_parm_optimized.sh b/tests/clang_parm_optimized.sh
new file mode 100755
index 0000000..81d50af
--- /dev/null
+++ b/tests/clang_parm_optimized.sh
@@ -0,0 +1,63 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0-only
+
+source test_lib.sh
+
+outdir=$(make_tmpdir)
+
+# Comment this out to save test data.
+trap cleanup EXIT
+
+title_log "Validation of BTF encoding of true_signatures."
+
+clang_true="${outdir}/clang_true"
+CC=$(which clang 2>/dev/null)
+
+if [[ -z "$CC" ]]; then
+ info_log "skip: clang not available"
+ test_skip
+fi
+
+cat > ${clang_true}.c << EOF
+__attribute__((noinline)) static int foo(int a, int b, int c)
+{
+ return a * c - a - c;
+}
+
+int a, b, c;
+int main()
+{
+ return foo(a, b, c);
+}
+EOF
+
+CFLAGS="$CFLAGS -g -O2"
+${CC} ${CFLAGS} -o $clang_true ${clang_true}.c
+if [[ $? -ne 0 ]]; then
+ error_log "Could not compile ${clang_true}.c"
+ test_fail
+fi
+LLVM_OBJCOPY=objcopy pahole -J --btf_features=+true_signature $clang_true
+if [[ $? -ne 0 ]]; then
+ error_log "Could not encode BTF for $clang_true"
+ test_fail
+fi
+
+btf_optimized=$(pfunct --all --format_path=btf $clang_true |grep "foo")
+if [[ -z "$btf_optimized" ]]; then
+ info_log "skip: no optimizations applied."
+ test_skip
+fi
+
+btf_cmp=$btf_optimized
+dwarf=$(pfunct --all $clang_true |grep "foo")
+
+if [[ -n "$VERBOSE" ]]; then
+ printf " BTF: %s DWARF: %s\n" "$btf_optimized" "$dwarf"
+fi
+
+if [[ "$btf_cmp" == "$dwarf" ]]; then
+ error_log "BTF and DWARF signatures should be different and they are not: BTF: $btf_optimized ; DWARF $dwarf"
+ test_fail
+fi
+test_pass
diff --git a/tests/clang_parm_optimized_stack.sh b/tests/clang_parm_optimized_stack.sh
new file mode 100755
index 0000000..afdc355
--- /dev/null
+++ b/tests/clang_parm_optimized_stack.sh
@@ -0,0 +1,63 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0-only
+
+source test_lib.sh
+
+outdir=$(make_tmpdir)
+
+# Comment this out to save test data.
+trap cleanup EXIT
+
+title_log "Validation of BTF encoding of true_signatures."
+
+clang_true="${outdir}/clang_true"
+CC=$(which clang 2>/dev/null)
+
+if [[ -z "$CC" ]]; then
+ info_log "skip: clang not available"
+ test_skip
+fi
+
+cat > ${clang_true}.c << EOF
+__attribute__((noinline)) static int foo(int a, int b, int c, int d, int e, int f, int g, int h, int i)
+{
+ return a * i - a - i;
+}
+
+int a, b, c, d, e, f, g, h, i;
+int main()
+{
+ return foo(a, b, c, d, e, f, g, h, i);
+}
+EOF
+
+CFLAGS="$CFLAGS -g -O2"
+${CC} ${CFLAGS} -o $clang_true ${clang_true}.c
+if [[ $? -ne 0 ]]; then
+ error_log "Could not compile ${clang_true}.c"
+ test_fail
+fi
+LLVM_OBJCOPY=objcopy pahole -J --btf_features=+true_signature $clang_true
+if [[ $? -ne 0 ]]; then
+ error_log "Could not encode BTF for $clang_true"
+ test_fail
+fi
+
+btf_optimized=$(pfunct --all --format_path=btf $clang_true |grep "foo")
+if [[ -z "$btf_optimized" ]]; then
+ info_log "skip: no optimizations applied."
+ test_skip
+fi
+
+btf_cmp=$btf_optimized
+dwarf=$(pfunct --all $clang_true |grep "foo")
+
+if [[ -n "$VERBOSE" ]]; then
+ printf " BTF: %s DWARF: %s\n" "$btf_optimized" "$dwarf"
+fi
+
+if [[ "$btf_cmp" == "$dwarf" ]]; then
+ error_log "BTF and DWARF signatures should be different and they are not: BTF: $btf_optimized ; DWARF $dwarf"
+ test_fail
+fi
+test_pass
--
2.53.0-Meta
^ permalink raw reply related [flat|nested] 8+ messages in thread* Re: [PATCH dwarves v6 0/5] pahole: Encode true signatures in kernel BTF
2026-06-18 1:13 [PATCH dwarves v6 0/5] pahole: Encode true signatures in kernel BTF Yonghong Song
` (4 preceding siblings ...)
2026-06-18 1:14 ` [PATCH dwarves v6 5/5] tests: add BTF true_signature encoding tests Yonghong Song
@ 2026-06-20 8:46 ` Alan Maguire
2026-06-21 16:47 ` Yonghong Song
5 siblings, 1 reply; 8+ messages in thread
From: Alan Maguire @ 2026-06-20 8:46 UTC (permalink / raw)
To: Yonghong Song, Arnaldo Carvalho de Melo, dwarves
Cc: Alexei Starovoitov, Andrii Nakryiko, bpf, kernel-team
On 18/06/2026 02:13, Yonghong Song wrote:
> Current vmlinux BTF encoding is based on the source level signatures.
> But the compiler may do some optimization and changed the signature.
> If the user tried with source level signature, their initial implementation
> may have wrong results and then the user need to check what is the
> problem and work around it, e.g. through kprobe since kprobe does not
> need vmlinux BTF.
>
> Majority of changed signatures are due to dead argument elimination.
> The following is a more complex one. The original source signature:
> typedef struct {
> union {
> void *kernel;
> void __user *user;
> };
> bool is_kernel : 1;
> } sockptr_t;
> typedef sockptr_t bpfptr_t;
> static int map_create(union bpf_attr *attr, bpfptr_t uattr) { ... }
> After compiler optimization, the signature becomes:
> static int map_create(union bpf_attr *attr, bool uattr__is_kernel) { ... }
> In the above, uattr__is_kernel corresponds to 'is_kernel' field in sockptr_t.
> This makes it easier for developers to understand what changed.
>
> The new signature needs to properly follow ABI specification based on
> locations. Otherwise, that signature should be discarded. For example,
>
> 0x0242f1f7: DW_TAG_subprogram
> DW_AT_name ("memblock_find_in_range")
> DW_AT_calling_convention (DW_CC_nocall)
> DW_AT_type (0x0242decc "phys_addr_t")
> ...
> 0x0242f22e: DW_TAG_formal_parameter
> DW_AT_location (indexed (0x14a) loclist = 0x005595bc:
> [0xffffffff87a000f9, 0xffffffff87a00178): DW_OP_reg5 RDI
> [0xffffffff87a00178, 0xffffffff87a001be): DW_OP_reg14 R14
> [0xffffffff87a001be, 0xffffffff87a001c7): DW_OP_entry_value(DW_OP_reg5 RDI), DW_OP_stack_value
> [0xffffffff87a001c7, 0xffffffff87a00214): DW_OP_reg14 R14)
> DW_AT_name ("start")
> DW_AT_type (0x0242decc "phys_addr_t")
> ...
> 0x0242f239: DW_TAG_formal_parameter
> DW_AT_location (indexed (0x14b) loclist = 0x005595e6:
> [0xffffffff87a000f9, 0xffffffff87a00175): DW_OP_reg4 RSI
> [0xffffffff87a00175, 0xffffffff87a001b8): DW_OP_reg3 RBX
> [0xffffffff87a001b8, 0xffffffff87a001c7): DW_OP_entry_value(DW_OP_reg4 RSI), DW_OP_stack_value
> [0xffffffff87a001c7, 0xffffffff87a00214): DW_OP_reg3 RBX)
> DW_AT_name ("end")
> DW_AT_type (0x0242decc "phys_addr_t")
> ...
> 0x0242f245: DW_TAG_formal_parameter
> DW_AT_location (indexed (0x14c) loclist = 0x00559610:
> [0xffffffff87a001e3, 0xffffffff87a001ef): DW_OP_breg4 RSI+0)
> DW_AT_name ("size")
> DW_AT_type (0x0242decc "phys_addr_t")
> ...
> 0x0242f250: DW_TAG_formal_parameter
> DW_AT_const_value (4096)
> DW_AT_name ("align")
> DW_AT_type (0x0242decc "phys_addr_t")
> ...
>
> The third argument should correspond to RDX for x86_64. But the location suggests that
> the parameter value is stored in the address with 'RSI + 0'. It is not clear whether
> the parameter value is stored in RDX or not. So we have to discard this function in
> vmlinux BTF to avoid incorrect true signatures.
>
> For llvm, any function having
> DW_AT_calling_convention (DW_CC_nocall)
> in dwarf DW_TAG_subprogram will indicate that this function has signature changed.
> I did experiment with latest bpf-next. For x86_64, there are 69103 kernel functions
> and 875 kernel functions having signature changed. A series of patches are intended
> to ensure true signatures are properly represented. Eventually, only 20 functions
> cannot have true signatures due to locations.
>
> For arm64, there are 863 kernel functions having signature changed, and
> 108 functions cannot have true signatures due to locations. I checked those
> functions and it looks like the llvm arm64 backend is more relaxed in how it computes
> parameter values.
>
> For full testing, I enabled true signature support in kernel scripts/Makefile.btf like below:
> -pahole-flags-$(call test-ge, $(pahole-ver), 131) += --btf_features=attributes
> +pahole-flags-$(call test-ge, $(pahole-ver), 131) += --btf_features=attributes --btf_features=+true_signature
>
> See individual patches for details.
>
hi Yonghong, changes look good but we do hit a CI issue; specifically
in run_selftests in [1] for gcc+aarch64:
3: clang_parm_aggregate.sh
Validation of BTF encoding of true_signatures.
On arm64, BTF and DWARF signatures should be the same but they are not: BTF: long foo(struct t a__f1, struct t b, int i); ; DWARF long foo(struct t a, struct t b, int i);
Test ./clang_parm_aggregate.sh failed
Test data is in /tmp/clang_parm_aggregate.sh.NH5a6D
I think the problem is that as well as creating aggregate parameter names we
need to decide whether they should actually be used; in this case it looks like
we hit a function using aggregates, but without DW_CC_nocall. Perhaps the
reason is that the calling conventions are preserved while we only get a piece
of the "struct t a" argument? Something like [2] seems to resolve the problem,
please take a look and feel free to roll the fix into one of the patches if it makes
sense. You might find it convenient to use the merges of your series at [3]; they
merge your work with Vineet's tag changes now that they have landed (just patch 1
required merging IIRC).
I also think it would be better to add clang+aarch64 to the CI matrix in light of
your changes, since it will give us test coverage for changed functions for clang
for both x86_64 and aarch64; I've sent [4] to do that.
[1] https://github.com/alan-maguire/dwarves/actions/runs/27839367799/job/82394707921#step:7:24
[2] https://github.com/acmel/dwarves/commit/22d0512680d2ff5b6dd4d1e34ae603efe0f2d098
[3] https://github.com/alan-maguire/dwarves/commits/dwarves-true-sig-v6/
[4] https://lore.kernel.org/dwarves/20260620083056.361658-1-alan.maguire@oracle.com/
> Changelog:
> v5 -> v6:
> - v5: https://lore.kernel.org/bpf/20260523165712.1225231-1-yonghong.song@linux.dev/
> - The previous change relies on parameter__new() to collect and analyze each
> parameter to decide true signatures. The new one separates collecting and
> analyzing phase from Alan. This two-phase makes logic easy to understand.
> - In btf_encoder.c, remove usage of skip_idx to simplify the code.
> v4 -> v5:
> - v4: https://lore.kernel.org/bpf/20260326013144.2901265-1-yonghong.song@linux.dev/
> - Check info.signature_changed only under clang.
> - Fix an uninitialized variable issue (var reg_dix) for gcc.
> v3 -> v4:
> - v3: https://lore.kernel.org/bpf/20260320190917.1970524-1-yonghong.song@linux.dev/
> - Add simple prescan of parameter registers in order to get true signatures
> for those functions where optimization could happen but compiler didn't do it.
> - Do not create a new name (e.g. "uattr__is_kernel") with malloc at parameter_reg()
> stage. Instead remember both "uattr" and "is_kernel" and later generate the
> name "uattr__is_kernel" in btf encoder.
> - Add comments to explain how to handle parameters which may take two registers.
> - Fix some test failures on aarch64.
> v2 -> v3:
> - v2: https://lore.kernel.org/bpf/20260309153215.1917033-1-yonghong.song@linux.dev/
> - Change tests by using newly added test_lib.sh.
> - Simplify to get bool variable producer_clang.
> - Try to avoid producer_clang appearance in dwarf_loader.c in order to avoid
> clear separation between clang and gcc.
> v1 -> v2:
> - v1: https://lore.kernel.org/bpf/20260305225455.1151066-1-yonghong.song@linux.dev/
> - Added producer_clang guarding in btf_encoder. Otherwise, gcc kernel build
> will crash pahole.
> - Fix an early return in parameter__reg() which didn't do pthread_mutex_unlock()
> which caused the deadlock for arm64.
> - Add a few more places to guard with producer_clang and conf->true_signature
> to maintain the previous behavior if not clang or conf->true_signature is false.
>
> Yonghong Song (5):
> dwarf_loader: Detect aggregate ABI register usage and signature
> changes
> dwarf_loader: Collect per-parameter information
> dwarf_loader: Analyze per-parameter information for true signatures
> btf_encoder: Emit true function signatures
> tests: add BTF true_signature encoding tests
>
> btf_encoder.c | 24 +-
> dwarf_loader.c | 548 ++++++++++++++++++++++++----
> dwarves.h | 14 +
> tests/clang_parm_aggregate.sh | 85 +++++
> tests/clang_parm_memory.sh | 77 ++++
> tests/clang_parm_optimized.sh | 63 ++++
> tests/clang_parm_optimized_stack.sh | 63 ++++
> 7 files changed, 812 insertions(+), 62 deletions(-)
> create mode 100755 tests/clang_parm_aggregate.sh
> create mode 100755 tests/clang_parm_memory.sh
> create mode 100755 tests/clang_parm_optimized.sh
> create mode 100755 tests/clang_parm_optimized_stack.sh
>
^ permalink raw reply [flat|nested] 8+ messages in thread* Re: [PATCH dwarves v6 0/5] pahole: Encode true signatures in kernel BTF
2026-06-20 8:46 ` [PATCH dwarves v6 0/5] pahole: Encode true signatures in kernel BTF Alan Maguire
@ 2026-06-21 16:47 ` Yonghong Song
0 siblings, 0 replies; 8+ messages in thread
From: Yonghong Song @ 2026-06-21 16:47 UTC (permalink / raw)
To: Alan Maguire, Arnaldo Carvalho de Melo, dwarves
Cc: Alexei Starovoitov, Andrii Nakryiko, bpf, kernel-team
On 6/20/26 1:46 AM, Alan Maguire wrote:
> On 18/06/2026 02:13, Yonghong Song wrote:
>> Current vmlinux BTF encoding is based on the source level signatures.
>> But the compiler may do some optimization and changed the signature.
>> If the user tried with source level signature, their initial implementation
>> may have wrong results and then the user need to check what is the
>> problem and work around it, e.g. through kprobe since kprobe does not
>> need vmlinux BTF.
>>
>> Majority of changed signatures are due to dead argument elimination.
>> The following is a more complex one. The original source signature:
>> typedef struct {
>> union {
>> void *kernel;
>> void __user *user;
>> };
>> bool is_kernel : 1;
>> } sockptr_t;
>> typedef sockptr_t bpfptr_t;
>> static int map_create(union bpf_attr *attr, bpfptr_t uattr) { ... }
>> After compiler optimization, the signature becomes:
>> static int map_create(union bpf_attr *attr, bool uattr__is_kernel) { ... }
>> In the above, uattr__is_kernel corresponds to 'is_kernel' field in sockptr_t.
>> This makes it easier for developers to understand what changed.
>>
>> The new signature needs to properly follow ABI specification based on
>> locations. Otherwise, that signature should be discarded. For example,
>>
>> 0x0242f1f7: DW_TAG_subprogram
>> DW_AT_name ("memblock_find_in_range")
>> DW_AT_calling_convention (DW_CC_nocall)
>> DW_AT_type (0x0242decc "phys_addr_t")
>> ...
>> 0x0242f22e: DW_TAG_formal_parameter
>> DW_AT_location (indexed (0x14a) loclist = 0x005595bc:
>> [0xffffffff87a000f9, 0xffffffff87a00178): DW_OP_reg5 RDI
>> [0xffffffff87a00178, 0xffffffff87a001be): DW_OP_reg14 R14
>> [0xffffffff87a001be, 0xffffffff87a001c7): DW_OP_entry_value(DW_OP_reg5 RDI), DW_OP_stack_value
>> [0xffffffff87a001c7, 0xffffffff87a00214): DW_OP_reg14 R14)
>> DW_AT_name ("start")
>> DW_AT_type (0x0242decc "phys_addr_t")
>> ...
>> 0x0242f239: DW_TAG_formal_parameter
>> DW_AT_location (indexed (0x14b) loclist = 0x005595e6:
>> [0xffffffff87a000f9, 0xffffffff87a00175): DW_OP_reg4 RSI
>> [0xffffffff87a00175, 0xffffffff87a001b8): DW_OP_reg3 RBX
>> [0xffffffff87a001b8, 0xffffffff87a001c7): DW_OP_entry_value(DW_OP_reg4 RSI), DW_OP_stack_value
>> [0xffffffff87a001c7, 0xffffffff87a00214): DW_OP_reg3 RBX)
>> DW_AT_name ("end")
>> DW_AT_type (0x0242decc "phys_addr_t")
>> ...
>> 0x0242f245: DW_TAG_formal_parameter
>> DW_AT_location (indexed (0x14c) loclist = 0x00559610:
>> [0xffffffff87a001e3, 0xffffffff87a001ef): DW_OP_breg4 RSI+0)
>> DW_AT_name ("size")
>> DW_AT_type (0x0242decc "phys_addr_t")
>> ...
>> 0x0242f250: DW_TAG_formal_parameter
>> DW_AT_const_value (4096)
>> DW_AT_name ("align")
>> DW_AT_type (0x0242decc "phys_addr_t")
>> ...
>>
>> The third argument should correspond to RDX for x86_64. But the location suggests that
>> the parameter value is stored in the address with 'RSI + 0'. It is not clear whether
>> the parameter value is stored in RDX or not. So we have to discard this function in
>> vmlinux BTF to avoid incorrect true signatures.
>>
>> For llvm, any function having
>> DW_AT_calling_convention (DW_CC_nocall)
>> in dwarf DW_TAG_subprogram will indicate that this function has signature changed.
>> I did experiment with latest bpf-next. For x86_64, there are 69103 kernel functions
>> and 875 kernel functions having signature changed. A series of patches are intended
>> to ensure true signatures are properly represented. Eventually, only 20 functions
>> cannot have true signatures due to locations.
>>
>> For arm64, there are 863 kernel functions having signature changed, and
>> 108 functions cannot have true signatures due to locations. I checked those
>> functions and look like llvm arm64 backend more relaxed to compute parameter
>> values.
>>
>> For full testing, I enabled true signature support in kernel scripts/Makefile.btf like below:
>> -pahole-flags-$(call test-ge, $(pahole-ver), 131) += --btf_features=attributes
>> +pahole-flags-$(call test-ge, $(pahole-ver), 131) += --btf_features=attributes --btf_features=+true_signature
>>
>> See individual patches for details.
>>
> hi Yonghong, changes look good but we do hit a CI issue; specifically
> in run_selftests in [1] for gcc+aarch64:
>
> 3: clang_parm_aggregate.sh
> Validation of BTF encoding of true_signatures.
> On arm64, BTF and DWARF signatures should be the same but they are not: BTF: long foo(struct t a__f1, struct t b, int i); ; DWARF long foo(struct t a, struct t b, int i);
> Test ./clang_parm_aggregate.sh failed
> Test data is in /tmp/clang_parm_aggregate.sh.NH5a6D
>
> I think the problem is that as well as creating aggregate parameter names we
> need to decide whether they should actually be used; in this case it looks like
> we hit a function using aggregates, but without DW_CC_nocall. Perhaps the
> reason is that the calling conventions are preserved while we only get a piece
> of the "struct t a" argument? Something like [2] seems to resolve the problem,
> please take a look and feel free to roll the fix into one of the patches if it makes
> sense. You might find it convenient to use the merges of your series at [3]; they
> merge your work with Vineet's tag changes now that they have landed (just patch 1
> required merging IIRC).
On my arm64 machine, I ran ./clang_parm_aggregate.sh and can reproduce your failure.
In v5, it did work with llvm23. Probably due to a compiler and/or pahole change in v6,
the test failed. The following can fix the issue (I tested with llvm22 and development
llvm23):
diff --git a/tests/clang_parm_aggregate.sh b/tests/clang_parm_aggregate.sh
index 9502f8b..339cd19 100755
--- a/tests/clang_parm_aggregate.sh
+++ b/tests/clang_parm_aggregate.sh
@@ -58,7 +58,7 @@ verbose_log "BTF: $btf_optimized DWARF: $dwarf"
arch=$(uname -m)
-if [[ "$arch" == "x86_64" ]]; then
+if [[ "$arch" == "x86_64" || "$arch" == "aarch64" ]]; then
# On x86_64, clang emits DW_CC_nocall for optimized functions,
# so pahole should detect the optimization and produce a
# different BTF signature.
@@ -66,14 +66,6 @@ if [[ "$arch" == "x86_64" ]]; then
error_log "BTF and DWARF signatures should be different and they are not: BTF: $btf_optimized ; DWARF $dwarf"
test_fail
fi
-elif [[ "$arch" == "aarch64" ]]; then
- # On arm64, clang does not emit DW_CC_nocall, so pahole cannot
- # detect the optimization. BTF and DWARF signatures are expected
- # to be the same.
- if [[ "$btf_cmp" != "$dwarf" ]]; then
- error_log "On arm64, BTF and DWARF signatures should be the same but they are not: BTF: $btf_optimized ; DWARF $dwarf"
- test_fail
- fi
else
# On other architectures, skip if we cannot determine the
# expected behavior.
Currently, my testing is mostly on llvm23. I will test with llvm22 as well and push another
revision after your CI with llvm22 lands.
>
> I also think it would be better to add clang+aarch64 to the CI matrix in light of
> your changes, since it will give us test coverage for changed functions for clang
> for both x86_64 and aarch64; I've sent [4] to do that.
>
> [1] https://github.com/alan-maguire/dwarves/actions/runs/27839367799/job/82394707921#step:7:24
> [2] https://github.com/acmel/dwarves/commit/22d0512680d2ff5b6dd4d1e34ae603efe0f2d098
> [3] https://github.com/alan-maguire/dwarves/commits/dwarves-true-sig-v6/
> [4] https://lore.kernel.org/dwarves/20260620083056.361658-1-alan.maguire@oracle.com/
>
[...]
^ permalink raw reply related [flat|nested] 8+ messages in thread