public inbox for dwarves@vger.kernel.org
 help / color / mirror / Atom feed
* [PATCH] pahole: do not return an error when printing only a specific class
@ 2025-07-16  9:04 Alexis Lothoré (eBPF Foundation)
  2025-07-22 23:42 ` Ihor Solodrai
  0 siblings, 1 reply; 5+ messages in thread
From: Alexis Lothoré (eBPF Foundation) @ 2025-07-16  9:04 UTC (permalink / raw)
  To: dwarves
  Cc: Ihor Solodrai, Alan Maguire, Arnaldo Carvalho de Melo, ebpf,
	Thomas Petazzoni, Bastien Curutchet,
	Alexis Lothoré (eBPF Foundation)

When trying to print a specific class layout with recent pahole
versions, we get an error right after the expected class print:

  $ ./build/pahole -C test_bin_struct_packed -F dwarf tests/bin/test_bin
  struct test_bin_struct_packed {
          char                       a;                    /*     0     1 */
          short int                  b;                    /*     1     2 */
          int                        c;                    /*     3     4 */
          long long unsigned int     d;                    /*     7     8 */

          /* size: 15, cachelines: 1, members: 4 */
          /* last cacheline: 15 bytes */
  } __attribute__((__packed__));

  pahole: /home/alexis/src/pahole/tests/bin/test_bin: Invalid argument

This error is due to pahole_stealer returning LSK__STOP_LOADING when
dealing with a single class and when it is done. This LSK__STOP_LOADING
is then propagated to dwarf_loader__worker_thread and interpreted as an
error (and turned into a DWARF_CB_ABORT). The main issue is that this
LSK__STOP_LOADING value is sometimes generated by actual issues during
pahole stealing, sometimes by "nominal" execution.

Add a new LSK__ABORT status to distinguish between those nominal and
faulty cases. Return an error only when pahole_stealer returns
LSK__ABORT.

Signed-off-by: Alexis Lothoré (eBPF Foundation) <alexis.lothore@bootlin.com>
---
See [1] for original discussions about the issue

[1] https://lore.kernel.org/dwarves/DB5VWHU0N27I.3ETC4G47KB9Q@bootlin.com/
---
 dwarf_loader.c |  6 ++++--
 dwarves.h      |  1 +
 pahole.c       | 21 +++++++++++----------
 3 files changed, 16 insertions(+), 12 deletions(-)

diff --git a/dwarf_loader.c b/dwarf_loader.c
index ef00cdf308166732fc0938f2eecd56d314d3354f..4e1e19c9d828e111d78012feda4c8baa0d716378 100644
--- a/dwarf_loader.c
+++ b/dwarf_loader.c
@@ -3277,6 +3277,8 @@ static int cus__steal_now(struct cus *cus, struct cu *cu, struct conf_load *conf
 		break;
 	case LSK__KEEPIT:
 		break;
+	case LSK__ABORT:
+		break;
 	}
 	return lsk;
 }
@@ -3679,7 +3681,7 @@ static void *dwarf_loader__worker_thread(void *arg)
 			break;
 
 		case JOB_STEAL:
-			if (cus__steal_now(dcus->cus, job->cu, dcus->conf) == LSK__STOP_LOADING)
+			if (cus__steal_now(dcus->cus, job->cu, dcus->conf) == LSK__ABORT)
 				goto out_abort;
 			cus_queue__inc_next_cu_id();
 			/* re-enqueue JOB_DECODE so that next CU is decoded from DWARF */
@@ -3872,7 +3874,7 @@ static int cus__merge_and_process_cu(struct cus *cus, struct conf_load *conf,
 		goto out_abort;
 
 	cu__finalize(cu, cus, conf);
-	if (cus__steal_now(cus, cu, conf) == LSK__STOP_LOADING)
+	if (cus__steal_now(cus, cu, conf) == LSK__ABORT)
 		goto out_abort;
 
 	return 0;
diff --git a/dwarves.h b/dwarves.h
index 883852f4b60a36d73bce4568cbacd8ef3cff97ea..5df1cdad9be36212bd6548e86180860daffa3f63 100644
--- a/dwarves.h
+++ b/dwarves.h
@@ -43,6 +43,7 @@ enum load_steal_kind {
 	LSK__KEEPIT,
 	LSK__DELETE,
 	LSK__STOP_LOADING,
+	LSK__ABORT,
 };
 
 /*
diff --git a/pahole.c b/pahole.c
index 333e71ab65924d2c64771be7d40f768f02e9a0f0..66448fc430c19e5137a8467026d35d2ba3125e4c 100644
--- a/pahole.c
+++ b/pahole.c
@@ -3165,13 +3165,13 @@ static enum load_steal_kind pahole_stealer__btf_encode(struct cu *cu, struct con
 
 	if (!btf_encoder) {
 		fprintf(stderr, "Error creating BTF encoder.\n");
-		return LSK__STOP_LOADING;
+		return LSK__ABORT;
 	}
 
 	err = btf_encoder__encode_cu(btf_encoder, cu, conf_load);
 	if (err < 0) {
 		fprintf(stderr, "Error while encoding BTF.\n");
-		return LSK__STOP_LOADING;
+		return LSK__ABORT;
 	}
 
 	return LSK__DELETE;
@@ -3230,7 +3230,7 @@ static enum load_steal_kind pahole_stealer(struct cu *cu, struct conf_load *conf
 		 *
 		 * FIXME: Disabled, should use Oracle's libctf
 		 */
-		goto dump_and_stop;
+		goto dump_and_abort;
 	}
 #endif
 	if (class_name == NULL) {
@@ -3281,7 +3281,7 @@ static enum load_steal_kind pahole_stealer(struct cu *cu, struct conf_load *conf
 
 		if (prototype->nr_args != 0 && !tag__is_struct(class)) {
 			fprintf(stderr, "pahole: attributes are only supported with 'class' and 'struct' types\n");
-			goto dump_and_stop;
+			goto dump_and_abort;
 		}
 
 		struct type *type = tag__type(class);
@@ -3291,7 +3291,7 @@ static enum load_steal_kind pahole_stealer(struct cu *cu, struct conf_load *conf
 			if (type->sizeof_member == NULL) {
 				fprintf(stderr, "pahole: the sizeof member '%s' wasn't found in the '%s' type\n",
 					prototype->size, prototype->name);
-				goto dump_and_stop;
+				goto dump_and_abort;
 			}
 		}
 
@@ -3300,7 +3300,7 @@ static enum load_steal_kind pahole_stealer(struct cu *cu, struct conf_load *conf
 			if (type->type_member == NULL) {
 				fprintf(stderr, "pahole: the type member '%s' wasn't found in the '%s' type\n",
 					prototype->type, prototype->name);
-				goto dump_and_stop;
+				goto dump_and_abort;
 			}
 		}
 
@@ -3315,7 +3315,7 @@ static enum load_steal_kind pahole_stealer(struct cu *cu, struct conf_load *conf
 			if (type->filter == NULL) {
 				fprintf(stderr, "pahole: invalid filter '%s' for '%s'\n",
 					prototype->filter, prototype->name);
-				goto dump_and_stop;
+				goto dump_and_abort;
 			}
 		}
 
@@ -3386,15 +3386,16 @@ static enum load_steal_kind pahole_stealer(struct cu *cu, struct conf_load *conf
 	/*
 	 * If we found all the entries in --class_name, stop
 	 */
-	if (list_empty(&class_names)) {
-dump_and_stop:
+	if (list_empty(&class_names))
 		ret = LSK__STOP_LOADING;
-	}
 dump_it:
 	if (first_obj_only)
 		ret = LSK__STOP_LOADING;
 filter_it:
 	return ret;
+dump_and_abort:
+	ret = LSK__ABORT;
+	return ret;
 }
 
 static int prototypes__add(struct list_head *prototypes, const char *entry)

---
base-commit: 9845339bd09f9889dd741ed94f71eb92008dcdfc
change-id: 20250716-lsk__abort-2b517860af28

Best regards,
-- 
Alexis Lothoré, Bootlin
Embedded Linux and Kernel engineering
https://bootlin.com


^ permalink raw reply related	[flat|nested] 5+ messages in thread

* Re: [PATCH] pahole: do not return an error when printing only a specific class
  2025-07-16  9:04 [PATCH] pahole: do not return an error when printing only a specific class Alexis Lothoré (eBPF Foundation)
@ 2025-07-22 23:42 ` Ihor Solodrai
  2025-07-23  8:39   ` Alexis Lothoré
  0 siblings, 1 reply; 5+ messages in thread
From: Ihor Solodrai @ 2025-07-22 23:42 UTC (permalink / raw)
  To: Alexis Lothoré (eBPF Foundation), dwarves
  Cc: Alan Maguire, Arnaldo Carvalho de Melo, ebpf, Thomas Petazzoni,
	Bastien Curutchet

On 7/16/25 2:04 AM, Alexis Lothoré (eBPF Foundation) wrote:
> When trying to print a specific class layout with recent pahole
> versions, we get an error right after the expected class print:
> 
>    $ ./build/pahole -C test_bin_struct_packed -F dwarf tests/bin/test_bin
>    struct test_bin_struct_packed {
>            char                       a;                    /*     0     1 */
>            short int                  b;                    /*     1     2 */
>            int                        c;                    /*     3     4 */
>            long long unsigned int     d;                    /*     7     8 */
> 
>            /* size: 15, cachelines: 1, members: 4 */
>            /* last cacheline: 15 bytes */
>    } __attribute__((__packed__));
> 
>    pahole: /home/alexis/src/pahole/tests/bin/test_bin: Invalid argument
> 
> This error is due to pahole_stealer returning LSK__STOP_LOADING when
> dealing with a single class and when it is done. This LSK__STOP_LOADING
> is then propagated to dwarf_loader__worker_thread and interpreted as an
> error (and turned into a DWARF_CB_ABORT). The main issue is that this
> LSK__STOP_LOADING value is sometimes generated by actual issues during
> pahole stealing, sometimes by "nominal" execution.
> 
> Add a new LSK__ABORT status to distinguish between those nominal and
> faulty cases. Return an error only when pahole_stealer returns
> LSK__ABORT.
> 
> Signed-off-by: Alexis Lothoré (eBPF Foundation) <alexis.lothore@bootlin.com>
> ---
> See [1] for original discussions about the issue
> 
> [1] https://lore.kernel.org/dwarves/DB5VWHU0N27I.3ETC4G47KB9Q@bootlin.com/

Hi Alexis, thank you for working on this.

The change looks good to me, just one comment.

> ---
>   dwarf_loader.c |  6 ++++--
>   dwarves.h      |  1 +
>   pahole.c       | 21 +++++++++++----------
>   3 files changed, 16 insertions(+), 12 deletions(-)
> 
> diff --git a/dwarf_loader.c b/dwarf_loader.c
> index ef00cdf308166732fc0938f2eecd56d314d3354f..4e1e19c9d828e111d78012feda4c8baa0d716378 100644
> --- a/dwarf_loader.c
> +++ b/dwarf_loader.c
> @@ -3277,6 +3277,8 @@ static int cus__steal_now(struct cus *cus, struct cu *cu, struct conf_load *conf
>   		break;
>   	case LSK__KEEPIT:
>   		break;
> +	case LSK__ABORT:
> +		break;
>   	}
>   	return lsk;
>   }
> @@ -3679,7 +3681,7 @@ static void *dwarf_loader__worker_thread(void *arg)
>   			break;
>   
>   		case JOB_STEAL:
> -			if (cus__steal_now(dcus->cus, job->cu, dcus->conf) == LSK__STOP_LOADING)
> +			if (cus__steal_now(dcus->cus, job->cu, dcus->conf) == LSK__ABORT)
>   				goto out_abort;
>   			cus_queue__inc_next_cu_id();
>   			/* re-enqueue JOB_DECODE so that next CU is decoded from DWARF */
> @@ -3872,7 +3874,7 @@ static int cus__merge_and_process_cu(struct cus *cus, struct conf_load *conf,
>   		goto out_abort;
>   
>   	cu__finalize(cu, cus, conf);
> -	if (cus__steal_now(cus, cu, conf) == LSK__STOP_LOADING)
> +	if (cus__steal_now(cus, cu, conf) == LSK__ABORT)
>   		goto out_abort;

I think we should handle both LSK__STOP_LOADING and LSK__ABORT here,
otherwise the worker will keep loading DWARF (which is most of the
pahole's work) and passing cu-s to the stealer function.

I guess in many cases it'll complete without errors, but it's just
unnecessary work.

Maybe add a check for LSK__STOP_LOADING here with goto_ok?

>   
>   	return 0;
> diff --git a/dwarves.h b/dwarves.h
> index 883852f4b60a36d73bce4568cbacd8ef3cff97ea..5df1cdad9be36212bd6548e86180860daffa3f63 100644
> --- a/dwarves.h
> +++ b/dwarves.h
> @@ -43,6 +43,7 @@ enum load_steal_kind {
>   	LSK__KEEPIT,
>   	LSK__DELETE,
>   	LSK__STOP_LOADING,
> +	LSK__ABORT,
>   };
>   
>   /*
> diff --git a/pahole.c b/pahole.c
> index 333e71ab65924d2c64771be7d40f768f02e9a0f0..66448fc430c19e5137a8467026d35d2ba3125e4c 100644
> --- a/pahole.c
> +++ b/pahole.c
> @@ -3165,13 +3165,13 @@ static enum load_steal_kind pahole_stealer__btf_encode(struct cu *cu, struct con
>   
>   	if (!btf_encoder) {
>   		fprintf(stderr, "Error creating BTF encoder.\n");
> -		return LSK__STOP_LOADING;
> +		return LSK__ABORT;
>   	}
>   
>   	err = btf_encoder__encode_cu(btf_encoder, cu, conf_load);
>   	if (err < 0) {
>   		fprintf(stderr, "Error while encoding BTF.\n");
> -		return LSK__STOP_LOADING;
> +		return LSK__ABORT;
>   	}
>   
>   	return LSK__DELETE;
> @@ -3230,7 +3230,7 @@ static enum load_steal_kind pahole_stealer(struct cu *cu, struct conf_load *conf
>   		 *
>   		 * FIXME: Disabled, should use Oracle's libctf
>   		 */
> -		goto dump_and_stop;
> +		goto dump_and_abort;
>   	}
>   #endif
>   	if (class_name == NULL) {
> @@ -3281,7 +3281,7 @@ static enum load_steal_kind pahole_stealer(struct cu *cu, struct conf_load *conf
>   
>   		if (prototype->nr_args != 0 && !tag__is_struct(class)) {
>   			fprintf(stderr, "pahole: attributes are only supported with 'class' and 'struct' types\n");
> -			goto dump_and_stop;
> +			goto dump_and_abort;
>   		}
>   
>   		struct type *type = tag__type(class);
> @@ -3291,7 +3291,7 @@ static enum load_steal_kind pahole_stealer(struct cu *cu, struct conf_load *conf
>   			if (type->sizeof_member == NULL) {
>   				fprintf(stderr, "pahole: the sizeof member '%s' wasn't found in the '%s' type\n",
>   					prototype->size, prototype->name);
> -				goto dump_and_stop;
> +				goto dump_and_abort;
>   			}
>   		}
>   
> @@ -3300,7 +3300,7 @@ static enum load_steal_kind pahole_stealer(struct cu *cu, struct conf_load *conf
>   			if (type->type_member == NULL) {
>   				fprintf(stderr, "pahole: the type member '%s' wasn't found in the '%s' type\n",
>   					prototype->type, prototype->name);
> -				goto dump_and_stop;
> +				goto dump_and_abort;
>   			}
>   		}
>   
> @@ -3315,7 +3315,7 @@ static enum load_steal_kind pahole_stealer(struct cu *cu, struct conf_load *conf
>   			if (type->filter == NULL) {
>   				fprintf(stderr, "pahole: invalid filter '%s' for '%s'\n",
>   					prototype->filter, prototype->name);
> -				goto dump_and_stop;
> +				goto dump_and_abort;
>   			}
>   		}
>   
> @@ -3386,15 +3386,16 @@ static enum load_steal_kind pahole_stealer(struct cu *cu, struct conf_load *conf
>   	/*
>   	 * If we found all the entries in --class_name, stop
>   	 */
> -	if (list_empty(&class_names)) {
> -dump_and_stop:
> +	if (list_empty(&class_names))
>   		ret = LSK__STOP_LOADING;
> -	}
>   dump_it:
>   	if (first_obj_only)
>   		ret = LSK__STOP_LOADING;
>   filter_it:
>   	return ret;
> +dump_and_abort:
> +	ret = LSK__ABORT;
> +	return ret;
>   }
>   
>   static int prototypes__add(struct list_head *prototypes, const char *entry)
> 
> ---
> base-commit: 9845339bd09f9889dd741ed94f71eb92008dcdfc
> change-id: 20250716-lsk__abort-2b517860af28
> 
> Best regards,


^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH] pahole: do not return an error when printing only a specific class
  2025-07-22 23:42 ` Ihor Solodrai
@ 2025-07-23  8:39   ` Alexis Lothoré
  2025-07-23 10:03     ` Alan Maguire
  0 siblings, 1 reply; 5+ messages in thread
From: Alexis Lothoré @ 2025-07-23  8:39 UTC (permalink / raw)
  To: Ihor Solodrai, dwarves
  Cc: Alan Maguire, Arnaldo Carvalho de Melo, ebpf, Thomas Petazzoni,
	Bastien Curutchet

Hi Ihor,

On Wed Jul 23, 2025 at 1:42 AM CEST, Ihor Solodrai wrote:
> On 7/16/25 2:04 AM, Alexis Lothoré (eBPF Foundation) wrote:

[...]

>>   	cu__finalize(cu, cus, conf);
>> -	if (cus__steal_now(cus, cu, conf) == LSK__STOP_LOADING)
>> +	if (cus__steal_now(cus, cu, conf) == LSK__ABORT)
>>   		goto out_abort;
>
> I think we should handle both LSK__STOP_LOADING and LSK__ABORT here,
> otherwise the worker will keep loading DWARF (which is most of the
> pahole's work) and passing cu-s to the stealer function.
>
> I guess in many cases it'll complete without errors, but it's just
> unnecessary work.
>
> Maybe add a check for LSK__STOP_LOADING here with goto_ok?

Hmmm, I am not sure how to follow you here. Even if cus__steal_now returns
LSK__STOP_LOADING, I need to return DWARF_CB_OK from
cus__merge_and_process_cu, otherwise it will be processed as an error
again. Or are you suggesting just make sure to delete current cu but still
return DWARF_CB_OK when getting LSK__STOP_LOADING, as done below ?

--- a/dwarf_loader.c
+++ b/dwarf_loader.c
@@ -3796,6 +3796,7 @@ static int cus__merge_and_process_cu(struct cus *cus, struct conf_load *conf,
        Dwarf_Off off = 0, noff;
        struct cu *cu = NULL;
        size_t cuhl;
+       int res;

        while (dwarf_nextcu(dw, off, &noff, &cuhl, NULL, &pointer_size,
                            &offset_size) == 0) {
@@ -3874,7 +3875,8 @@ static int cus__merge_and_process_cu(struct cus *cus, struct conf_load *conf,
                goto out_abort;

        cu__finalize(cu, cus, conf);
-       if (cus__steal_now(cus, cu, conf) == LSK__ABORT)
+       res = cus__steal_now(cus, cu, conf);
+       if (res == LSK__STOP_LOADING || res == LSK__ABORT)
                goto out_abort;

        return 0;
@@ -3882,7 +3884,7 @@ static int cus__merge_and_process_cu(struct cus *cus, struct conf_load *conf,
 out_abort:
        dwarf_cu__delete(cu);
        cu__delete(cu);
-       return DWARF_CB_ABORT;
+       return res == LSK__STOP_LOADING ? DWARF_CB_OK : DWARF_CB_ABORT;
 }


Thanks,

Alexis


-- 
Alexis Lothoré, Bootlin
Embedded Linux and Kernel engineering
https://bootlin.com


^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH] pahole: do not return an error when printing only a specific class
  2025-07-23  8:39   ` Alexis Lothoré
@ 2025-07-23 10:03     ` Alan Maguire
  2025-07-23 15:37       ` Ihor Solodrai
  0 siblings, 1 reply; 5+ messages in thread
From: Alan Maguire @ 2025-07-23 10:03 UTC (permalink / raw)
  To: Alexis Lothoré, Ihor Solodrai, dwarves
  Cc: Arnaldo Carvalho de Melo, ebpf, Thomas Petazzoni,
	Bastien Curutchet

On 23/07/2025 09:39, Alexis Lothoré wrote:
> Hi Ihor,
> 
> On Wed Jul 23, 2025 at 1:42 AM CEST, Ihor Solodrai wrote:
>> On 7/16/25 2:04 AM, Alexis Lothoré (eBPF Foundation) wrote:
> 
> [...]
> 
>>>   	cu__finalize(cu, cus, conf);
>>> -	if (cus__steal_now(cus, cu, conf) == LSK__STOP_LOADING)
>>> +	if (cus__steal_now(cus, cu, conf) == LSK__ABORT)
>>>   		goto out_abort;
>>
>> I think we should handle both LSK__STOP_LOADING and LSK__ABORT here,
>> otherwise the worker will keep loading DWARF (which is most of the
>> pahole's work) and passing cu-s to the stealer function.
>>
>> I guess in many cases it'll complete without errors, but it's just
>> unnecessary work.
>>
>> Maybe add a check for LSK__STOP_LOADING here with goto_ok?
> 
> Hmmm, I am not sure how to follow you here. Even if cus__steal_now returns
> LSK__STOP_LOADING, I need to return DWARF_CB_OK from
> cus__merge_and_process_cu, otherwise it will be processed as an error
> again. Or are you suggesting just make sure to delete current cu but still
> return DWARF_CB_OK when getting LSK__STOP_LOADING, as done below ?
> 
> --- a/dwarf_loader.c
> +++ b/dwarf_loader.c
> @@ -3796,6 +3796,7 @@ static int cus__merge_and_process_cu(struct cus *cus, struct conf_load *conf,
>         Dwarf_Off off = 0, noff;
>         struct cu *cu = NULL;
>         size_t cuhl;
> +       int res;
> 
>         while (dwarf_nextcu(dw, off, &noff, &cuhl, NULL, &pointer_size,
>                             &offset_size) == 0) {
> @@ -3874,7 +3875,8 @@ static int cus__merge_and_process_cu(struct cus *cus, struct conf_load *conf,
>                 goto out_abort;
> 
>         cu__finalize(cu, cus, conf);
> -       if (cus__steal_now(cus, cu, conf) == LSK__ABORT)
> +       res = cus__steal_now(cus, cu, conf);
> +       if (res == LSK__STOP_LOADING || res == LSK__ABORT)
>                 goto out_abort;
> 
>         return 0;
> @@ -3882,7 +3884,7 @@ static int cus__merge_and_process_cu(struct cus *cus, struct conf_load *conf,
>  out_abort:
>         dwarf_cu__delete(cu);
>         cu__delete(cu);
> -       return DWARF_CB_ABORT;
> +       return res == LSK__STOP_LOADING ? DWARF_CB_OK : DWARF_CB_ABORT;
>  }
> 
> 
> Thanks,
> 
> Alexis
> 
> 

Good point; I think ideally we'd like to stop processing in either case
but still distinguish error from early exit. We could perhaps set the
appropriate LSK_ value in dcus (dcus->lsk_status?) to make the
distinction and use it in cus__load_module(), renaming out_error in
dwarf_cus__process_cu()  to out_early or something to underline the fact
that it's an early exit not necessarily a failure. So we preserve the
return of CB_ABORT for both failure and early exit, but can still tell
these apart using dcus->lsk_status. Would that work?

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH] pahole: do not return an error when printing only a specific class
  2025-07-23 10:03     ` Alan Maguire
@ 2025-07-23 15:37       ` Ihor Solodrai
  0 siblings, 0 replies; 5+ messages in thread
From: Ihor Solodrai @ 2025-07-23 15:37 UTC (permalink / raw)
  To: Alan Maguire, Alexis Lothoré, dwarves
  Cc: Arnaldo Carvalho de Melo, ebpf, Thomas Petazzoni,
	Bastien Curutchet

On 7/23/25 3:03 AM, Alan Maguire wrote:
> On 23/07/2025 09:39, Alexis Lothoré wrote:
>> Hi Ihor,
>>
>> On Wed Jul 23, 2025 at 1:42 AM CEST, Ihor Solodrai wrote:
>>> On 7/16/25 2:04 AM, Alexis Lothoré (eBPF Foundation) wrote:
>>
>> [...]
>>
>>>>    	cu__finalize(cu, cus, conf);
>>>> -	if (cus__steal_now(cus, cu, conf) == LSK__STOP_LOADING)
>>>> +	if (cus__steal_now(cus, cu, conf) == LSK__ABORT)
>>>>    		goto out_abort;
>>>
>>> I think we should handle both LSK__STOP_LOADING and LSK__ABORT here,
>>> otherwise the worker will keep loading DWARF (which is most of the
>>> pahole's work) and passing cu-s to the stealer function.
>>>
>>> I guess in many cases it'll complete without errors, but it's just
>>> unnecessary work.
>>>
>>> Maybe add a check for LSK__STOP_LOADING here with goto_ok?
>>
>> Hmmm, I am not sure how to follow you here. Even if cus__steal_now returns
>> LSK__STOP_LOADING, I need to return DWARF_CB_OK from
>> cus__merge_and_process_cu, otherwise it will be processed as an error
>> again. Or are you suggesting just make sure to delete current cu but still
>> return DWARF_CB_OK when getting LSK__STOP_LOADING, as done below ?

LSK__STOP_LOADING signals that no more input needs to be processed.

So I was thinking we should make sure the workers terminate without
error and return DWARF_CB_OK. Whether we delete a cu or not is not
that important since we are exiting soon anyway.

>>
>> --- a/dwarf_loader.c
>> +++ b/dwarf_loader.c
>> @@ -3796,6 +3796,7 @@ static int cus__merge_and_process_cu(struct cus *cus, struct conf_load *conf,
>>          Dwarf_Off off = 0, noff;
>>          struct cu *cu = NULL;
>>          size_t cuhl;
>> +       int res;
>>
>>          while (dwarf_nextcu(dw, off, &noff, &cuhl, NULL, &pointer_size,
>>                              &offset_size) == 0) {
>> @@ -3874,7 +3875,8 @@ static int cus__merge_and_process_cu(struct cus *cus, struct conf_load *conf,
>>                  goto out_abort;
>>
>>          cu__finalize(cu, cus, conf);
>> -       if (cus__steal_now(cus, cu, conf) == LSK__ABORT)
>> +       res = cus__steal_now(cus, cu, conf);
>> +       if (res == LSK__STOP_LOADING || res == LSK__ABORT)
>>                  goto out_abort;
>>
>>          return 0;
>> @@ -3882,7 +3884,7 @@ static int cus__merge_and_process_cu(struct cus *cus, struct conf_load *conf,
>>   out_abort:
>>          dwarf_cu__delete(cu);
>>          cu__delete(cu);
>> -       return DWARF_CB_ABORT;
>> +       return res == LSK__STOP_LOADING ? DWARF_CB_OK : DWARF_CB_ABORT;
>>   }
>>
>>
>> Thanks,
>>
>> Alexis
>>
>>
> 
> Good point; I think ideally we'd like to stop processing in either case
> but still distinguish error from early exit. We could perhaps set the
> appropriate LSK_ value in dcus (dcus->lsk_status?) to make the
> distinction and use it in cus__load_module(), renaming out_error in
> dwarf_cus__process_cu()  to out_early or something to underline the fact
> that it's an early exit not necessarily a failure. So we preserve the
> return of CB_ABORT for both failure and early exit, but can still tell
> these apart using dcus->lsk_status. Would that work?

Good suggestion. Yes, I think that would work better than just return
DWARF_CB_OK from the worker.

I overlooked the fact that there are many workers, and one of them
exiting with DWARF_CB_OK would not affect the others. But
DWARF_CB_ABORT would via the cus_processing_queue.abort flag.



^ permalink raw reply	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2025-07-23 15:37 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2025-07-16  9:04 [PATCH] pahole: do not return an error when printing only a specific class Alexis Lothoré (eBPF Foundation)
2025-07-22 23:42 ` Ihor Solodrai
2025-07-23  8:39   ` Alexis Lothoré
2025-07-23 10:03     ` Alan Maguire
2025-07-23 15:37       ` Ihor Solodrai

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox